From 218272c17be44fdaf77f555a7731a754c87909d9 Mon Sep 17 00:00:00 2001
From: Yannick Marcon <yannick.marcon@obiba.org>
Date: Wed, 18 Sep 2019 09:20:33 +0200
Subject: [PATCH 1/2] URL's scheme parser is compliant with RFC 3986

---
 R/url.r                   |  2 +-
 tests/testthat/test-url.r | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/R/url.r b/R/url.r
index c3f6d33f..70c76d5a 100644
--- a/R/url.r
+++ b/R/url.r
@@ -45,7 +45,7 @@ parse_url <- function(url) {
   }
 
   fragment <- pull_off("#(.*)$")
-  scheme <- pull_off("^([[:alpha:]+.-]+):")
+  scheme <- pull_off("^([[:alpha:]][[:alpha:][:digit:]+.-]*):")
   netloc <- pull_off("^//([^/?]*)/?")
 
   if (identical(netloc, "")) { # corresponds to ///
diff --git a/tests/testthat/test-url.r b/tests/testthat/test-url.r
index 9b026681..bd9f2334 100644
--- a/tests/testthat/test-url.r
+++ b/tests/testthat/test-url.r
@@ -85,6 +85,36 @@ test_that("parse_url preserves leading / in path", {
   expect_equal(url$path, "/tmp/foobar")
 })
 
+test_that("scheme starts with alpha", {
+  url <- parse_url("+ab://host/tmp/foobar")
+  expect_equal(url$scheme, NULL)
+})
+
+test_that("scheme can contain digits", {
+  url <- parse_url("ab1://host/tmp/foobar")
+  expect_equal(url$scheme, "ab1")
+})
+
+test_that("scheme can contain plus", {
+  url <- parse_url("a+b://host/tmp/foobar")
+  expect_equal(url$scheme, "a+b")
+})
+
+test_that("scheme can contain period", {
+  url <- parse_url("a.b://host/tmp/foobar")
+  expect_equal(url$scheme, "a.b")
+})
+
+test_that("scheme can contain hyphen", {
+  url <- parse_url("a-b://host/tmp/foobar")
+  expect_equal(url$scheme, "a-b")
+})
+
+test_that("scheme can be a single character", {
+  url <- parse_url("a://host/tmp/foobar")
+  expect_equal(url$scheme, "a")
+})
+
 # compose_query -----------------------------------------------------------
 
 test_that("I() prevents escaping", {

From 721befbb00620ad69b1310fba211fb821c32f345 Mon Sep 17 00:00:00 2001
From: Yannick Marcon <yannick.marcon@obiba.org>
Date: Fri, 3 Apr 2020 18:56:29 +0200
Subject: [PATCH 2/2] RFC3986 reference added, NEWS update

---
 NEWS.md          | 3 +++
 R/url.r          | 6 +++---
 man/parse_url.Rd | 4 ++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e402d10d..82388958 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,8 @@
 # httr (development version)
 
+* `parse_url()` now parses the URL scheme according to RFC 3986: it must
+  start with a letter and may contain digits, `+`, `.` and `-` (@ymarcon, #615).
+
 # httr 1.4.1
 
 * Remove the default `cainfo` option on Windows. Providing a CA bundle is not 
diff --git a/R/url.r b/R/url.r
index 70c76d5a..bc265179 100644
--- a/R/url.r
+++ b/R/url.r
@@ -1,9 +1,9 @@
 # Good example for testing
 # http://stevenlevithan.com/demo/parseuri/js/
 
-#' Parse and build urls according to RFC1808.
+#' Parse and build urls according to RFC3986.
 #'
-#' See <http://tools.ietf.org/html/rfc1808.html> for details of parsing
+#' See <https://tools.ietf.org/html/rfc3986> for details of parsing
 #' algorithm.
 #'
 #' @param url For `parse_url` a character vector (of length 1) to parse
@@ -24,7 +24,7 @@
 #' parse_url("http://google.com/")
 #' parse_url("http://google.com:80/")
 #' parse_url("http://google.com:80/?a=1&b=2")
-#' 
+#'
 #' url <- parse_url("http://google.com/")
 #' url$scheme <- "https"
 #' url$query <- list(q = "hello")
diff --git a/man/parse_url.Rd b/man/parse_url.Rd
index 3cbbf5c8..3a9ff37b 100644
--- a/man/parse_url.Rd
+++ b/man/parse_url.Rd
@@ -3,7 +3,7 @@
 \name{parse_url}
 \alias{parse_url}
 \alias{build_url}
-\title{Parse and build urls according to RFC1808.}
+\title{Parse and build urls according to RFC3986.}
 \usage{
 parse_url(url)
 
@@ -29,7 +29,7 @@ a list containing:
 }
 }
 \description{
-See \url{http://tools.ietf.org/html/rfc1808.html} for details of parsing
+See \url{https://tools.ietf.org/html/rfc3986} for details of parsing
 algorithm.
 }
 \examples{