From 218272c17be44fdaf77f555a7731a754c87909d9 Mon Sep 17 00:00:00 2001 From: Yannick Marcon Date: Wed, 18 Sep 2019 09:20:33 +0200 Subject: [PATCH 1/2] URL's scheme parser is compliant with RFC 3986 --- R/url.r | 2 +- tests/testthat/test-url.r | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/R/url.r b/R/url.r index c3f6d33f..70c76d5a 100644 --- a/R/url.r +++ b/R/url.r @@ -45,7 +45,7 @@ parse_url <- function(url) { } fragment <- pull_off("#(.*)$") - scheme <- pull_off("^([[:alpha:]+.-]+):") + scheme <- pull_off("^([[:alpha:]][[:alpha:][:digit:]+.-]*):") netloc <- pull_off("^//([^/?]*)/?") if (identical(netloc, "")) { # corresponds to /// diff --git a/tests/testthat/test-url.r b/tests/testthat/test-url.r index 9b026681..bd9f2334 100644 --- a/tests/testthat/test-url.r +++ b/tests/testthat/test-url.r @@ -85,6 +85,36 @@ test_that("parse_url preserves leading / in path", { expect_equal(url$path, "/tmp/foobar") }) +test_that("scheme starts with alpha", { + url <- parse_url("+ab://host/tmp/foobar") + expect_equal(url$scheme, NULL) +}) + +test_that("scheme can contain digits", { + url <- parse_url("ab1://host/tmp/foobar") + expect_equal(url$scheme, "ab1") +}) + +test_that("scheme can contain plus", { + url <- parse_url("a+b://host/tmp/foobar") + expect_equal(url$scheme, "a+b") +}) + +test_that("scheme can contain period", { + url <- parse_url("a.b://host/tmp/foobar") + expect_equal(url$scheme, "a.b") +}) + +test_that("scheme can contain hyphen", { + url <- parse_url("a-b://host/tmp/foobar") + expect_equal(url$scheme, "a-b") +}) + +test_that("scheme can be a single character", { + url <- parse_url("a://host/tmp/foobar") + expect_equal(url$scheme, "a") +}) + # compose_query ----------------------------------------------------------- test_that("I() prevents escaping", { From 721befbb00620ad69b1310fba211fb821c32f345 Mon Sep 17 00:00:00 2001 From: Yannick Marcon Date: Fri, 3 Apr 2020 18:56:29 +0200 Subject: [PATCH 2/2] RFC3986 
reference added, NEWS update --- NEWS.md | 3 +++ R/url.r | 6 +++--- man/parse_url.Rd | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index e402d10d..82388958 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # httr (development version) +* `parse_url()` now refers to RFC3986 for the parsing of the URL's + scheme, with a bit more permissive syntax (@ymarcon, #615). + # httr 1.4.1 * Remove the default `cainfo` option on Windows. Providing a CA bundle is not diff --git a/R/url.r b/R/url.r index 70c76d5a..bc265179 100644 --- a/R/url.r +++ b/R/url.r @@ -1,9 +1,9 @@ # Good example for testing # http://stevenlevithan.com/demo/parseuri/js/ -#' Parse and build urls according to RFC1808. +#' Parse and build urls according to RFC3986. #' -#' See <http://tools.ietf.org/html/rfc1808.html> for details of parsing +#' See <https://tools.ietf.org/html/rfc3986> for details of parsing #' algorithm. #' #' @param url For `parse_url` a character vector (of length 1) to parse @@ -24,7 +24,7 @@ #' parse_url("http://google.com/") #' parse_url("http://google.com:80/") #' parse_url("http://google.com:80/?a=1&b=2") -#' +#' #' url <- parse_url("http://google.com/") #' url$scheme <- "https" #' url$query <- list(q = "hello") diff --git a/man/parse_url.Rd b/man/parse_url.Rd index 3cbbf5c8..3a9ff37b 100644 --- a/man/parse_url.Rd +++ b/man/parse_url.Rd @@ -3,7 +3,7 @@ \name{parse_url} \alias{parse_url} \alias{build_url} -\title{Parse and build urls according to RFC1808.} +\title{Parse and build urls according to RFC3986.} \usage{ parse_url(url) @@ -29,7 +29,7 @@ a list containing: } } \description{ -See \url{http://tools.ietf.org/html/rfc1808.html} for details of parsing +See \url{https://tools.ietf.org/html/rfc3986} for details of parsing algorithm. } \examples{