Skip to content

Commit 17ba1f0

Browse files
asvetlov and webknjaz committed
Support absolute-form and authority-form URLs by web server (#6409)
Co-authored-by: Sviatoslav Sydorenko <[email protected]>
1 parent 2d70f95 commit 17ba1f0

File tree

7 files changed

+123
-58
lines changed

7 files changed

+123
-58
lines changed

CHANGES/6227.bugfix

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Started supporting ``authority-form`` and ``absolute-form`` URLs on the server-side.

aiohttp/_http_parser.pyx

Lines changed: 43 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ cdef class HttpParser:
425425
raw_headers = tuple(self._raw_headers)
426426
headers = CIMultiDictProxy(self._headers)
427427

428-
if upgrade or self._cparser.method == 5: # cparser.CONNECT:
428+
if upgrade or self._cparser.method == cparser.HTTP_CONNECT:
429429
self._upgraded = True
430430

431431
# do not support old websocket spec
@@ -453,7 +453,7 @@ cdef class HttpParser:
453453

454454
if (
455455
ULLONG_MAX > self._cparser.content_length > 0 or chunked or
456-
self._cparser.method == 5 or # CONNECT: 5
456+
self._cparser.method == cparser.HTTP_CONNECT or
457457
(self._cparser.status_code >= 199 and
458458
self._cparser.content_length == 0 and
459459
self._read_until_eof)
@@ -586,34 +586,45 @@ cdef class HttpRequestParser(HttpParser):
586586
self._path = self._buf.decode('utf-8', 'surrogateescape')
587587
try:
588588
idx3 = len(self._path)
589-
idx1 = self._path.find("?")
590-
if idx1 == -1:
591-
query = ""
592-
idx2 = self._path.find("#")
593-
if idx2 == -1:
594-
path = self._path
595-
fragment = ""
596-
else:
597-
path = self._path[0: idx2]
598-
fragment = self._path[idx2+1:]
589+
if self._cparser.method == cparser.HTTP_CONNECT:
590+
# authority-form,
591+
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
592+
self._url = URL.build(authority=self._path, encoded=True)
593+
elif idx3 > 1 and self._path[0] == '/':
594+
# origin-form,
595+
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
596+
idx1 = self._path.find("?")
597+
if idx1 == -1:
598+
query = ""
599+
idx2 = self._path.find("#")
600+
if idx2 == -1:
601+
path = self._path
602+
fragment = ""
603+
else:
604+
path = self._path[0: idx2]
605+
fragment = self._path[idx2+1:]
599606

600-
else:
601-
path = self._path[0:idx1]
602-
idx1 += 1
603-
idx2 = self._path.find("#", idx1+1)
604-
if idx2 == -1:
605-
query = self._path[idx1:]
606-
fragment = ""
607607
else:
608-
query = self._path[idx1: idx2]
609-
fragment = self._path[idx2+1:]
610-
611-
self._url = URL.build(
612-
path=path,
613-
query_string=query,
614-
fragment=fragment,
615-
encoded=True,
616-
)
608+
path = self._path[0:idx1]
609+
idx1 += 1
610+
idx2 = self._path.find("#", idx1+1)
611+
if idx2 == -1:
612+
query = self._path[idx1:]
613+
fragment = ""
614+
else:
615+
query = self._path[idx1: idx2]
616+
fragment = self._path[idx2+1:]
617+
618+
self._url = URL.build(
619+
path=path,
620+
query_string=query,
621+
fragment=fragment,
622+
encoded=True,
623+
)
624+
else:
625+
# absolute-form for proxy maybe,
626+
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
627+
self._url = URL(self._path, encoded=True)
617628
finally:
618629
PyByteArray_Resize(self._buf, 0)
619630

@@ -726,7 +737,10 @@ cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
726737
pyparser._last_error = exc
727738
return -1
728739
else:
729-
if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT
740+
if (
741+
pyparser._cparser.upgrade or
742+
pyparser._cparser.method == cparser.HTTP_CONNECT
743+
):
730744
return 2
731745
else:
732746
return 0

aiohttp/http_parser.py

Lines changed: 26 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -532,9 +532,6 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
532532
"Status line is too long", str(self.max_line_size), str(len(path))
533533
)
534534

535-
path_part, _hash_separator, url_fragment = path.partition("#")
536-
path_part, _question_mark_separator, qs_part = path_part.partition("?")
537-
538535
# method
539536
if not METHRE.match(method):
540537
raise BadStatusLine(method)
@@ -549,6 +546,31 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
549546
except Exception:
550547
raise BadStatusLine(version)
551548

549+
if method == "CONNECT":
550+
# authority-form,
551+
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
552+
url = URL.build(authority=path, encoded=True)
553+
elif path.startswith("/"):
554+
# origin-form,
555+
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
556+
path_part, _hash_separator, url_fragment = path.partition("#")
557+
path_part, _question_mark_separator, qs_part = path_part.partition("?")
558+
559+
# NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
560+
# NOTE: parser does, otherwise it results into the same
561+
# NOTE: HTTP Request-Line input producing different
562+
# NOTE: `yarl.URL()` objects
563+
url = URL.build(
564+
path=path_part,
565+
query_string=qs_part,
566+
fragment=url_fragment,
567+
encoded=True,
568+
)
569+
else:
570+
# absolute-form for proxy maybe,
571+
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
572+
url = URL(path, encoded=True)
573+
552574
# read headers
553575
(
554576
headers,
@@ -575,16 +597,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
575597
compression,
576598
upgrade,
577599
chunked,
578-
# NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
579-
# NOTE: parser does, otherwise it results into the same
580-
# NOTE: HTTP Request-Line input producing different
581-
# NOTE: `yarl.URL()` objects
582-
URL.build(
583-
path=path_part,
584-
query_string=qs_part,
585-
fragment=url_fragment,
586-
encoded=True,
587-
),
600+
url,
588601
)
589602

590603

aiohttp/web_request.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -171,14 +171,24 @@ def __init__(
171171
self._headers = message.headers
172172
self._method = message.method
173173
self._version = message.version
174-
self._rel_url = message.url
174+
self._cache = {} # type: Dict[str, Any]
175+
url = message.url
176+
if url.is_absolute():
177+
# absolute URL is given,
178+
# override auto-calculating url, host, and scheme
179+
# all other properties should be good
180+
self._cache["url"] = url
181+
self._cache["host"] = url.host
182+
self._cache["scheme"] = url.scheme
183+
self._rel_url = url.relative()
184+
else:
185+
self._rel_url = message.url
175186
self._post = (
176187
None
177188
) # type: Optional[MultiDictProxy[Union[str, bytes, FileField]]]
178189
self._read_bytes = None # type: Optional[bytes]
179190

180191
self._state = state
181-
self._cache = {} # type: Dict[str, Any]
182192
self._task = task
183193
self._client_max_size = client_max_size
184194
self._loop = loop

tests/test_http_parser.py

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -362,15 +362,35 @@ def test_compression_unknown(parser) -> None:
362362
assert msg.compression is None
363363

364364

365-
def test_headers_connect(parser) -> None:
365+
def test_url_connect(parser: Any) -> None:
366+
text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n"
367+
messages, upgrade, tail = parser.feed_data(text)
368+
msg, payload = messages[0]
369+
assert upgrade
370+
assert msg.url == URL.build(authority="www.google.com")
371+
372+
373+
def test_headers_connect(parser: Any) -> None:
366374
text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n"
367375
messages, upgrade, tail = parser.feed_data(text)
368376
msg, payload = messages[0]
369377
assert upgrade
370378
assert isinstance(payload, streams.StreamReader)
371379

372380

373-
def test_headers_old_websocket_key1(parser) -> None:
381+
def test_url_absolute(parser: Any) -> None:
382+
text = (
383+
b"GET https://www.google.com/path/to.html HTTP/1.1\r\n"
384+
b"content-length: 0\r\n\r\n"
385+
)
386+
messages, upgrade, tail = parser.feed_data(text)
387+
msg, payload = messages[0]
388+
assert not upgrade
389+
assert msg.method == "GET"
390+
assert msg.url == URL("https://www.google.com/path/to.html")
391+
392+
393+
def test_headers_old_websocket_key1(parser: Any) -> None:
374394
text = b"GET /test HTTP/1.1\r\n" b"SEC-WEBSOCKET-KEY1: line\r\n\r\n"
375395

376396
with pytest.raises(http_exceptions.BadHttpMessage):

tests/test_proxy_functional.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -341,29 +341,28 @@ async def test_proxy_http_absolute_path(proxy_test_server, get_request) -> None:
341341
assert len(proxy.requests_list) == 1
342342
assert proxy.request.method == "GET"
343343
assert proxy.request.host == "aiohttp.io"
344-
assert proxy.request.path_qs == "http://aiohttp.io/path?query=yes"
344+
assert proxy.request.path_qs == "/path?query=yes"
345345

346346

347347
async def test_proxy_http_raw_path(proxy_test_server, get_request) -> None:
348348
url = "http://aiohttp.io:2561/space sheep?q=can:fly"
349-
raw_url = "http://aiohttp.io:2561/space%20sheep?q=can:fly"
349+
raw_url = "/space%20sheep?q=can:fly"
350350
proxy = await proxy_test_server()
351351

352352
await get_request(url=url, proxy=proxy.url)
353353

354-
assert proxy.request.host == "aiohttp.io:2561"
354+
assert proxy.request.host == "aiohttp.io"
355355
assert proxy.request.path_qs == raw_url
356356

357357

358358
async def test_proxy_http_idna_support(proxy_test_server, get_request) -> None:
359359
url = "http://éé.com/"
360-
raw_url = "http://xn--9caa.com/"
361360
proxy = await proxy_test_server()
362361

363362
await get_request(url=url, proxy=proxy.url)
364363

365-
assert proxy.request.host == "xn--9caa.com"
366-
assert proxy.request.path_qs == raw_url
364+
assert proxy.request.host == "éé.com"
365+
assert proxy.request.path_qs == "/"
367366

368367

369368
async def test_proxy_http_connection_error(get_request) -> None:
@@ -759,7 +758,7 @@ async def test_proxy_from_env_http(proxy_test_server, get_request, mocker) -> No
759758
assert len(proxy.requests_list) == 1
760759
assert proxy.request.method == "GET"
761760
assert proxy.request.host == "aiohttp.io"
762-
assert proxy.request.path_qs == "http://aiohttp.io/path"
761+
assert proxy.request.path_qs == "/path"
763762
assert "Proxy-Authorization" not in proxy.request.headers
764763

765764

@@ -781,7 +780,7 @@ async def test_proxy_from_env_http_with_auth(proxy_test_server, get_request, moc
781780
assert len(proxy.requests_list) == 1
782781
assert proxy.request.method == "GET"
783782
assert proxy.request.host == "aiohttp.io"
784-
assert proxy.request.path_qs == "http://aiohttp.io/path"
783+
assert proxy.request.path_qs == "/path"
785784
assert proxy.request.headers["Proxy-Authorization"] == auth.encode()
786785

787786

@@ -807,7 +806,7 @@ async def test_proxy_from_env_http_with_auth_from_netrc(
807806
assert len(proxy.requests_list) == 1
808807
assert proxy.request.method == "GET"
809808
assert proxy.request.host == "aiohttp.io"
810-
assert proxy.request.path_qs == "http://aiohttp.io/path"
809+
assert proxy.request.path_qs == "/path"
811810
assert proxy.request.headers["Proxy-Authorization"] == auth.encode()
812811

813812

@@ -833,7 +832,7 @@ async def test_proxy_from_env_http_without_auth_from_netrc(
833832
assert len(proxy.requests_list) == 1
834833
assert proxy.request.method == "GET"
835834
assert proxy.request.host == "aiohttp.io"
836-
assert proxy.request.path_qs == "http://aiohttp.io/path"
835+
assert proxy.request.path_qs == "/path"
837836
assert "Proxy-Authorization" not in proxy.request.headers
838837

839838

@@ -857,7 +856,7 @@ async def test_proxy_from_env_http_without_auth_from_wrong_netrc(
857856
assert len(proxy.requests_list) == 1
858857
assert proxy.request.method == "GET"
859858
assert proxy.request.host == "aiohttp.io"
860-
assert proxy.request.path_qs == "http://aiohttp.io/path"
859+
assert proxy.request.path_qs == "/path"
861860
assert "Proxy-Authorization" not in proxy.request.headers
862861

863862

@@ -873,7 +872,7 @@ async def xtest_proxy_from_env_https(proxy_test_server, get_request, mocker):
873872
assert len(proxy.requests_list) == 2
874873
assert proxy.request.method == "GET"
875874
assert proxy.request.host == "aiohttp.io"
876-
assert proxy.request.path_qs == "https://aiohttp.io/path"
875+
assert proxy.request.path_qs == "/path"
877876
assert "Proxy-Authorization" not in proxy.request.headers
878877

879878

tests/test_web_request.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,14 @@ def test_non_ascii_raw_path() -> None:
157157
assert "/путь" == req.raw_path
158158

159159

160+
def test_absolute_url() -> None:
161+
req = make_mocked_request("GET", "https://example.com/path/to?a=1")
162+
assert req.url == URL("https://example.com/path/to?a=1")
163+
assert req.scheme == "https"
164+
assert req.host == "example.com"
165+
assert req.rel_url == URL.build(path="/path/to", query={"a": "1"})
166+
167+
160168
def test_content_length() -> None:
161169
req = make_mocked_request("Get", "/", CIMultiDict([("CONTENT-LENGTH", "123")]))
162170

0 commit comments

Comments
 (0)