Skip to content

Commit 85713a4

Browse files
Update Python parser for RFCs 9110/9112 (#7661)
1 parent 314e960 commit 85713a4

File tree

2 files changed

+131
-40
lines changed

2 files changed

+131
-40
lines changed

aiohttp/http_parser.py

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,16 @@
5151

5252
ASCIISET: Final[Set[str]] = set(string.printable)
5353

54-
# See https://tools.ietf.org/html/rfc7230#section-3.1.1
55-
# and https://tools.ietf.org/html/rfc7230#appendix-B
54+
# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
55+
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
5656
#
5757
# method = token
5858
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
5959
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
6060
# token = 1*tchar
6161
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
62-
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
63-
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
62+
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
63+
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
6464

6565

6666
class RawRequestMessage(NamedTuple):
@@ -135,8 +135,11 @@ def parse_headers(
135135
except ValueError:
136136
raise InvalidHeader(line) from None
137137

138-
bname = bname.strip(b" \t")
139-
bvalue = bvalue.lstrip()
138+
# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
139+
if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
140+
raise InvalidHeader(line)
141+
142+
bvalue = bvalue.lstrip(b" \t")
140143
if HDRRE.search(bname):
141144
raise InvalidHeader(bname)
142145
if len(bname) > self.max_field_size:
@@ -157,6 +160,7 @@ def parse_headers(
157160
# consume continuation lines
158161
continuation = line and line[0] in (32, 9) # (' ', '\t')
159162

163+
# Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
160164
if continuation:
161165
bvalue_lst = [bvalue]
162166
while continuation:
@@ -191,10 +195,14 @@ def parse_headers(
191195
str(header_length),
192196
)
193197

194-
bvalue = bvalue.strip()
198+
bvalue = bvalue.strip(b" \t")
195199
name = bname.decode("utf-8", "surrogateescape")
196200
value = bvalue.decode("utf-8", "surrogateescape")
197201

202+
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
203+
if "\n" in value or "\r" in value or "\x00" in value:
204+
raise InvalidHeader(bvalue)
205+
198206
headers.add(name, value)
199207
raw_headers.append((bname, bvalue))
200208

@@ -309,15 +317,12 @@ def get_content_length() -> Optional[int]:
309317
if length_hdr is None:
310318
return None
311319

312-
try:
313-
length = int(length_hdr)
314-
except ValueError:
320+
# Shouldn't allow +/- or other number formats.
321+
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
322+
if not length_hdr.strip(" \t").isdigit():
315323
raise InvalidHeader(CONTENT_LENGTH)
316324

317-
if length < 0:
318-
raise InvalidHeader(CONTENT_LENGTH)
319-
320-
return length
325+
return int(length_hdr)
321326

322327
length = get_content_length()
323328
# do not support old websocket spec
@@ -457,6 +462,24 @@ def parse_headers(
457462
upgrade = False
458463
chunked = False
459464

465+
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
466+
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
467+
singletons = (
468+
hdrs.CONTENT_LENGTH,
469+
hdrs.CONTENT_LOCATION,
470+
hdrs.CONTENT_RANGE,
471+
hdrs.CONTENT_TYPE,
472+
hdrs.ETAG,
473+
hdrs.HOST,
474+
hdrs.MAX_FORWARDS,
475+
hdrs.SERVER,
476+
hdrs.TRANSFER_ENCODING,
477+
hdrs.USER_AGENT,
478+
)
479+
bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
480+
if bad_hdr is not None:
481+
raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
482+
460483
# keep-alive
461484
conn = headers.get(hdrs.CONNECTION)
462485
if conn:
@@ -510,7 +533,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
510533
# request line
511534
line = lines[0].decode("utf-8", "surrogateescape")
512535
try:
513-
method, path, version = line.split(None, 2)
536+
method, path, version = line.split(maxsplit=2)
514537
except ValueError:
515538
raise BadStatusLine(line) from None
516539

@@ -524,14 +547,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
524547
raise BadStatusLine(method)
525548

526549
# version
527-
try:
528-
if version.startswith("HTTP/"):
529-
n1, n2 = version[5:].split(".", 1)
530-
version_o = HttpVersion(int(n1), int(n2))
531-
else:
532-
raise BadStatusLine(version)
533-
except Exception:
534-
raise BadStatusLine(version)
550+
match = VERSRE.match(version)
551+
if match is None:
552+
raise BadStatusLine(line)
553+
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
535554

536555
if method == "CONNECT":
537556
# authority-form,
@@ -598,12 +617,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
598617
def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
599618
line = lines[0].decode("utf-8", "surrogateescape")
600619
try:
601-
version, status = line.split(None, 1)
620+
version, status = line.split(maxsplit=1)
602621
except ValueError:
603622
raise BadStatusLine(line) from None
604623

605624
try:
606-
status, reason = status.split(None, 1)
625+
status, reason = status.split(maxsplit=1)
607626
except ValueError:
608627
reason = ""
609628

@@ -619,13 +638,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
619638
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
620639

621640
# The status code is a three-digit number
622-
try:
623-
status_i = int(status)
624-
except ValueError:
625-
raise BadStatusLine(line) from None
626-
627-
if status_i > 999:
641+
if len(status) != 3 or not status.isdigit():
628642
raise BadStatusLine(line)
643+
status_i = int(status)
629644

630645
# read headers
631646
(
@@ -760,14 +775,13 @@ def feed_data(
760775
else:
761776
size_b = chunk[:pos]
762777

763-
try:
764-
size = int(bytes(size_b), 16)
765-
except ValueError:
778+
if not size_b.isdigit():
766779
exc = TransferEncodingError(
767780
chunk[:pos].decode("ascii", "surrogateescape")
768781
)
769782
self.payload.set_exception(exc)
770-
raise exc from None
783+
raise exc
784+
size = int(bytes(size_b), 16)
771785

772786
chunk = chunk[pos + 2 :]
773787
if size == 0: # eof marker

tests/test_http_parser.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,74 @@ def test_invalid_name(parser) -> None:
476476
parser.feed_data(text)
477477

478478

479+
def test_cve_2023_37276(parser: Any) -> None:
480+
text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
481+
with pytest.raises(http_exceptions.BadHttpMessage):
482+
parser.feed_data(text)
483+
484+
485+
@pytest.mark.parametrize(
486+
"hdr",
487+
(
488+
"Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
489+
"Content-Length: +256",
490+
"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
491+
"Bar: abc\ndef",
492+
"Baz: abc\x00def",
493+
"Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
494+
"Foo\t: bar",
495+
),
496+
)
497+
def test_bad_headers(parser: Any, hdr: str) -> None:
498+
text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
499+
with pytest.raises(http_exceptions.InvalidHeader):
500+
parser.feed_data(text)
501+
502+
503+
def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
504+
"""Test that invalid chunked encoding doesn't allow content-length to be used."""
505+
parser = HttpRequestParserPy(
506+
protocol,
507+
loop,
508+
2**16,
509+
max_line_size=8190,
510+
max_field_size=8190,
511+
)
512+
text = (
513+
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
514+
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
515+
)
516+
messages, upgrade, tail = parser.feed_data(text)
517+
assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)
518+
519+
520+
@pytest.mark.skipif(
521+
"HttpRequestParserC" not in dir(aiohttp.http_parser),
522+
reason="C based HTTP parser not available",
523+
)
524+
def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
525+
"""C parser behaves differently. Maybe we should align them later."""
526+
parser = HttpRequestParserC(
527+
protocol,
528+
loop,
529+
2**16,
530+
max_line_size=8190,
531+
max_field_size=8190,
532+
)
533+
text = (
534+
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
535+
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
536+
)
537+
with pytest.raises(http_exceptions.BadHttpMessage):
538+
parser.feed_data(text)
539+
540+
541+
def test_whitespace_before_header(parser: Any) -> None:
542+
text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
543+
with pytest.raises(http_exceptions.BadHttpMessage):
544+
parser.feed_data(text)
545+
546+
479547
@pytest.mark.parametrize("size", [40960, 8191])
480548
def test_max_header_field_size(parser, size) -> None:
481549
name = b"t" * size
@@ -657,6 +725,11 @@ def test_http_request_parser_bad_version(parser) -> None:
657725
parser.feed_data(b"GET //get HT/11\r\n\r\n")
658726

659727

728+
def test_http_request_parser_bad_version_number(parser: Any) -> None:
729+
with pytest.raises(http_exceptions.BadHttpMessage):
730+
parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")
731+
732+
660733
@pytest.mark.parametrize("size", [40965, 8191])
661734
def test_http_request_max_status_line(parser, size) -> None:
662735
path = b"t" * (size - 5)
@@ -724,6 +797,11 @@ def test_http_response_parser_bad_version(response) -> None:
724797
response.feed_data(b"HT/11 200 Ok\r\n\r\n")
725798

726799

800+
def test_http_response_parser_bad_version_number(response) -> None:
801+
with pytest.raises(http_exceptions.BadHttpMessage):
802+
response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")
803+
804+
727805
def test_http_response_parser_no_reason(response) -> None:
728806
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]
729807

@@ -754,19 +832,18 @@ def test_http_response_parser_bad(response) -> None:
754832
response.feed_data(b"HTT/1\r\n\r\n")
755833

756834

757-
@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser")
758835
def test_http_response_parser_code_under_100(response) -> None:
759-
msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
760-
assert msg.code == 99
836+
with pytest.raises(http_exceptions.BadStatusLine):
837+
response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")
761838

762839

763840
def test_http_response_parser_code_above_999(response) -> None:
764-
with pytest.raises(http_exceptions.BadHttpMessage):
841+
with pytest.raises(http_exceptions.BadStatusLine):
765842
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")
766843

767844

768845
def test_http_response_parser_code_not_int(response) -> None:
769-
with pytest.raises(http_exceptions.BadHttpMessage):
846+
with pytest.raises(http_exceptions.BadStatusLine):
770847
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
771848

772849

0 commit comments

Comments
 (0)