5151
5252ASCIISET : Final [Set [str ]] = set (string .printable )
5353
54- # See https://tools.ietf .org/html/rfc7230#section-3.1.1
55- # and https://tools.ietf .org/html/rfc7230#appendix-B
54+ # See https://www.rfc-editor .org/rfc/rfc9110.html#name-overview
55+ # and https://www.rfc-editor .org/rfc/rfc9110.html#name-tokens
5656#
5757# method = token
5858# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
5959# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
6060# token = 1*tchar
6161METHRE : Final [Pattern [str ]] = re .compile (r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+" )
62- VERSRE : Final [Pattern [str ]] = re .compile (r"HTTP/(\d+ ).(\d+ )" )
63- HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\" ]" )
# HTTP-version = "HTTP/" DIGIT "." DIGIT (RFC 9112, section 2.3).
# The dot is escaped so that only a literal "." may separate the major and
# minor digits, and re.ASCII restricts \d to 0-9 so that non-ASCII Unicode
# digits are not accepted as version numbers.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
63+ HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\ ]" )
6464
6565
6666class RawRequestMessage (NamedTuple ):
@@ -135,8 +135,11 @@ def parse_headers(
135135 except ValueError :
136136 raise InvalidHeader (line ) from None
137137
138- bname = bname .strip (b" \t " )
139- bvalue = bvalue .lstrip ()
138+ # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
139+ if {bname [0 ], bname [- 1 ]} & {32 , 9 }: # {" ", "\t"}
140+ raise InvalidHeader (line )
141+
142+ bvalue = bvalue .lstrip (b" \t " )
140143 if HDRRE .search (bname ):
141144 raise InvalidHeader (bname )
142145 if len (bname ) > self .max_field_size :
@@ -157,6 +160,7 @@ def parse_headers(
157160 # consume continuation lines
158161 continuation = line and line [0 ] in (32 , 9 ) # (' ', '\t')
159162
163+ # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
160164 if continuation :
161165 bvalue_lst = [bvalue ]
162166 while continuation :
@@ -191,10 +195,14 @@ def parse_headers(
191195 str (header_length ),
192196 )
193197
194- bvalue = bvalue .strip ()
198+ bvalue = bvalue .strip (b" \t " )
195199 name = bname .decode ("utf-8" , "surrogateescape" )
196200 value = bvalue .decode ("utf-8" , "surrogateescape" )
197201
202+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
203+ if "\n " in value or "\r " in value or "\x00 " in value :
204+ raise InvalidHeader (bvalue )
205+
198206 headers .add (name , value )
199207 raw_headers .append ((bname , bvalue ))
200208
@@ -309,15 +317,12 @@ def get_content_length() -> Optional[int]:
309317 if length_hdr is None :
310318 return None
311319
312- try :
313- length = int ( length_hdr )
314- except ValueError :
320+ # Shouldn't allow +/- or other number formats.
321+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
322+ if not length_hdr . strip ( " \t " ). isdigit () :
315323 raise InvalidHeader (CONTENT_LENGTH )
316324
317- if length < 0 :
318- raise InvalidHeader (CONTENT_LENGTH )
319-
320- return length
325+ return int (length_hdr )
321326
322327 length = get_content_length ()
323328 # do not support old websocket spec
@@ -457,6 +462,24 @@ def parse_headers(
457462 upgrade = False
458463 chunked = False
459464
465+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
466+ # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
467+ singletons = (
468+ hdrs .CONTENT_LENGTH ,
469+ hdrs .CONTENT_LOCATION ,
470+ hdrs .CONTENT_RANGE ,
471+ hdrs .CONTENT_TYPE ,
472+ hdrs .ETAG ,
473+ hdrs .HOST ,
474+ hdrs .MAX_FORWARDS ,
475+ hdrs .SERVER ,
476+ hdrs .TRANSFER_ENCODING ,
477+ hdrs .USER_AGENT ,
478+ )
479+ bad_hdr = next ((h for h in singletons if len (headers .getall (h , ())) > 1 ), None )
480+ if bad_hdr is not None :
481+ raise BadHttpMessage (f"Duplicate '{ bad_hdr } ' header found." )
482+
460483 # keep-alive
461484 conn = headers .get (hdrs .CONNECTION )
462485 if conn :
@@ -510,7 +533,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
510533 # request line
511534 line = lines [0 ].decode ("utf-8" , "surrogateescape" )
512535 try :
513- method , path , version = line .split (None , 2 )
536+ method , path , version = line .split (maxsplit = 2 )
514537 except ValueError :
515538 raise BadStatusLine (line ) from None
516539
@@ -524,14 +547,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
524547 raise BadStatusLine (method )
525548
526549 # version
527- try :
528- if version .startswith ("HTTP/" ):
529- n1 , n2 = version [5 :].split ("." , 1 )
530- version_o = HttpVersion (int (n1 ), int (n2 ))
531- else :
532- raise BadStatusLine (version )
533- except Exception :
534- raise BadStatusLine (version )
550+ match = VERSRE .match (version )
551+ if match is None :
552+ raise BadStatusLine (line )
553+ version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
535554
536555 if method == "CONNECT" :
537556 # authority-form,
@@ -598,12 +617,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
598617 def parse_message (self , lines : List [bytes ]) -> RawResponseMessage :
599618 line = lines [0 ].decode ("utf-8" , "surrogateescape" )
600619 try :
601- version , status = line .split (None , 1 )
620+ version , status = line .split (maxsplit = 1 )
602621 except ValueError :
603622 raise BadStatusLine (line ) from None
604623
605624 try :
606- status , reason = status .split (None , 1 )
625+ status , reason = status .split (maxsplit = 1 )
607626 except ValueError :
608627 reason = ""
609628
@@ -619,13 +638,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
619638 version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
620639
621640 # The status code is a three-digit number
622- try :
623- status_i = int (status )
624- except ValueError :
625- raise BadStatusLine (line ) from None
626-
627- if status_i > 999 :
641+ if len (status ) != 3 or not status .isdigit ():
628642 raise BadStatusLine (line )
643+ status_i = int (status )
629644
630645 # read headers
631646 (
@@ -760,14 +775,13 @@ def feed_data(
760775 else :
761776 size_b = chunk [:pos ]
762777
763- try :
764- size = int (bytes (size_b ), 16 )
765- except ValueError :
778+ if not size_b .isdigit ():
766779 exc = TransferEncodingError (
767780 chunk [:pos ].decode ("ascii" , "surrogateescape" )
768781 )
769782 self .payload .set_exception (exc )
770- raise exc from None
783+ raise exc
784+ size = int (bytes (size_b ), 16 )
771785
772786 chunk = chunk [pos + 2 :]
773787 if size == 0 : # eof marker
0 commit comments