66from enum import IntEnum
77from typing import (
88 Any ,
9+ ClassVar ,
910 Final ,
1011 Generic ,
1112 List ,
13+ Literal ,
1214 NamedTuple ,
1315 Optional ,
1416 Pattern ,
2527from . import hdrs
2628from .base_protocol import BaseProtocol
2729from .compression_utils import HAS_BROTLI , BrotliDecompressor , ZLibDecompressor
28- from .helpers import NO_EXTENSIONS , BaseTimerContext
30+ from .helpers import DEBUG , NO_EXTENSIONS , BaseTimerContext
2931from .http_exceptions import (
3032 BadHttpMessage ,
3133 BadStatusLine ,
4951 "RawResponseMessage" ,
5052)
5153
54+ _SEP = Literal [b"\r \n " , b"\n " ]
55+
5256ASCIISET : Final [Set [str ]] = set (string .printable )
5357
5458# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# Method token characters (RFC 9110 "token"): one or more tchar.
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
# HTTP-version = "HTTP/" DIGIT "." DIGIT.  The dot is escaped so that only a
# literal "." separates the digits, and re.ASCII restricts \d to 0-9 so that
# non-ASCII decimal digits are not accepted as a version number.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# Bytes that are not allowed in a header field name (controls and separators).
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
# One or more hexadecimal digits, used to validate a chunk-size field.
HEXDIGIT: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
6469
6570
6671class RawRequestMessage (NamedTuple ):
@@ -210,6 +215,8 @@ def parse_headers(
210215
211216
212217class HttpParser (abc .ABC , Generic [_MsgT ]):
218+ lax : ClassVar [bool ] = False
219+
213220 def __init__ (
214221 self ,
215222 protocol : Optional [BaseProtocol ] = None ,
@@ -272,7 +279,7 @@ def feed_eof(self) -> Optional[_MsgT]:
272279 def feed_data (
273280 self ,
274281 data : bytes ,
275- SEP : bytes = b"\r \n " ,
282+ SEP : _SEP = b"\r \n " ,
276283 EMPTY : bytes = b"" ,
277284 CONTENT_LENGTH : istr = hdrs .CONTENT_LENGTH ,
278285 METH_CONNECT : str = hdrs .METH_CONNECT ,
@@ -296,13 +303,16 @@ def feed_data(
296303 pos = data .find (SEP , start_pos )
297304 # consume \r\n
298305 if pos == start_pos and not self ._lines :
299- start_pos = pos + 2
306+ start_pos = pos + len ( SEP )
300307 continue
301308
302309 if pos >= start_pos :
303310 # line found
304- self ._lines .append (data [start_pos :pos ])
305- start_pos = pos + 2
311+ line = data [start_pos :pos ]
312+ if SEP == b"\n " : # For lax response parsing
313+ line = line .rstrip (b"\r " )
314+ self ._lines .append (line )
315+ start_pos = pos + len (SEP )
306316
307317 # \r\n\r\n found
308318 if self ._lines [- 1 ] == EMPTY :
@@ -319,7 +329,7 @@ def get_content_length() -> Optional[int]:
319329
320330 # Shouldn't allow +/- or other number formats.
321331 # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
322- if not length_hdr .strip (" \t " ).isdigit ():
332+ if not length_hdr .strip (" \t " ).isdecimal ():
323333 raise InvalidHeader (CONTENT_LENGTH )
324334
325335 return int (length_hdr )
@@ -356,6 +366,7 @@ def get_content_length() -> Optional[int]:
356366 readall = self .readall ,
357367 response_with_body = self .response_with_body ,
358368 auto_decompress = self ._auto_decompress ,
369+ lax = self .lax ,
359370 )
360371 if not payload_parser .done :
361372 self ._payload_parser = payload_parser
@@ -374,6 +385,7 @@ def get_content_length() -> Optional[int]:
374385 compression = msg .compression ,
375386 readall = True ,
376387 auto_decompress = self ._auto_decompress ,
388+ lax = self .lax ,
377389 )
378390 else :
379391 if (
@@ -397,6 +409,7 @@ def get_content_length() -> Optional[int]:
397409 readall = True ,
398410 response_with_body = self .response_with_body ,
399411 auto_decompress = self ._auto_decompress ,
412+ lax = self .lax ,
400413 )
401414 if not payload_parser .done :
402415 self ._payload_parser = payload_parser
@@ -419,7 +432,7 @@ def get_content_length() -> Optional[int]:
419432 assert not self ._lines
420433 assert self ._payload_parser is not None
421434 try :
422- eof , data = self ._payload_parser .feed_data (data [start_pos :])
435+ eof , data = self ._payload_parser .feed_data (data [start_pos :], SEP )
423436 except BaseException as exc :
424437 if self .payload_exception is not None :
425438 self ._payload_parser .payload .set_exception (
@@ -614,6 +627,20 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
614627 Returns RawResponseMessage.
615628 """
616629
630+ # Lax mode should only be enabled on response parser.
631+ lax = not DEBUG
632+
def feed_data(
    self,
    data: bytes,
    SEP: Optional[_SEP] = None,
    *args: Any,
    **kwargs: Any,
) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
    r"""Feed raw response bytes to the parent parser's ``feed_data``.

    If the caller does not supply ``SEP``, a default is picked from the
    module-level ``DEBUG`` flag: ``b"\r\n"`` when ``DEBUG`` is truthy,
    otherwise a bare ``b"\n"``.  Every other positional and keyword
    argument is forwarded untouched, and the parent's result is returned
    as-is.
    """
    # An explicit separator always wins over the DEBUG-derived default.
    if SEP is not None:
        return super().feed_data(data, SEP, *args, **kwargs)

    default_sep: _SEP = b"\r\n" if DEBUG else b"\n"
    return super().feed_data(data, default_sep, *args, **kwargs)
643+
617644 def parse_message (self , lines : List [bytes ]) -> RawResponseMessage :
618645 line = lines [0 ].decode ("utf-8" , "surrogateescape" )
619646 try :
@@ -638,7 +665,7 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
638665 version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
639666
640667 # The status code is a three-digit number
641- if len (status ) != 3 or not status .isdigit ():
668+ if len (status ) != 3 or not status .isdecimal ():
642669 raise BadStatusLine (line )
643670 status_i = int (status )
644671
@@ -680,13 +707,15 @@ def __init__(
680707 readall : bool = False ,
681708 response_with_body : bool = True ,
682709 auto_decompress : bool = True ,
710+ lax : bool = False ,
683711 ) -> None :
684712 self ._length = 0
685713 self ._type = ParseState .PARSE_NONE
686714 self ._chunk = ChunkState .PARSE_CHUNKED_SIZE
687715 self ._chunk_size = 0
688716 self ._chunk_tail = b""
689717 self ._auto_decompress = auto_decompress
718+ self ._lax = lax
690719 self .done = False
691720
692721 # payload decompression wrapper
@@ -738,7 +767,7 @@ def feed_eof(self) -> None:
738767 )
739768
740769 def feed_data (
741- self , chunk : bytes , SEP : bytes = b"\r \n " , CHUNK_EXT : bytes = b";"
770+ self , chunk : bytes , SEP : _SEP = b"\r \n " , CHUNK_EXT : bytes = b";"
742771 ) -> Tuple [bool , bytes ]:
743772 # Read specified amount of bytes
744773 if self ._type == ParseState .PARSE_LENGTH :
@@ -775,17 +804,22 @@ def feed_data(
775804 else :
776805 size_b = chunk [:pos ]
777806
778- if not size_b .isdigit ():
807+ if self ._lax : # Allow whitespace in lax mode.
808+ size_b = size_b .strip ()
809+
810+ if not re .fullmatch (HEXDIGIT , size_b ):
779811 exc = TransferEncodingError (
780812 chunk [:pos ].decode ("ascii" , "surrogateescape" )
781813 )
782814 self .payload .set_exception (exc )
783815 raise exc
784816 size = int (bytes (size_b ), 16 )
785817
786- chunk = chunk [pos + 2 :]
818+ chunk = chunk [pos + len ( SEP ) :]
787819 if size == 0 : # eof marker
788820 self ._chunk = ChunkState .PARSE_MAYBE_TRAILERS
821+ if self ._lax and chunk .startswith (b"\r " ):
822+ chunk = chunk [1 :]
789823 else :
790824 self ._chunk = ChunkState .PARSE_CHUNKED_CHUNK
791825 self ._chunk_size = size
@@ -807,13 +841,15 @@ def feed_data(
807841 self ._chunk_size = 0
808842 self .payload .feed_data (chunk [:required ], required )
809843 chunk = chunk [required :]
844+ if self ._lax and chunk .startswith (b"\r " ):
845+ chunk = chunk [1 :]
810846 self ._chunk = ChunkState .PARSE_CHUNKED_CHUNK_EOF
811847 self .payload .end_http_chunk_receiving ()
812848
813849 # toss the CRLF at the end of the chunk
814850 if self ._chunk == ChunkState .PARSE_CHUNKED_CHUNK_EOF :
815- if chunk [:2 ] == SEP :
816- chunk = chunk [2 :]
851+ if chunk [: len ( SEP ) ] == SEP :
852+ chunk = chunk [len ( SEP ) :]
817853 self ._chunk = ChunkState .PARSE_CHUNKED_SIZE
818854 else :
819855 self ._chunk_tail = chunk
@@ -823,11 +859,11 @@ def feed_data(
823859 # we should get another \r\n otherwise
824860 # trailers need to be skipped until \r\n\r\n
825861 if self ._chunk == ChunkState .PARSE_MAYBE_TRAILERS :
826- head = chunk [:2 ]
862+ head = chunk [: len ( SEP ) ]
827863 if head == SEP :
828864 # end of stream
829865 self .payload .feed_eof ()
830- return True , chunk [2 :]
866+ return True , chunk [len ( SEP ) :]
831867 # Both CR and LF, or only LF may not be received yet. It is
832868 # expected that CRLF or LF will be shown at the very first
833869 # byte next time, otherwise trailers should come. The last
@@ -845,7 +881,7 @@ def feed_data(
845881 if self ._chunk == ChunkState .PARSE_TRAILERS :
846882 pos = chunk .find (SEP )
847883 if pos >= 0 :
848- chunk = chunk [pos + 2 :]
884+ chunk = chunk [pos + len ( SEP ) :]
849885 self ._chunk = ChunkState .PARSE_MAYBE_TRAILERS
850886 else :
851887 self ._chunk_tail = chunk
0 commit comments