Backport of

From d5c12ba890557a575c313bb3017910d7616fce3d Mon Sep 17 00:00:00 2001
From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com>
Date: Fri, 6 Oct 2023 17:11:40 +0100
Subject: [PATCH] [PR #7661/85713a48 backport][3.8] Update Python parser for
 RFCs 9110/9112 (#7662)

--- python-aiohttp-3.8.4.orig/aiohttp/http_parser.py
+++ python-aiohttp-3.8.4/aiohttp/http_parser.py
@@ -60,16 +60,16 @@ __all__ = (
 
 ASCIISET: Final[Set[str]] = set(string.printable)
 
-# See https://tools.ietf.org/html/rfc7230#section-3.1.1
-# and https://tools.ietf.org/html/rfc7230#appendix-B
+# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
+# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
 #
 #     method = token
 #     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
 #             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
 #     token = 1*tchar
 METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
+HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
 
 
 class RawRequestMessage(NamedTuple):
@@ -148,8 +148,11 @@ class HeadersParser:
             except ValueError:
                 raise InvalidHeader(line) from None
 
-            bname = bname.strip(b" \t")
-            bvalue = bvalue.lstrip()
+            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
+            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
+                raise InvalidHeader(line)
+
+            bvalue = bvalue.lstrip(b" \t")
             if HDRRE.search(bname):
                 raise InvalidHeader(bname)
             if len(bname) > self.max_field_size:
@@ -170,6 +173,7 @@ class HeadersParser:
             # consume continuation lines
             continuation = line and line[0] in (32, 9)  # (' ', '\t')
 
+            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
             if continuation:
                 bvalue_lst = [bvalue]
                 while continuation:
@@ -204,10 +208,14 @@ class HeadersParser:
                         str(header_length),
                     )
 
-            bvalue = bvalue.strip()
+            bvalue = bvalue.strip(b" \t")
             name = bname.decode("utf-8", "surrogateescape")
             value = bvalue.decode("utf-8", "surrogateescape")
 
+            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+            if "\n" in value or "\r" in value or "\x00" in value:
+                raise InvalidHeader(bvalue)
+
             headers.add(name, value)
             raw_headers.append((bname, bvalue))
 
@@ -322,15 +330,12 @@ class HttpParser(abc.ABC, Generic[_MsgT]
                             if length_hdr is None:
                                 return None
 
-                            try:
-                                length = int(length_hdr)
-                            except ValueError:
+                            # Shouldn't allow +/- or other number formats.
+                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
+                            if not length_hdr.strip(" \t").isdigit():
                                 raise InvalidHeader(CONTENT_LENGTH)
 
-                            if length < 0:
-                                raise InvalidHeader(CONTENT_LENGTH)
-
-                            return length
+                            return int(length_hdr)
 
                         length = get_content_length()
                         # do not support old websocket spec
@@ -470,6 +475,24 @@ class HttpParser(abc.ABC, Generic[_MsgT]
         upgrade = False
         chunked = False
 
+        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
+        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
+        singletons = (
+            hdrs.CONTENT_LENGTH,
+            hdrs.CONTENT_LOCATION,
+            hdrs.CONTENT_RANGE,
+            hdrs.CONTENT_TYPE,
+            hdrs.ETAG,
+            hdrs.HOST,
+            hdrs.MAX_FORWARDS,
+            hdrs.SERVER,
+            hdrs.TRANSFER_ENCODING,
+            hdrs.USER_AGENT,
+        )
+        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
+        if bad_hdr is not None:
+            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
+
         # keep-alive
         conn = headers.get(hdrs.CONNECTION)
         if conn:
@@ -523,7 +546,7 @@ class HttpRequestParser(HttpParser[RawRe
         # request line
         line = lines[0].decode("utf-8", "surrogateescape")
         try:
-            method, path, version = line.split(None, 2)
+            method, path, version = line.split(maxsplit=2)
         except ValueError:
             raise BadStatusLine(line) from None
 
@@ -537,14 +560,10 @@ class HttpRequestParser(HttpParser[RawRe
             raise BadStatusLine(method)
 
         # version
-        try:
-            if version.startswith("HTTP/"):
-                n1, n2 = version[5:].split(".", 1)
-                version_o = HttpVersion(int(n1), int(n2))
-            else:
-                raise BadStatusLine(version)
-        except Exception:
-            raise BadStatusLine(version)
+        match = VERSRE.match(version)
+        if match is None:
+            raise BadStatusLine(line)
+        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
 
         if method == "CONNECT":
             # authority-form,
@@ -611,12 +630,12 @@ class HttpResponseParser(HttpParser[RawR
     def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
         line = lines[0].decode("utf-8", "surrogateescape")
         try:
-            version, status = line.split(None, 1)
+            version, status = line.split(maxsplit=1)
         except ValueError:
             raise BadStatusLine(line) from None
 
         try:
-            status, reason = status.split(None, 1)
+            status, reason = status.split(maxsplit=1)
         except ValueError:
             reason = ""
 
@@ -632,13 +651,9 @@ class HttpResponseParser(HttpParser[RawR
         version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
 
         # The status code is a three-digit number
-        try:
-            status_i = int(status)
-        except ValueError:
-            raise BadStatusLine(line) from None
-
-        if status_i > 999:
+        if len(status) != 3 or not status.isdigit():
             raise BadStatusLine(line)
+        status_i = int(status)
 
         # read headers
         (
@@ -773,14 +788,13 @@ class HttpPayloadParser:
                         else:
                             size_b = chunk[:pos]
 
-                        try:
-                            size = int(bytes(size_b), 16)
-                        except ValueError:
+                        if not size_b.isdigit():
                             exc = TransferEncodingError(
                                 chunk[:pos].decode("ascii", "surrogateescape")
                             )
                             self.payload.set_exception(exc)
-                            raise exc from None
+                            raise exc
+                        size = int(bytes(size_b), 16)
 
                         chunk = chunk[pos + 2 :]
                         if size == 0:  # eof marker
--- python-aiohttp-3.8.4.orig/tests/test_http_parser.py
+++ python-aiohttp-3.8.4/tests/test_http_parser.py
@@ -424,6 +424,74 @@ def test_invalid_name(parser) -> None:
         parser.feed_data(text)
 
 
+def test_cve_2023_37276(parser: Any) -> None:
+    text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
+    with pytest.raises(http_exceptions.BadHttpMessage):
+        parser.feed_data(text)
+
+
+@pytest.mark.parametrize(
+    "hdr",
+    (
+        "Content-Length: -5",  # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
+        "Content-Length: +256",
+        "Foo: abc\rdef",  # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+        "Bar: abc\ndef",
+        "Baz: abc\x00def",
+        "Foo : bar",  # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
+        "Foo\t: bar",
+    ),
+)
+def test_bad_headers(parser: Any, hdr: str) -> None:
+    text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
+    with pytest.raises(http_exceptions.InvalidHeader):
+        parser.feed_data(text)
+
+
+def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
+    """Test that invalid chunked encoding doesn't allow content-length to be used."""
+    parser = HttpRequestParserPy(
+        protocol,
+        loop,
+        2**16,
+        max_line_size=8190,
+        max_field_size=8190,
+    )
+    text = (
+        b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+        + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
+    )
+    messages, upgrade, tail = parser.feed_data(text)
+    assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)
+
+
+@pytest.mark.skipif(
+    "HttpRequestParserC" not in dir(aiohttp.http_parser),
+    reason="C based HTTP parser not available",
+)
+def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
+    """C parser behaves differently. Maybe we should align them later."""
+    parser = HttpRequestParserC(
+        protocol,
+        loop,
+        2**16,
+        max_line_size=8190,
+        max_field_size=8190,
+    )
+    text = (
+        b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+        + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
+    )
+    with pytest.raises(http_exceptions.BadHttpMessage):
+        parser.feed_data(text)
+
+
+def test_whitespace_before_header(parser: Any) -> None:
+    text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
+    with pytest.raises(http_exceptions.BadHttpMessage):
+        parser.feed_data(text)
+
+
 @pytest.mark.parametrize("size", [40960, 8191])
 def test_max_header_field_size(parser, size) -> None:
     name = b"t" * size
@@ -605,6 +673,11 @@ def test_http_request_parser_bad_version
         parser.feed_data(b"GET //get HT/11\r\n\r\n")
 
 
+def test_http_request_parser_bad_version_number(parser: Any) -> None:
+    with pytest.raises(http_exceptions.BadHttpMessage):
+        parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")
+
+
 @pytest.mark.parametrize("size", [40965, 8191])
 def test_http_request_max_status_line(parser, size) -> None:
     path = b"t" * (size - 5)
@@ -672,6 +745,11 @@ def test_http_response_parser_bad_versio
         response.feed_data(b"HT/11 200 Ok\r\n\r\n")
 
 
+def test_http_response_parser_bad_version_number(response) -> None:
+    with pytest.raises(http_exceptions.BadHttpMessage):
+        response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")
+
+
 def test_http_response_parser_no_reason(response) -> None:
     msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]
 
@@ -686,17 +764,17 @@ def test_http_response_parser_bad(respon
 
 
 def test_http_response_parser_code_under_100(response) -> None:
-    msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
-    assert msg.code == 99
+    with pytest.raises(http_exceptions.BadStatusLine):
+        response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")
 
 
 def test_http_response_parser_code_above_999(response) -> None:
-    with pytest.raises(http_exceptions.BadHttpMessage):
+    with pytest.raises(http_exceptions.BadStatusLine):
         response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")
 
 
 def test_http_response_parser_code_not_int(response) -> None:
-    with pytest.raises(http_exceptions.BadHttpMessage):
+    with pytest.raises(http_exceptions.BadStatusLine):
         response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
 
 
