From 8b97933b259f34e5c66a4a1ae46c6fc176e26999 Mon Sep 17 00:00:00 2001
From: "Nathaniel J. Smith" <njs@anthropic.com>
Date: Thu, 9 Jan 2025 23:41:42 -0800
Subject: Validate Chunked-Encoding chunk footer
Also add a bit more thoroughness to some tests that I noticed while I
was working on it.
Thanks to Jeppe Bonde Weikop for the report.
---
h11/_readers.py | 23 +++++++++++--------
h11/tests/test_io.py | 54 +++++++++++++++++++++++++++++++-------------
2 files changed, 51 insertions(+), 26 deletions(-)
diff --git a/h11/_readers.py b/h11/_readers.py
index 08a9574..1348565 100644
--- a/h11/_readers.py
+++ b/h11/_readers.py
@@ -148,10 +148,9 @@ chunk_header_re = re.compile(chunk_header.encode("ascii"))
class ChunkedReader:
def __init__(self) -> None:
self._bytes_in_chunk = 0
- # After reading a chunk, we have to throw away the trailing \r\n; if
- # this is >0 then we discard that many bytes before resuming regular
- # de-chunkification.
- self._bytes_to_discard = 0
+ # After reading a chunk, we have to throw away the trailing \r\n.
+ # This tracks the bytes that we need to match and throw away.
+ self._bytes_to_discard = b""
self._reading_trailer = False
def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]:
@@ -160,15 +159,19 @@ class ChunkedReader:
if lines is None:
return None
return EndOfMessage(headers=list(_decode_header_lines(lines)))
- if self._bytes_to_discard > 0:
- data = buf.maybe_extract_at_most(self._bytes_to_discard)
+ if self._bytes_to_discard:
+ data = buf.maybe_extract_at_most(len(self._bytes_to_discard))
if data is None:
return None
- self._bytes_to_discard -= len(data)
- if self._bytes_to_discard > 0:
+ if data != self._bytes_to_discard[:len(data)]:
+ raise LocalProtocolError(
+ f"malformed chunk footer: {data!r} (expected {self._bytes_to_discard!r})"
+ )
+ self._bytes_to_discard = self._bytes_to_discard[len(data):]
+ if self._bytes_to_discard:
return None
# else, fall through and read some more
- assert self._bytes_to_discard == 0
+ assert self._bytes_to_discard == b""
if self._bytes_in_chunk == 0:
# We need to refill our chunk count
chunk_header = buf.maybe_extract_next_line()
@@ -194,7 +197,7 @@ class ChunkedReader:
return None
self._bytes_in_chunk -= len(data)
if self._bytes_in_chunk == 0:
- self._bytes_to_discard = 2
+ self._bytes_to_discard = b"\r\n"
chunk_end = True
else:
chunk_end = False
diff --git a/h11/tests/test_io.py b/h11/tests/test_io.py
index 2b47c0e..634c49d 100644
--- a/h11/tests/test_io.py
+++ b/h11/tests/test_io.py
@@ -360,22 +360,34 @@ def _run_reader(*args: Any) -> List[Event]:
return normalize_data_events(events)
-def t_body_reader(thunk: Any, data: bytes, expected: Any, do_eof: bool = False) -> None:
+def t_body_reader(thunk: Any, data: bytes, expected: list, do_eof: bool = False) -> None:
# Simple: consume whole thing
print("Test 1")
buf = makebuf(data)
- assert _run_reader(thunk(), buf, do_eof) == expected
+ try:
+ assert _run_reader(thunk(), buf, do_eof) == expected
+ except LocalProtocolError:
+ if LocalProtocolError in expected:
+ pass
+ else:
+ raise
# Incrementally growing buffer
print("Test 2")
reader = thunk()
buf = ReceiveBuffer()
events = []
- for i in range(len(data)):
- events += _run_reader(reader, buf, False)
- buf += data[i : i + 1]
- events += _run_reader(reader, buf, do_eof)
- assert normalize_data_events(events) == expected
+ try:
+ for i in range(len(data)):
+ events += _run_reader(reader, buf, False)
+ buf += data[i : i + 1]
+ events += _run_reader(reader, buf, do_eof)
+ assert normalize_data_events(events) == expected
+ except LocalProtocolError:
+ if LocalProtocolError in expected:
+ pass
+ else:
+ raise
is_complete = any(type(event) is EndOfMessage for event in expected)
if is_complete and not do_eof:
@@ -436,14 +448,12 @@ def test_ChunkedReader() -> None:
)
# refuses arbitrarily long chunk integers
- with pytest.raises(LocalProtocolError):
- # Technically this is legal HTTP/1.1, but we refuse to process chunk
- # sizes that don't fit into 20 characters of hex
- t_body_reader(ChunkedReader, b"9" * 100 + b"\r\nxxx", [Data(data=b"xxx")])
+ # Technically this is legal HTTP/1.1, but we refuse to process chunk
+ # sizes that don't fit into 20 characters of hex
+ t_body_reader(ChunkedReader, b"9" * 100 + b"\r\nxxx", [LocalProtocolError])
# refuses garbage in the chunk count
- with pytest.raises(LocalProtocolError):
- t_body_reader(ChunkedReader, b"10\x00\r\nxxx", None)
+ t_body_reader(ChunkedReader, b"10\x00\r\nxxx", [LocalProtocolError])
# handles (and discards) "chunk extensions" omg wtf
t_body_reader(
@@ -457,10 +467,22 @@ def test_ChunkedReader() -> None:
t_body_reader(
ChunkedReader,
- b"5 \r\n01234\r\n" + b"0\r\n\r\n",
+ b"5 \t \r\n01234\r\n" + b"0\r\n\r\n",
[Data(data=b"01234"), EndOfMessage()],
)
+ # Chunked encoding with bad chunk termination characters are refused. Originally we
+ # simply dropped the 2 bytes after a chunk, instead of validating that the bytes
+ # were \r\n -- so we would successfully decode the data below as b"xxxa". And
+ # apparently there are other HTTP processors that ignore the chunk length and just
+ # keep reading until they see \r\n, so they would decode it as b"xxx__1a". Any time
+ # two HTTP processors accept the same input but interpret it differently, there's a
+ # possibility of request smuggling shenanigans. So we now reject this.
+ t_body_reader(ChunkedReader, b"3\r\nxxx__1a\r\n", [LocalProtocolError])
+
+ # Confirm we check both bytes individually
+ t_body_reader(ChunkedReader, b"3\r\nxxx\r_1a\r\n", [LocalProtocolError])
+ t_body_reader(ChunkedReader, b"3\r\nxxx_\n1a\r\n", [LocalProtocolError])
def test_ContentLengthWriter() -> None:
w = ContentLengthWriter(5)
@@ -483,8 +505,8 @@ def test_ContentLengthWriter() -> None:
dowrite(w, EndOfMessage())
w = ContentLengthWriter(5)
- dowrite(w, Data(data=b"123")) == b"123"
- dowrite(w, Data(data=b"45")) == b"45"
+ assert dowrite(w, Data(data=b"123")) == b"123"
+ assert dowrite(w, Data(data=b"45")) == b"45"
with pytest.raises(LocalProtocolError):
dowrite(w, EndOfMessage(headers=[("Etag", "asdf")]))
--
2.30.2