Combined backport of the following upstream aiohttp commits:

From cebe526b9c34dc3a3da9140409db63014bc4cf19 Mon Sep 17 00:00:00 2001
From: Sam Bull <git@sambull.org>
Date: Sun, 7 Apr 2024 13:19:31 +0100
Subject: [PATCH] Fix handling of multipart/form-data (#8280) (#8302)

From 7eecdff163ccf029fbb1ddc9de4169d4aaeb6597 Mon Sep 17 00:00:00 2001
From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 20:47:19 +0100
Subject: [PATCH] [PR #8332/482e6cdf backport][3.9] Add set_content_disposition
 test (#8333)

From f21c6f2ca512a026ce7f0f6c6311f62d6a638866 Mon Sep 17 00:00:00 2001
From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 21:54:12 +0100
Subject: [PATCH] [PR #8335/5a6949da backport][3.9] Add Content-Disposition
 automatically (#8336)

--- python-aiohttp-3.8.4.orig/aiohttp/formdata.py
+++ python-aiohttp-3.8.4/aiohttp/formdata.py
@@ -1,4 +1,5 @@
 import io
+import warnings
 from typing import Any, Iterable, List, Optional
 from urllib.parse import urlencode
 
@@ -53,7 +54,12 @@ class FormData:
         if isinstance(value, io.IOBase):
             self._is_multipart = True
         elif isinstance(value, (bytes, bytearray, memoryview)):
+            msg = (
+                "In v4, passing bytes will no longer create a file field. "
+                "Please explicitly use the filename parameter or pass a BytesIO object."
+            )
             if filename is None and content_transfer_encoding is None:
+                warnings.warn(msg, DeprecationWarning)
                 filename = name
 
         type_options: MultiDict[str] = MultiDict({"name": name})
@@ -81,7 +87,11 @@ class FormData:
                     "content_transfer_encoding must be an instance"
                     " of str. Got: %s" % content_transfer_encoding
                 )
-            headers[hdrs.CONTENT_TRANSFER_ENCODING] = content_transfer_encoding
+            msg = (
+                "content_transfer_encoding is deprecated. "
+                "To maintain compatibility with v4 please pass a BytesPayload."
+            )
+            warnings.warn(msg, DeprecationWarning)
             self._is_multipart = True
 
         self._fields.append((type_options, headers, value))
--- python-aiohttp-3.8.4.orig/aiohttp/multipart.py
+++ python-aiohttp-3.8.4/aiohttp/multipart.py
@@ -255,13 +255,22 @@ class BodyPartReader:
     chunk_size = 8192
 
     def __init__(
-        self, boundary: bytes, headers: "CIMultiDictProxy[str]", content: StreamReader
+        self,
+        boundary: bytes,
+        headers: "CIMultiDictProxy[str]",
+        content: StreamReader,
+        *,
+        subtype: str = "mixed",
+        default_charset: Optional[str] = None,
     ) -> None:
         self.headers = headers
         self._boundary = boundary
         self._content = content
+        self._default_charset = default_charset
         self._at_eof = False
-        length = self.headers.get(CONTENT_LENGTH, None)
+        self._is_form_data = subtype == "form-data"
+        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
+        length = None if self._is_form_data else self.headers.get(CONTENT_LENGTH, None)
         self._length = int(length) if length is not None else None
         self._read_bytes = 0
         # TODO: typeing.Deque is not supported by Python 3.5
@@ -329,6 +338,8 @@ class BodyPartReader:
         assert self._length is not None, "Content-Length required for chunked read"
         chunk_size = min(size, self._length - self._read_bytes)
         chunk = await self._content.read(chunk_size)
+        if self._content.at_eof():
+            self._at_eof = True
         return chunk
 
     async def _read_chunk_from_stream(self, size: int) -> bytes:
@@ -444,7 +455,8 @@ class BodyPartReader:
         """
         if CONTENT_TRANSFER_ENCODING in self.headers:
             data = self._decode_content_transfer(data)
-        if CONTENT_ENCODING in self.headers:
+        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
+        if not self._is_form_data and CONTENT_ENCODING in self.headers:
             return self._decode_content(data)
         return data
 
@@ -478,7 +490,7 @@ class BodyPartReader:
         """Returns charset parameter from Content-Type header or default."""
         ctype = self.headers.get(CONTENT_TYPE, "")
         mimetype = parse_mimetype(ctype)
-        return mimetype.parameters.get("charset", default)
+        return mimetype.parameters.get("charset", self._default_charset or default)
 
     @reify
     def name(self) -> Optional[str]:
@@ -533,9 +545,17 @@ class MultipartReader:
     part_reader_cls = BodyPartReader
 
     def __init__(self, headers: Mapping[str, str], content: StreamReader) -> None:
+        self._mimetype = parse_mimetype(headers[CONTENT_TYPE])
+        assert self._mimetype.type == "multipart", "multipart/* content type expected"
+        if "boundary" not in self._mimetype.parameters:
+            raise ValueError(
+                "boundary missed for Content-Type: %s" % headers[CONTENT_TYPE]
+            )
+
         self.headers = headers
         self._boundary = ("--" + self._get_boundary()).encode()
         self._content = content
+        self._default_charset: Optional[str] = None
         self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
         self._at_eof = False
         self._at_bof = True
@@ -587,7 +607,24 @@ class MultipartReader:
             await self._read_boundary()
         if self._at_eof:  # we just read the last boundary, nothing to do there
             return None
-        self._last_part = await self.fetch_next_part()
+
+        part = await self.fetch_next_part()
+        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.6
+        if (
+            self._last_part is None
+            and self._mimetype.subtype == "form-data"
+            and isinstance(part, BodyPartReader)
+        ):
+            _, params = parse_content_disposition(part.headers.get(CONTENT_DISPOSITION))
+            if params.get("name") == "_charset_":
+                # Longest encoding in https://encoding.spec.whatwg.org/encodings.json
+                # is 19 characters, so 32 should be more than enough for any valid encoding.
+                charset = await part.read_chunk(32)
+                if len(charset) > 31:
+                    raise RuntimeError("Invalid default charset")
+                self._default_charset = charset.strip().decode()
+                part = await self.fetch_next_part()
+        self._last_part = part
         return self._last_part
 
     async def release(self) -> None:
@@ -623,19 +660,16 @@ class MultipartReader:
                 return type(self)(headers, self._content)
             return self.multipart_reader_cls(headers, self._content)
         else:
-            return self.part_reader_cls(self._boundary, headers, self._content)
-
-    def _get_boundary(self) -> str:
-        mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
-
-        assert mimetype.type == "multipart", "multipart/* content type expected"
-
-        if "boundary" not in mimetype.parameters:
-            raise ValueError(
-                "boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE]
+            return self.part_reader_cls(
+                self._boundary,
+                headers,
+                self._content,
+                subtype=self._mimetype.subtype,
+                default_charset=self._default_charset,
             )
 
-        boundary = mimetype.parameters["boundary"]
+    def _get_boundary(self) -> str:
+        boundary = self._mimetype.parameters["boundary"]
         if len(boundary) > 70:
             raise ValueError("boundary %r is too long (70 chars max)" % boundary)
 
@@ -726,6 +760,7 @@ class MultipartWriter(Payload):
         super().__init__(None, content_type=ctype)
 
         self._parts: List[_Part] = []
+        self._is_form_data = subtype == "form-data"
 
     def __enter__(self) -> "MultipartWriter":
         return self
@@ -803,32 +838,38 @@ class MultipartWriter(Payload):
 
     def append_payload(self, payload: Payload) -> Payload:
         """Adds a new body part to multipart writer."""
-        # compression
-        encoding: Optional[str] = payload.headers.get(
-            CONTENT_ENCODING,
-            "",
-        ).lower()
-        if encoding and encoding not in ("deflate", "gzip", "identity"):
-            raise RuntimeError(f"unknown content encoding: {encoding}")
-        if encoding == "identity":
-            encoding = None
-
-        # te encoding
-        te_encoding: Optional[str] = payload.headers.get(
-            CONTENT_TRANSFER_ENCODING,
-            "",
-        ).lower()
-        if te_encoding not in ("", "base64", "quoted-printable", "binary"):
-            raise RuntimeError(
-                "unknown content transfer encoding: {}" "".format(te_encoding)
+        encoding: Optional[str] = None
+        te_encoding: Optional[str] = None
+        if self._is_form_data:
+            # https://datatracker.ietf.org/doc/html/rfc7578#section-4.7
+            # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
+            assert (
+                not {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
+                & payload.headers.keys()
             )
-        if te_encoding == "binary":
-            te_encoding = None
-
-        # size
-        size = payload.size
-        if size is not None and not (encoding or te_encoding):
-            payload.headers[CONTENT_LENGTH] = str(size)
+            # Set default Content-Disposition in case user doesn't create one
+            if CONTENT_DISPOSITION not in payload.headers:
+                name = f"section-{len(self._parts)}"
+                payload.set_content_disposition("form-data", name=name)
+        else:
+            # compression
+            encoding = payload.headers.get(CONTENT_ENCODING, "").lower()
+            if encoding and encoding not in ("deflate", "gzip", "identity"):
+                raise RuntimeError(f"unknown content encoding: {encoding}")
+            if encoding == "identity":
+                encoding = None
+
+            # te encoding
+            te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
+            if te_encoding not in ("", "base64", "quoted-printable", "binary"):
+                raise RuntimeError(f"unknown content transfer encoding: {te_encoding}")
+            if te_encoding == "binary":
+                te_encoding = None
+
+            # size
+            size = payload.size
+            if size is not None and not (encoding or te_encoding):
+                payload.headers[CONTENT_LENGTH] = str(size)
 
         self._parts.append((payload, encoding, te_encoding))  # type: ignore[arg-type]
         return payload
@@ -886,6 +927,11 @@ class MultipartWriter(Payload):
     async def write(self, writer: Any, close_boundary: bool = True) -> None:
         """Write body."""
         for part, encoding, te_encoding in self._parts:
+            if self._is_form_data:
+                # https://datatracker.ietf.org/doc/html/rfc7578#section-4.2
+                assert CONTENT_DISPOSITION in part.headers
+                assert "name=" in part.headers[CONTENT_DISPOSITION]
+
             await writer.write(b"--" + self._boundary + b"\r\n")
             await writer.write(part._binary_headers)
 
--- python-aiohttp-3.8.4.orig/tests/test_client_functional.py
+++ python-aiohttp-3.8.4/tests/test_client_functional.py
@@ -1158,48 +1158,6 @@ async def test_POST_DATA_with_charset_po
     resp.close()
 
 
-async def test_POST_DATA_with_context_transfer_encoding(aiohttp_client) -> None:
-    async def handler(request):
-        data = await request.post()
-        assert data["name"] == "text"
-        return web.Response(text=data["name"])
-
-    app = web.Application()
-    app.router.add_post("/", handler)
-    client = await aiohttp_client(app)
-
-    form = aiohttp.FormData()
-    form.add_field("name", "text", content_transfer_encoding="base64")
-
-    resp = await client.post("/", data=form)
-    assert 200 == resp.status
-    content = await resp.text()
-    assert content == "text"
-    resp.close()
-
-
-async def test_POST_DATA_with_content_type_context_transfer_encoding(aiohttp_client):
-    async def handler(request):
-        data = await request.post()
-        assert data["name"] == "text"
-        return web.Response(body=data["name"])
-
-    app = web.Application()
-    app.router.add_post("/", handler)
-    client = await aiohttp_client(app)
-
-    form = aiohttp.FormData()
-    form.add_field(
-        "name", "text", content_type="text/plain", content_transfer_encoding="base64"
-    )
-
-    resp = await client.post("/", data=form)
-    assert 200 == resp.status
-    content = await resp.text()
-    assert content == "text"
-    resp.close()
-
-
 async def test_POST_MultiDict(aiohttp_client) -> None:
     async def handler(request):
         data = await request.post()
@@ -1249,7 +1207,7 @@ async def test_POST_FILES(aiohttp_client
     client = await aiohttp_client(app)
 
     with fname.open("rb") as f:
-        resp = await client.post("/", data={"some": f, "test": b"data"}, chunked=True)
+        resp = await client.post("/", data={"some": f, "test": io.BytesIO(b"data")}, chunked=True)
         assert 200 == resp.status
         resp.close()
 
--- python-aiohttp-3.8.4.orig/tests/test_multipart.py
+++ python-aiohttp-3.8.4/tests/test_multipart.py
@@ -942,6 +942,58 @@ class TestMultipartReader:
             assert first.at_eof()
             assert not second.at_eof()
 
+    async def test_read_form_default_encoding(self) -> None:
+        with Stream(
+            b"--:\r\n"
+            b'Content-Disposition: form-data; name="_charset_"\r\n\r\n'
+            b"ascii"
+            b"\r\n"
+            b"--:\r\n"
+            b'Content-Disposition: form-data; name="field1"\r\n\r\n'
+            b"foo"
+            b"\r\n"
+            b"--:\r\n"
+            b"Content-Type: text/plain;charset=UTF-8\r\n"
+            b'Content-Disposition: form-data; name="field2"\r\n\r\n'
+            b"foo"
+            b"\r\n"
+            b"--:\r\n"
+            b'Content-Disposition: form-data; name="field3"\r\n\r\n'
+            b"foo"
+            b"\r\n"
+        ) as stream:
+            reader = aiohttp.MultipartReader(
+                {CONTENT_TYPE: 'multipart/form-data;boundary=":"'},
+                stream,
+            )
+            field1 = await reader.next()
+            assert field1.name == "field1"
+            assert field1.get_charset("default") == "ascii"
+            field2 = await reader.next()
+            assert field2.name == "field2"
+            assert field2.get_charset("default") == "UTF-8"
+            field3 = await reader.next()
+            assert field3.name == "field3"
+            assert field3.get_charset("default") == "ascii"
+
+    async def test_read_form_invalid_default_encoding(self) -> None:
+        with Stream(
+            b"--:\r\n"
+            b'Content-Disposition: form-data; name="_charset_"\r\n\r\n'
+            b"this-value-is-too-long-to-be-a-charset"
+            b"\r\n"
+            b"--:\r\n"
+            b'Content-Disposition: form-data; name="field1"\r\n\r\n'
+            b"foo"
+            b"\r\n"
+        ) as stream:
+            reader = aiohttp.MultipartReader(
+                {CONTENT_TYPE: 'multipart/form-data;boundary=":"'},
+                stream,
+            )
+            with pytest.raises(RuntimeError, match="Invalid default charset"):
+                await reader.next()
+
 
 async def test_writer(writer) -> None:
     assert writer.size == 7
@@ -1228,6 +1280,25 @@ class TestMultipartWriter:
         part = writer._parts[0][0]
         assert part.headers[CONTENT_TYPE] == "test/passed"
 
+    def test_set_content_disposition_after_append(self):
+        writer = aiohttp.MultipartWriter("form-data")
+        part = writer.append("some-data")
+        part.set_content_disposition("form-data", name="method")
+        assert 'name="method"' in part.headers[CONTENT_DISPOSITION]
+
+    def test_automatic_content_disposition(self):
+        writer = aiohttp.MultipartWriter("form-data")
+        writer.append_json(())
+        part = payload.StringPayload("foo")
+        part.set_content_disposition("form-data", name="second")
+        writer.append_payload(part)
+        writer.append("foo")
+
+        disps = tuple(p[0].headers[CONTENT_DISPOSITION] for p in writer._parts)
+        assert 'name="section-0"' in disps[0]
+        assert 'name="second"' in disps[1]
+        assert 'name="section-2"' in disps[2]
+
     def test_with(self) -> None:
         with aiohttp.MultipartWriter(boundary=":") as writer:
             writer.append("foo")
@@ -1278,7 +1349,6 @@ class TestMultipartWriter:
                         CONTENT_TYPE: "text/python",
                     },
                 )
-            content_length = part.size
             await writer.write(stream)
 
         assert part.headers[CONTENT_TYPE] == "text/python"
@@ -1289,9 +1359,7 @@ class TestMultipartWriter:
         assert headers == (
             b"--:\r\n"
             b"Content-Type: text/python\r\n"
-            b'Content-Disposition: attachments; filename="bug.py"\r\n'
-            b"Content-Length: %s"
-            b"" % (str(content_length).encode(),)
+            b'Content-Disposition: attachments; filename="bug.py"'
         )
 
     async def test_set_content_disposition_override(self, buf, stream):
@@ -1305,7 +1373,6 @@ class TestMultipartWriter:
                         CONTENT_TYPE: "text/python",
                     },
                 )
-            content_length = part.size
             await writer.write(stream)
 
         assert part.headers[CONTENT_TYPE] == "text/python"
@@ -1316,9 +1383,7 @@ class TestMultipartWriter:
         assert headers == (
             b"--:\r\n"
             b"Content-Type: text/python\r\n"
-            b'Content-Disposition: attachments; filename="bug.py"\r\n'
-            b"Content-Length: %s"
-            b"" % (str(content_length).encode(),)
+            b'Content-Disposition: attachments; filename="bug.py"'
         )
 
     async def test_reset_content_disposition_header(self, buf, stream):
@@ -1330,8 +1395,6 @@ class TestMultipartWriter:
                     headers={CONTENT_TYPE: "text/plain"},
                 )
 
-            content_length = part.size
-
             assert CONTENT_DISPOSITION in part.headers
 
             part.set_content_disposition("attachments", filename="bug.py")
@@ -1344,9 +1407,7 @@ class TestMultipartWriter:
             b"--:\r\n"
             b"Content-Type: text/plain\r\n"
             b"Content-Disposition:"
-            b' attachments; filename="bug.py"\r\n'
-            b"Content-Length: %s"
-            b"" % (str(content_length).encode(),)
+            b' attachments; filename="bug.py"'
         )
 
 
--- python-aiohttp-3.8.4.orig/tests/test_web_functional.py
+++ python-aiohttp-3.8.4/tests/test_web_functional.py
@@ -34,7 +34,8 @@ def fname(here):
 
 def new_dummy_form():
     form = FormData()
-    form.add_field("name", b"123", content_transfer_encoding="base64")
+    with pytest.warns(DeprecationWarning, match="BytesPayload"):
+        form.add_field("name", b"123", content_transfer_encoding="base64")
     return form
 
 
@@ -429,25 +430,6 @@ async def test_release_post_data(aiohttp
     await resp.release()
 
 
-async def test_POST_DATA_with_content_transfer_encoding(aiohttp_client) -> None:
-    async def handler(request):
-        data = await request.post()
-        assert b"123" == data["name"]
-        return web.Response()
-
-    app = web.Application()
-    app.router.add_post("/", handler)
-    client = await aiohttp_client(app)
-
-    form = FormData()
-    form.add_field("name", b"123", content_transfer_encoding="base64")
-
-    resp = await client.post("/", data=form)
-    assert 200 == resp.status
-
-    await resp.release()
-
-
 async def test_post_form_with_duplicate_keys(aiohttp_client) -> None:
     async def handler(request):
         data = await request.post()
@@ -505,7 +487,8 @@ async def test_100_continue(aiohttp_clie
         return web.Response()
 
     form = FormData()
-    form.add_field("name", b"123", content_transfer_encoding="base64")
+    with pytest.warns(DeprecationWarning, match="BytesPayload"):
+        form.add_field("name", b"123", content_transfer_encoding="base64")
 
     app = web.Application()
     app.router.add_post("/", handler)
@@ -683,7 +666,7 @@ async def test_upload_file(aiohttp_clien
     app.router.add_post("/", handler)
     client = await aiohttp_client(app)
 
-    resp = await client.post("/", data={"file": data})
+    resp = await client.post("/", data={"file": io.BytesIO(data)})
     assert 200 == resp.status
 
     await resp.release()
