File: test_downloader_handler_twisted_http2.py

package info (click to toggle)
python-scrapy 2.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,308 kB
  • sloc: python: 55,321; xml: 199; makefile: 25; sh: 7
file content (218 lines) | stat: -rw-r--r-- 8,368 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""Tests for scrapy.core.downloader.handlers.http2.H2DownloadHandler."""

from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any
from unittest import mock

import pytest
from testfixtures import LogCapture
from twisted.internet import defer, error
from twisted.web.error import SchemeNotSupported
from twisted.web.http import H2_ENABLED

from scrapy.http import Request
from scrapy.utils.defer import deferred_f_from_coro_f, maybe_deferred_to_future
from tests.test_downloader_handlers_http_base import (
    TestHttpProxyBase,
    TestHttps11Base,
    TestHttpsCustomCiphersBase,
    TestHttpsInvalidDNSIdBase,
    TestHttpsInvalidDNSPatternBase,
    TestHttpsWrongHostnameBase,
    TestHttpWithCrawlerBase,
)

if TYPE_CHECKING:
    from scrapy.core.downloader.handlers import DownloadHandlerProtocol
    from tests.mockserver.http import MockServer
    from tests.mockserver.proxy_echo import ProxyEchoMockServer

# Module-level pytest mark: the skip condition is ``not H2_ENABLED``, so the
# tests in this file are skipped when Twisted reports no HTTP/2 support.
pytestmark = pytest.mark.skipif(
    not H2_ENABLED, reason="HTTP/2 support in Twisted is not enabled"
)


class H2DownloadHandlerMixin:
    """Mixin that exposes ``H2DownloadHandler`` as ``download_handler_cls``."""

    @property
    def download_handler_cls(self) -> type[DownloadHandlerProtocol]:
        """Return the ``H2DownloadHandler`` class, importing it lazily."""
        # Deferred to call time because importing the http2 handler module
        # can fail when H2_ENABLED is False.
        from scrapy.core.downloader.handlers import http2  # noqa: PLC0415

        return http2.H2DownloadHandler


class TestHttps2(H2DownloadHandlerMixin, TestHttps11Base):
    """Tests from ``TestHttps11Base`` plus HTTP/2-specific cases.

    ``H2DownloadHandlerMixin`` supplies ``H2DownloadHandler`` as the download
    handler class.
    """

    HTTP2_DATALOSS_SKIP_REASON = "Content-Length mismatch raises InvalidBodyLengthError"

    @deferred_f_from_coro_f
    async def test_protocol(self, mockserver: MockServer) -> None:
        """The response to a GET on ``/host`` reports ``h2`` as its protocol."""
        host_url = mockserver.url("/host", is_secure=self.is_secure)
        request = Request(host_url, method="GET")
        async with self.get_dh() as download_handler:
            response = await download_handler.download_request(request)
        assert response.protocol == "h2"

    @deferred_f_from_coro_f
    async def test_download_with_maxsize_very_large_file(
        self, mockserver: MockServer
    ) -> None:
        """Exceeding ``DOWNLOAD_MAXSIZE`` aborts the download and logs one error."""
        from twisted.internet import reactor

        def assert_single_error(patched_logger: mock.Mock) -> None:
            patched_logger.error.assert_called_once_with(mock.ANY)

        expected_failures = (defer.CancelledError, error.ConnectionAborted)
        with mock.patch("scrapy.core.http2.stream.logger") as logger:
            request = Request(
                mockserver.url("/largechunkedfile", is_secure=self.is_secure)
            )

            async with self.get_dh({"DOWNLOAD_MAXSIZE": 1_500}) as download_handler:
                with pytest.raises(expected_failures):
                    await download_handler.download_request(request)

            # The error message is logged from the dataReceived callback, so
            # the reactor needs a short moment to drain its queue after the
            # connection is closed before the logger can be inspected.
            delayed_check: defer.Deferred[mock.Mock] = defer.Deferred()
            delayed_check.addCallback(assert_single_error)
            reactor.callLater(0.1, delayed_check.callback, logger)
            await maybe_deferred_to_future(delayed_check)

    @deferred_f_from_coro_f
    async def test_unsupported_scheme(self) -> None:
        """Downloading an ``ftp://`` URL raises ``SchemeNotSupported``."""
        ftp_request = Request("ftp://unsupported.scheme")
        async with self.get_dh() as download_handler:
            with pytest.raises(SchemeNotSupported):
                await download_handler.download_request(ftp_request)

    def test_download_cause_data_loss(self) -> None:  # type: ignore[override]
        """Skipped; the reason is ``HTTP2_DATALOSS_SKIP_REASON``."""
        pytest.skip(self.HTTP2_DATALOSS_SKIP_REASON)

    def test_download_allow_data_loss(self) -> None:  # type: ignore[override]
        """Skipped; the reason is ``HTTP2_DATALOSS_SKIP_REASON``."""
        pytest.skip(self.HTTP2_DATALOSS_SKIP_REASON)

    def test_download_allow_data_loss_via_setting(self) -> None:  # type: ignore[override]
        """Skipped; the reason is ``HTTP2_DATALOSS_SKIP_REASON``."""
        pytest.skip(self.HTTP2_DATALOSS_SKIP_REASON)

    @deferred_f_from_coro_f
    async def test_concurrent_requests_same_domain(
        self, mockserver: MockServer
    ) -> None:
        """Send a GET and then a POST to the same server through one handler."""
        text_request = Request(mockserver.url("/text", is_secure=self.is_secure))
        echo_url = mockserver.url("/echo", is_secure=self.is_secure)
        echo_request = Request(echo_url, method="POST")
        async with self.get_dh() as download_handler:
            text_response = await download_handler.download_request(text_request)
            assert text_response.body == b"Works"
            echo_response = await download_handler.download_request(echo_request)
            assert echo_response.headers["Content-Length"] == b"79"

    @pytest.mark.xfail(reason="https://github.com/python-hyper/h2/issues/1247")
    @deferred_f_from_coro_f
    async def test_connect_request(self, mockserver: MockServer) -> None:
        """A CONNECT request to ``/file`` should give an empty body (xfail)."""
        file_url = mockserver.url("/file", is_secure=self.is_secure)
        request = Request(file_url, method="CONNECT")
        async with self.get_dh() as download_handler:
            response = await download_handler.download_request(request)
        assert response.body == b""

    @deferred_f_from_coro_f
    async def test_custom_content_length_good(self, mockserver: MockServer) -> None:
        """A Content-Length header matching the body length is echoed back."""
        request = Request(mockserver.url("/contentlength", is_secure=self.is_secure))
        matching_length = str(len(request.body))
        request.headers["Content-Length"] = matching_length
        async with self.get_dh() as download_handler:
            response = await download_handler.download_request(request)
        assert response.text == matching_length

    @deferred_f_from_coro_f
    async def test_custom_content_length_bad(self, mockserver: MockServer) -> None:
        """A wrong Content-Length header is replaced and a warning is logged."""
        request = Request(mockserver.url("/contentlength", is_secure=self.is_secure))
        body_size = len(request.body)
        actual_content_length = str(body_size)
        bad_content_length = str(body_size + 1)
        request.headers["Content-Length"] = bad_content_length
        async with self.get_dh() as download_handler:
            with LogCapture() as log:
                response = await download_handler.download_request(request)
        assert response.text == actual_content_length
        expected_message = (
            f"Ignoring bad Content-Length header "
            f"{bad_content_length!r} of request {request}, sending "
            f"{actual_content_length!r} instead"
        )
        log.check_present(("scrapy.core.http2.stream", "WARNING", expected_message))

    @deferred_f_from_coro_f
    async def test_duplicate_header(self, mockserver: MockServer) -> None:
        """Both values of a header added twice appear in the ``/echo`` response."""
        header = "Custom-Header"
        values = ["foo", "bar"]
        request = Request(mockserver.url("/echo", is_secure=self.is_secure))
        for value in values:
            request.headers.appendlist(header, value)
        async with self.get_dh() as download_handler:
            response = await download_handler.download_request(request)
        echoed = json.loads(response.text)
        assert echoed["headers"][header] == values


class TestHttps2WrongHostname(H2DownloadHandlerMixin, TestHttpsWrongHostnameBase):
    """``TestHttpsWrongHostnameBase`` combined with ``H2DownloadHandlerMixin``."""


class TestHttps2InvalidDNSId(H2DownloadHandlerMixin, TestHttpsInvalidDNSIdBase):
    """``TestHttpsInvalidDNSIdBase`` combined with ``H2DownloadHandlerMixin``."""


class TestHttps2InvalidDNSPattern(
    H2DownloadHandlerMixin, TestHttpsInvalidDNSPatternBase
):
    """``TestHttpsInvalidDNSPatternBase`` combined with ``H2DownloadHandlerMixin``."""


class TestHttps2CustomCiphers(H2DownloadHandlerMixin, TestHttpsCustomCiphersBase):
    """``TestHttpsCustomCiphersBase`` combined with ``H2DownloadHandlerMixin``."""


class TestHttp2WithCrawler(TestHttpWithCrawlerBase):
    """HTTP/2 variant of ``TestHttpWithCrawlerBase`` using secure URLs."""

    is_secure = True

    @property
    def settings_dict(self) -> dict[str, Any] | None:
        """Return settings that map the ``https`` scheme to H2DownloadHandler."""
        handler_path = "scrapy.core.downloader.handlers.http2.H2DownloadHandler"
        return {"DOWNLOAD_HANDLERS": {"https": handler_path}}


class TestHttps2Proxy(H2DownloadHandlerMixin, TestHttpProxyBase):
    """``TestHttpProxyBase`` tests run against the HTTP/2 download handler.

    Two inherited tests are overridden to assert the exception that the
    parent test body raises under this handler.
    """

    is_secure = True
    expected_http_proxy_request_body = b"/"

    @deferred_f_from_coro_f
    async def test_download_with_proxy_https_timeout(
        self, proxy_mockserver: ProxyEchoMockServer
    ) -> None:
        """The inherited HTTPS-proxy timeout test must raise NotImplementedError."""
        with pytest.raises(NotImplementedError):
            # Bound inside the block so the parent call is still covered by
            # pytest.raises, exactly like the awaited result.
            parent_test = super().test_download_with_proxy_https_timeout
            await maybe_deferred_to_future(parent_test(proxy_mockserver))

    @deferred_f_from_coro_f
    async def test_download_with_proxy_without_http_scheme(
        self, proxy_mockserver: ProxyEchoMockServer
    ) -> None:
        """The inherited scheme-less proxy test must raise SchemeNotSupported."""
        with pytest.raises(SchemeNotSupported):
            # Bound inside the block so the parent call is still covered by
            # pytest.raises, exactly like the awaited result.
            parent_test = super().test_download_with_proxy_without_http_scheme
            await maybe_deferred_to_future(parent_test(proxy_mockserver))