1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
|
"""Tests for scrapy.core.downloader.handlers.http2.H2DownloadHandler."""
from __future__ import annotations
import json
from typing import TYPE_CHECKING, Any
from unittest import mock
import pytest
from testfixtures import LogCapture
from twisted.internet import defer, error
from twisted.web.error import SchemeNotSupported
from twisted.web.http import H2_ENABLED
from scrapy.http import Request
from scrapy.utils.defer import deferred_f_from_coro_f, maybe_deferred_to_future
from tests.test_downloader_handlers_http_base import (
TestHttpProxyBase,
TestHttps11Base,
TestHttpsCustomCiphersBase,
TestHttpsInvalidDNSIdBase,
TestHttpsInvalidDNSPatternBase,
TestHttpsWrongHostnameBase,
TestHttpWithCrawlerBase,
)
if TYPE_CHECKING:
from scrapy.core.downloader.handlers import DownloadHandlerProtocol
from tests.mockserver.http import MockServer
from tests.mockserver.proxy_echo import ProxyEchoMockServer
# Module-level pytest mark: the tests in this module are skipped when Twisted
# reports that HTTP/2 support is not available (H2_ENABLED is falsy).
pytestmark = pytest.mark.skipif(
    not H2_ENABLED, reason="HTTP/2 support in Twisted is not enabled"
)
class H2DownloadHandlerMixin:
    """Mixin that selects ``H2DownloadHandler`` as the handler class under test."""

    @property
    def download_handler_cls(self) -> type[DownloadHandlerProtocol]:
        """Return the ``H2DownloadHandler`` class, imported at access time."""
        # Kept out of the module top level: the import can fail when
        # H2_ENABLED is False.
        from scrapy.core.downloader.handlers import http2  # noqa: PLC0415

        return http2.H2DownloadHandler
class TestHttps2(H2DownloadHandlerMixin, TestHttps11Base):
    """``TestHttps11Base`` suite plus HTTP/2-specific checks for the H2 handler."""

    # Skip message shared by the three data-loss tests overridden below.
    HTTP2_DATALOSS_SKIP_REASON = "Content-Length mismatch raises InvalidBodyLengthError"

    @deferred_f_from_coro_f
    async def test_protocol(self, mockserver: MockServer) -> None:
        """The downloaded response must report ``"h2"`` as its protocol."""
        host_url = mockserver.url("/host", is_secure=self.is_secure)
        host_request = Request(host_url, method="GET")
        async with self.get_dh() as handler:
            h2_response = await handler.download_request(host_request)
        assert h2_response.protocol == "h2"

    @deferred_f_from_coro_f
    async def test_download_with_maxsize_very_large_file(
        self, mockserver: MockServer
    ) -> None:
        """Exceeding ``DOWNLOAD_MAXSIZE`` fails the download and logs one error.

        The download of ``/largechunkedfile`` with ``DOWNLOAD_MAXSIZE`` set to
        1_500 is expected to raise ``CancelledError`` or ``ConnectionAborted``,
        and the patched ``scrapy.core.http2.stream`` logger must have received
        exactly one ``error`` call.
        """
        from twisted.internet import reactor

        with mock.patch("scrapy.core.http2.stream.logger") as stream_logger:
            big_file_url = mockserver.url(
                "/largechunkedfile", is_secure=self.is_secure
            )
            oversized_request = Request(big_file_url)

            def assert_single_error(patched: mock.Mock) -> None:
                patched.error.assert_called_once_with(mock.ANY)

            expected_failures = (defer.CancelledError, error.ConnectionAborted)
            async with self.get_dh({"DOWNLOAD_MAXSIZE": 1_500}) as handler:
                with pytest.raises(expected_failures):
                    await handler.download_request(oversized_request)

            # The error message is logged in the dataReceived callback, so the
            # reactor needs a short moment to process its queue after the
            # connection is closed before the logger can be inspected.
            grace_period: defer.Deferred[mock.Mock] = defer.Deferred()
            grace_period.addCallback(assert_single_error)
            reactor.callLater(0.1, grace_period.callback, stream_logger)
            await maybe_deferred_to_future(grace_period)

    @deferred_f_from_coro_f
    async def test_unsupported_scheme(self) -> None:
        """Downloading an ``ftp://`` URL must raise ``SchemeNotSupported``."""
        ftp_request = Request("ftp://unsupported.scheme")
        async with self.get_dh() as handler:
            with pytest.raises(SchemeNotSupported):
                await handler.download_request(ftp_request)

    def test_download_cause_data_loss(self) -> None:  # type: ignore[override]
        """Skipped for HTTP/2; see ``HTTP2_DATALOSS_SKIP_REASON``."""
        pytest.skip(self.HTTP2_DATALOSS_SKIP_REASON)

    def test_download_allow_data_loss(self) -> None:  # type: ignore[override]
        """Skipped for HTTP/2; see ``HTTP2_DATALOSS_SKIP_REASON``."""
        pytest.skip(self.HTTP2_DATALOSS_SKIP_REASON)

    def test_download_allow_data_loss_via_setting(self) -> None:  # type: ignore[override]
        """Skipped for HTTP/2; see ``HTTP2_DATALOSS_SKIP_REASON``."""
        pytest.skip(self.HTTP2_DATALOSS_SKIP_REASON)

    @deferred_f_from_coro_f
    async def test_concurrent_requests_same_domain(
        self, mockserver: MockServer
    ) -> None:
        """Two requests to the same server through one handler both succeed."""
        # NOTE(review): despite the test name, the two downloads below are
        # awaited one after the other, not concurrently -- confirm intent.
        text_request = Request(mockserver.url("/text", is_secure=self.is_secure))
        echo_url = mockserver.url("/echo", is_secure=self.is_secure)
        echo_request = Request(echo_url, method="POST")
        async with self.get_dh() as handler:
            text_response = await handler.download_request(text_request)
            assert text_response.body == b"Works"
            echo_response = await handler.download_request(echo_request)
            assert echo_response.headers["Content-Length"] == b"79"

    @pytest.mark.xfail(reason="https://github.com/python-hyper/h2/issues/1247")
    @deferred_f_from_coro_f
    async def test_connect_request(self, mockserver: MockServer) -> None:
        """A ``CONNECT`` request yields an empty body (currently marked xfail)."""
        file_url = mockserver.url("/file", is_secure=self.is_secure)
        connect_request = Request(file_url, method="CONNECT")
        async with self.get_dh() as handler:
            connect_response = await handler.download_request(connect_request)
        assert connect_response.body == b""

    @deferred_f_from_coro_f
    async def test_custom_content_length_good(self, mockserver: MockServer) -> None:
        """A correct user-supplied ``Content-Length`` appears in the response text."""
        # Presumably /contentlength echoes the Content-Length it received --
        # verify against the mock server.
        request = Request(mockserver.url("/contentlength", is_secure=self.is_secure))
        matching_length = str(len(request.body))
        request.headers["Content-Length"] = matching_length
        async with self.get_dh() as handler:
            response = await handler.download_request(request)
        assert response.text == matching_length

    @deferred_f_from_coro_f
    async def test_custom_content_length_bad(self, mockserver: MockServer) -> None:
        """A wrong ``Content-Length`` is replaced by the real one, with a warning.

        The response text must equal the actual body length, and a WARNING from
        ``scrapy.core.http2.stream`` naming both values must have been logged.
        """
        request = Request(mockserver.url("/contentlength", is_secure=self.is_secure))
        body_size = len(request.body)
        real_length = str(body_size)
        wrong_length = str(body_size + 1)
        request.headers["Content-Length"] = wrong_length
        async with self.get_dh() as handler:
            with LogCapture() as captured:
                response = await handler.download_request(request)
        assert response.text == real_length
        expected_warning = (
            "Ignoring bad Content-Length header "
            f"{wrong_length!r} of request {request}, sending "
            f"{real_length!r} instead"
        )
        captured.check_present(
            ("scrapy.core.http2.stream", "WARNING", expected_warning)
        )

    @deferred_f_from_coro_f
    async def test_duplicate_header(self, mockserver: MockServer) -> None:
        """A header appended twice shows up with both values, in order."""
        request = Request(mockserver.url("/echo", is_secure=self.is_secure))
        header_name = "Custom-Header"
        sent_values = ["foo", "bar"]
        for header_value in sent_values:
            request.headers.appendlist(header_name, header_value)
        async with self.get_dh() as handler:
            response = await handler.download_request(request)
        echoed = json.loads(response.text)
        assert echoed["headers"][header_name] == sent_values
class TestHttps2WrongHostname(H2DownloadHandlerMixin, TestHttpsWrongHostnameBase):
    """Run the ``TestHttpsWrongHostnameBase`` tests with the H2 handler mixin."""
class TestHttps2InvalidDNSId(H2DownloadHandlerMixin, TestHttpsInvalidDNSIdBase):
    """Run the ``TestHttpsInvalidDNSIdBase`` tests with the H2 handler mixin."""
class TestHttps2InvalidDNSPattern(
    H2DownloadHandlerMixin, TestHttpsInvalidDNSPatternBase
):
    """Run the ``TestHttpsInvalidDNSPatternBase`` tests with the H2 handler mixin."""
class TestHttps2CustomCiphers(H2DownloadHandlerMixin, TestHttpsCustomCiphersBase):
    """Run the ``TestHttpsCustomCiphersBase`` tests with the H2 handler mixin."""
class TestHttp2WithCrawler(TestHttpWithCrawlerBase):
    """``TestHttpWithCrawlerBase`` tests with ``https`` mapped to the H2 handler."""

    is_secure = True

    @property
    def settings_dict(self) -> dict[str, Any] | None:
        """Return settings that register ``H2DownloadHandler`` for ``https``."""
        h2_handler_path = "scrapy.core.downloader.handlers.http2.H2DownloadHandler"
        return {"DOWNLOAD_HANDLERS": {"https": h2_handler_path}}
class TestHttps2Proxy(H2DownloadHandlerMixin, TestHttpProxyBase):
    """``TestHttpProxyBase`` tests run over HTTPS with the H2 handler mixin.

    Two inherited tests are overridden to assert that the parent test body
    raises a specific exception under this handler.
    """

    is_secure = True
    expected_http_proxy_request_body = b"/"

    @deferred_f_from_coro_f
    async def test_download_with_proxy_https_timeout(
        self, proxy_mockserver: ProxyEchoMockServer
    ) -> None:
        """The inherited timeout test must raise ``NotImplementedError``."""
        with pytest.raises(NotImplementedError):
            inherited_run = super().test_download_with_proxy_https_timeout(
                proxy_mockserver
            )
            await maybe_deferred_to_future(inherited_run)

    @deferred_f_from_coro_f
    async def test_download_with_proxy_without_http_scheme(
        self, proxy_mockserver: ProxyEchoMockServer
    ) -> None:
        """The inherited scheme-less proxy test must raise ``SchemeNotSupported``."""
        with pytest.raises(SchemeNotSupported):
            inherited_run = super().test_download_with_proxy_without_http_scheme(
                proxy_mockserver
            )
            await maybe_deferred_to_future(inherited_run)
|