File: test_engine_stop_download_bytes.py

package info
python-scrapy 2.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,308 kB
  • sloc: python: 55,321; xml: 199; makefile: 25; sh: 7
file content (82 lines) | stat: -rw-r--r-- 3,147 bytes

from __future__ import annotations

from typing import TYPE_CHECKING

from testfixtures import LogCapture

from scrapy.exceptions import StopDownload
from scrapy.utils.defer import deferred_f_from_coro_f
from tests.test_engine import (
    AttrsItemsSpider,
    CrawlerRun,
    DataClassItemsSpider,
    DictItemsSpider,
    MySpider,
    TestEngineBase,
)

if TYPE_CHECKING:
    from tests.mockserver.http import MockServer


class BytesReceivedCrawlerRun(CrawlerRun):
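    # Raising StopDownload(fail=False) from a bytes_received handler tells the
    # downloader to stop after the first received chunk and hand the partial
    # response to the spider callback instead of treating it as an error.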
    def bytes_received(self, data, request, spider):
        super().bytes_received(data, request, spider)
        raise StopDownload(fail=False)


class TestBytesReceivedEngine(TestEngineBase):
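    # deferred_f_from_coro_f wraps the coroutine test into a function that
    # returns a Deferred, so it can run under Twisted's reactor.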
    @deferred_f_from_coro_f
    async def test_crawler(self, mockserver: MockServer) -> None:
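        # Exercise the engine with every supported item type; each run is
        # expected to log a "Download stopped" message for the fetched URLs.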
        for spider in (
            MySpider,
            DictItemsSpider,
            AttrsItemsSpider,
            DataClassItemsSpider,
        ):
            run = BytesReceivedCrawlerRun(spider)
            with LogCapture() as log:
                await run.run(mockserver)
                log.check_present(
                    (
                        "scrapy.core.downloader.handlers.http11",
                        "DEBUG",
                        f"Download stopped for <GET {mockserver.url('/redirected')}> "
                        "from signal handler BytesReceivedCrawlerRun.bytes_received",
                    )
                )
                log.check_present(
                    (
                        "scrapy.core.downloader.handlers.http11",
                        "DEBUG",
                        f"Download stopped for <GET {mockserver.url('/static/')}> "
                        "from signal handler BytesReceivedCrawlerRun.bytes_received",
                    )
                )
                log.check_present(
                    (
                        "scrapy.core.downloader.handlers.http11",
                        "DEBUG",
                        f"Download stopped for <GET {mockserver.url('/numbers')}> "
                        "from signal handler BytesReceivedCrawlerRun.bytes_received",
                    )
                )
            self._assert_visited_urls(run)
            self._assert_scheduled_requests(run, count=9)
            self._assert_downloaded_responses(run, count=9)
            self._assert_signals_caught(run)
            self._assert_headers_received(run)
            self._assert_bytes_received(run)

    @staticmethod
    def _assert_bytes_received(run: CrawlerRun) -> None:
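        # run.bytes maps each request to the list of payloads delivered via the
        # bytes_received signal; stopping on the first chunk means one entry each.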
        assert len(run.bytes) == 9
        for request, data in run.bytes.items():
            joined_data = b"".join(data)
            assert len(data) == 1  # signal was fired only once
            if run.getpath(request.url) == "/numbers":
                # Received bytes are not the complete response. The exact amount depends
                # on the buffer size, which can vary, so we only check that the amount
                # of received bytes is strictly less than the full response.
                numbers = [str(x).encode("utf8") for x in range(2**18)]
                assert len(joined_data) < len(b"".join(numbers))