1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
from testfixtures import LogCapture
from twisted.internet import defer
from scrapy.exceptions import StopDownload
from tests.test_engine import (
AttrsItemsSpider,
CrawlerRun,
DataClassItemsSpider,
DictItemsSpider,
MySpider,
TestEngineBase,
)
class HeadersReceivedCrawlerRun(CrawlerRun):
    """Crawler run whose ``headers_received`` handler stops every download.

    The handler first delegates to the parent implementation and then raises
    ``StopDownload(fail=False)``.
    """

    def headers_received(self, headers, body_length, request, spider):
        """Forward the call to the parent handler, then raise ``StopDownload``.

        The arguments are passed through to ``CrawlerRun.headers_received``
        unchanged and in the same order.

        Raises:
            StopDownload: always, constructed with ``fail=False``.
        """
        # Let the base class do its own handling before the download is
        # interrupted.
        super().headers_received(headers, body_length, request, spider)
        stop_signal = StopDownload(fail=False)
        raise stop_signal
class TestHeadersReceivedEngine(TestEngineBase):
    """Engine test that uses ``HeadersReceivedCrawlerRun`` for every crawl.

    Each crawl is expected to log a "Download stopped" DEBUG record for the
    checked URLs and to leave ``run.bytes`` empty.
    """

    @defer.inlineCallbacks
    def test_crawler(self):
        """Run a crawl per spider class and check logs and collected data."""
        spider_classes = (
            MySpider,
            DictItemsSpider,
            AttrsItemsSpider,
            DataClassItemsSpider,
        )
        # Request paths for which a "Download stopped" record must have been
        # logged; each one is checked with its own check_present call, in
        # this order.
        stopped_paths = ("/redirected", "/", "/numbers")

        for spider_cls in spider_classes:
            run = HeadersReceivedCrawlerRun(spider_cls)
            with LogCapture() as captured:
                yield run.run()
                for path in stopped_paths:
                    captured.check_present(
                        (
                            "scrapy.core.downloader.handlers.http11",
                            "DEBUG",
                            f"Download stopped for <GET http://localhost:{run.portno}{path}> from"
                            " signal handler HeadersReceivedCrawlerRun.headers_received",
                        )
                    )
            self._assert_visited_urls(run)
            self._assert_downloaded_responses(run, count=6)
            self._assert_signals_caught(run)
            self._assert_bytes_received(run)
            self._assert_headers_received(run)

    @staticmethod
    def _assert_bytes_received(run: CrawlerRun) -> None:
        """Assert that ``run.bytes`` holds no entries."""
        received = run.bytes
        assert len(received) == 0

    @staticmethod
    def _assert_visited_urls(run: CrawlerRun) -> None:
        """Assert that "/", "/redirect" and "/redirected" were all visited.

        Visited URLs are read from the ``url`` attribute of the first element
        of each entry in ``run.respplug``; expected URLs are built with
        ``run.geturl``.
        """
        required_paths = ("/", "/redirect", "/redirected")
        expected = {run.geturl(path) for path in required_paths}
        visited = {entry[0].url for entry in run.respplug}
        # An empty difference means every expected URL was visited.
        missing = expected - visited
        assert not missing, f"URLs not visited: {list(missing)}"
|