"""Tests for how Spider.start() and the deprecated start_requests() are crawled."""
import warnings
from asyncio import sleep
import pytest
from testfixtures import LogCapture
from twisted.trial.unittest import TestCase
from scrapy import Spider, signals
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.defer import deferred_f_from_coro_f, maybe_deferred_to_future
from scrapy.utils.test import get_crawler
from .utils import twisted_sleep
# Delay, in seconds, awaited by the "delayed" tests before they yield an item.
SLEEP_SECONDS = 0.1
# Two distinguishable items, so a test can tell which start method produced
# the output (e.g. start() yields ITEM_A while start_requests() yields ITEM_B).
ITEM_A = {"id": "a"}
ITEM_B = {"id": "b"}
class MainTestCase(TestCase):
    """Crawl small in-test spiders and check what their start logic yields.

    Each test defines a throwaway ``Spider`` subclass, crawls it through
    ``_test_spider()`` and checks both the scraped items and whether a
    ``ScrapyDeprecationWarning`` is (or is not) emitted.
    """

    async def _test_spider(self, spider, expected_items=None):
        """Crawl *spider* and assert that it scraped *expected_items*.

        Args:
            spider: Spider class handed to ``get_crawler()``.
            expected_items: Items expected from the ``item_scraped`` signal,
                in order. ``None`` means that no items are expected.

        Raises:
            AssertionError: If the ``finish_reason`` stat is not
                ``"finished"`` or the scraped items differ from
                *expected_items*.
        """
        actual_items = []
        expected_items = [] if expected_items is None else expected_items

        def track_item(item, response, spider):
            actual_items.append(item)

        crawler = get_crawler(spider)
        crawler.signals.connect(track_item, signals.item_scraped)
        await maybe_deferred_to_future(crawler.crawl())
        assert crawler.stats.get_value("finish_reason") == "finished"
        assert actual_items == expected_items

    @deferred_f_from_coro_f
    async def test_start_urls(self):
        """A spider that only sets ``start_urls`` crawls without warnings."""

        class TestSpider(Spider):
            name = "test"
            start_urls = ["data:,"]

            async def parse(self, response):
                yield ITEM_A

        with warnings.catch_warnings():
            # Turn any warning into an error so that the test fails on it.
            warnings.simplefilter("error")
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_start(self):
        """A spider that defines ``start()`` crawls without warnings."""

        class TestSpider(Spider):
            name = "test"

            async def start(self):
                yield ITEM_A

        with warnings.catch_warnings():
            warnings.simplefilter("error")
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_start_subclass(self):
        """``start()`` inherited from a base spider crawls without warnings."""

        class BaseSpider(Spider):
            async def start(self):
                yield ITEM_A

        class TestSpider(BaseSpider):
            name = "test"

        with warnings.catch_warnings():
            warnings.simplefilter("error")
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_deprecated(self):
        """Defining only ``start_requests()`` warns but still yields items."""

        class TestSpider(Spider):
            name = "test"

            def start_requests(self):
                yield ITEM_A

        with pytest.warns(ScrapyDeprecationWarning):
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_deprecated_subclass(self):
        """An inherited ``start_requests()`` warns about the defining class."""

        class BaseSpider(Spider):
            def start_requests(self):
                yield ITEM_A

        class TestSpider(BaseSpider):
            name = "test"

        # The warning must be about the base class and not the subclass.
        with pytest.warns(ScrapyDeprecationWarning, match="BaseSpider"):
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_universal(self):
        """With both methods defined, ``start()`` is used and nothing warns."""

        class TestSpider(Spider):
            name = "test"

            async def start(self):
                yield ITEM_A

            def start_requests(self):
                yield ITEM_B

        with warnings.catch_warnings():
            warnings.simplefilter("error")
            # Only ITEM_A is expected: the output of start_requests() (ITEM_B)
            # must not be scraped.
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_universal_subclass(self):
        """Same as ``test_universal``, with both methods on a base class."""

        class BaseSpider(Spider):
            async def start(self):
                yield ITEM_A

            def start_requests(self):
                yield ITEM_B

        class TestSpider(BaseSpider):
            name = "test"

        with warnings.catch_warnings():
            warnings.simplefilter("error")
            await self._test_spider(TestSpider, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_start_deprecated_super(self):
        """Calling ``super().start_requests()`` from ``start()`` warns.

        The warning must point at this test module (the caller), which is
        checked through the file name recorded for the first warning.
        """

        class TestSpider(Spider):
            name = "test"

            async def start(self):
                for item_or_request in super().start_requests():
                    yield item_or_request

        with pytest.warns(
            ScrapyDeprecationWarning, match=r"use Spider\.start\(\) instead"
        ) as messages:
            await self._test_spider(TestSpider, [])

        assert messages[0].filename.endswith("test_spider_start.py")

    async def _test_start(self, start_, expected_items=None):
        """Crawl a spider whose ``start`` attribute is *start_*.

        Args:
            start_: Function installed as the spider's ``start`` method; it
                receives the spider instance as its only argument.
            expected_items: Forwarded to ``_test_spider()``.
        """

        class TestSpider(Spider):
            name = "test"
            start = start_

        await self._test_spider(TestSpider, expected_items)

    @pytest.mark.only_asyncio
    @deferred_f_from_coro_f
    async def test_asyncio_delayed(self):
        """``start()`` may await an asyncio sleep before yielding."""

        async def start(spider):
            await sleep(SLEEP_SECONDS)
            yield ITEM_A

        await self._test_start(start, [ITEM_A])

    @deferred_f_from_coro_f
    async def test_twisted_delayed(self):
        """``start()`` may await ``twisted_sleep()`` before yielding."""

        async def start(spider):
            await maybe_deferred_to_future(twisted_sleep(SLEEP_SECONDS))
            yield ITEM_A

        await self._test_start(start, [ITEM_A])

    # Exceptions

    @deferred_f_from_coro_f
    async def test_deprecated_non_generator_exception(self):
        """A ``start_requests()`` that raises is logged with its traceback.

        The method is a plain function (not a generator) that raises
        ``RuntimeError``; the crawl must still finish, with the deprecation
        warning emitted, no items scraped and the traceback captured in the
        log.
        """

        class TestSpider(Spider):
            name = "test"

            def start_requests(self):
                raise RuntimeError

        with (
            LogCapture() as log,
            pytest.warns(
                ScrapyDeprecationWarning,
                match=r"defines the deprecated start_requests\(\) method",
            ),
        ):
            await self._test_spider(TestSpider, [])

        # Python tracebacks print the source line indented by four spaces
        # below the 'File "...", line N, in <name>' line.
        assert "in start_requests\n    raise RuntimeError" in str(log)
|