File: test_closespider.py

package info (click to toggle)
python-scrapy 2.13.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,664 kB
  • sloc: python: 52,028; xml: 199; makefile: 25; sh: 7
file content (112 lines) | stat: -rw-r--r-- 4,356 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from twisted.internet import defer
from twisted.trial.unittest import TestCase

from scrapy.utils.test import get_crawler
from tests.mockserver import MockServer
from tests.spiders import (
    ErrorSpider,
    FollowAllSpider,
    ItemSpider,
    MaxItemsAndRequestsSpider,
    SlowSpider,
)


class TestCloseSpider(TestCase):
    """Crawl-based checks for the ``CLOSESPIDER_*`` settings.

    Every test builds a crawler with one or more ``CLOSESPIDER_*`` settings,
    runs it against a ``MockServer`` shared by the whole class, and then
    inspects the close reason stored in ``crawler.spider.meta`` together with
    values read from ``crawler.stats``.
    """

    @classmethod
    def setUpClass(cls):
        """Create the class-wide ``MockServer`` and enter its context."""
        server = MockServer()
        cls.mockserver = server
        server.__enter__()

    @classmethod
    def tearDownClass(cls):
        """Exit the class-wide ``MockServer`` context with no exception info."""
        server = cls.mockserver
        server.__exit__(None, None, None)

    @defer.inlineCallbacks
    def test_closespider_itemcount(self):
        """Crawl ``ItemSpider`` with ``CLOSESPIDER_ITEMCOUNT`` set.

        Expects close reason ``closespider_itemcount`` and an
        ``item_scraped_count`` stat no smaller than the configured value.
        """
        threshold = 5
        settings = {"CLOSESPIDER_ITEMCOUNT": threshold}
        crawler = get_crawler(ItemSpider, settings)
        yield crawler.crawl(mockserver=self.mockserver)
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_itemcount"
        scraped = crawler.stats.get_value("item_scraped_count")
        assert scraped >= threshold

    @defer.inlineCallbacks
    def test_closespider_pagecount(self):
        """Crawl ``FollowAllSpider`` with ``CLOSESPIDER_PAGECOUNT`` set.

        Expects close reason ``closespider_pagecount`` and a
        ``response_received_count`` stat no smaller than the configured value.
        """
        threshold = 5
        settings = {"CLOSESPIDER_PAGECOUNT": threshold}
        crawler = get_crawler(FollowAllSpider, settings)
        yield crawler.crawl(mockserver=self.mockserver)
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_pagecount"
        responses = crawler.stats.get_value("response_received_count")
        assert responses >= threshold

    @defer.inlineCallbacks
    def test_closespider_pagecount_no_item(self):
        """Crawl ``MaxItemsAndRequestsSpider`` with ``CLOSESPIDER_PAGECOUNT_NO_ITEM``.

        The spider is given ``max_items`` and ``max_requests`` limits, the
        latter being the threshold plus the item limit. Expects close reason
        ``closespider_pagecount_no_item`` and a response count that does not
        exceed the threshold plus the number of scraped items.
        """
        threshold = 5
        item_limit = 5
        request_limit = threshold + item_limit
        settings = {
            "CLOSESPIDER_PAGECOUNT_NO_ITEM": threshold,
        }
        crawler = get_crawler(MaxItemsAndRequestsSpider, settings)
        yield crawler.crawl(
            max_items=item_limit,
            max_requests=request_limit,
            mockserver=self.mockserver,
        )
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_pagecount_no_item"
        stats = crawler.stats
        responses = stats.get_value("response_received_count")
        scraped = stats.get_value("item_scraped_count")
        assert responses <= threshold + scraped

    @defer.inlineCallbacks
    def test_closespider_pagecount_no_item_with_pagecount(self):
        """Crawl ``FollowAllSpider`` with both page-count settings enabled.

        ``CLOSESPIDER_PAGECOUNT_NO_ITEM`` is set lower than
        ``CLOSESPIDER_PAGECOUNT``. Expects close reason
        ``closespider_pagecount_no_item`` and a response count strictly below
        the ``CLOSESPIDER_PAGECOUNT`` value.
        """
        no_item_threshold = 5
        page_threshold = 20
        settings = {
            "CLOSESPIDER_PAGECOUNT_NO_ITEM": no_item_threshold,
            "CLOSESPIDER_PAGECOUNT": page_threshold,
        }
        crawler = get_crawler(FollowAllSpider, settings)
        yield crawler.crawl(mockserver=self.mockserver)
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_pagecount_no_item"
        responses = crawler.stats.get_value("response_received_count")
        assert responses < page_threshold

    @defer.inlineCallbacks
    def test_closespider_errorcount(self):
        """Crawl ``ErrorSpider`` with ``CLOSESPIDER_ERRORCOUNT`` set.

        Expects close reason ``closespider_errorcount``, and both the overall
        ``spider_exceptions/count`` stat and the per-exception-class stat to
        be no smaller than the configured value.
        """
        threshold = 5
        settings = {"CLOSESPIDER_ERRORCOUNT": threshold}
        crawler = get_crawler(ErrorSpider, settings)
        yield crawler.crawl(total=1_000_000, mockserver=self.mockserver)
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_errorcount"
        stats = crawler.stats
        # The per-class stat key is derived from the spider's exception class.
        exception_name = crawler.spider.exception_cls.__name__
        per_class_errors = stats.get_value(f"spider_exceptions/{exception_name}")
        assert stats.get_value("spider_exceptions/count") >= threshold
        assert per_class_errors >= threshold

    @defer.inlineCallbacks
    def test_closespider_timeout(self):
        """Crawl ``FollowAllSpider`` with ``CLOSESPIDER_TIMEOUT`` set.

        Expects close reason ``closespider_timeout`` and an
        ``elapsed_time_seconds`` stat no smaller than the configured value.
        """
        time_limit = 0.1
        settings = {"CLOSESPIDER_TIMEOUT": time_limit}
        crawler = get_crawler(FollowAllSpider, settings)
        yield crawler.crawl(total=1_000_000, mockserver=self.mockserver)
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_timeout"
        elapsed = crawler.stats.get_value("elapsed_time_seconds")
        assert elapsed >= time_limit

    @defer.inlineCallbacks
    def test_closespider_timeout_no_item(self):
        """Crawl ``SlowSpider`` with ``CLOSESPIDER_TIMEOUT_NO_ITEM`` set.

        Expects close reason ``closespider_timeout_no_item`` and an
        ``elapsed_time_seconds`` stat no smaller than the configured value.
        """
        idle_limit = 1
        settings = {"CLOSESPIDER_TIMEOUT_NO_ITEM": idle_limit}
        crawler = get_crawler(SlowSpider, settings)
        yield crawler.crawl(n=3, mockserver=self.mockserver)
        close_reason = crawler.spider.meta["close_reason"]
        assert close_reason == "closespider_timeout_no_item"
        elapsed = crawler.stats.get_value("elapsed_time_seconds")
        assert elapsed >= idle_limit