File: test_stats.py

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING
from unittest import mock

import pytest

from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.extensions.corestats import CoreStats
from scrapy.spiders import Spider
from scrapy.statscollectors import DummyStatsCollector, StatsCollector
from scrapy.utils.test import get_crawler

if TYPE_CHECKING:
    from scrapy.crawler import Crawler


@pytest.fixture
def crawler() -> Crawler:
    return get_crawler(Spider)


@pytest.fixture
def spider(crawler: Crawler) -> Spider:
    return crawler._create_spider("foo")


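# Exercises the CoreStats extension, which hooks spider lifecycle signals and
# records start/finish times, item/response counts, and the finish reason in
# the crawler's stats collector.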
class TestCoreStatsExtension:
    @mock.patch("scrapy.extensions.corestats.datetime")
    def test_core_stats_default_stats_collector(
        self, mock_datetime: mock.Mock, crawler: Crawler, spider: Spider
    ) -> None:
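        # Freeze datetime.now() so start_time and finish_time are identical
        # and elapsed_time_seconds comes out as exactly 0.0.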
        fixed_datetime = datetime(2019, 12, 1, 11, 38)
        mock_datetime.now = mock.Mock(return_value=fixed_datetime)
        crawler.stats = StatsCollector(crawler)
        ext = CoreStats.from_crawler(crawler)
        ext.spider_opened(spider)
        ext.item_scraped({}, spider)
        ext.response_received(spider)
        ext.item_dropped({}, spider, ZeroDivisionError())
        ext.spider_closed(spider, "finished")
        assert ext.stats._stats == {
            "start_time": fixed_datetime,
            "finish_time": fixed_datetime,
            "item_scraped_count": 1,
            "response_received_count": 1,
            "item_dropped_count": 1,
            "item_dropped_reasons_count/ZeroDivisionError": 1,
            "finish_reason": "finished",
            "elapsed_time_seconds": 0.0,
        }

    def test_core_stats_dummy_stats_collector(
        self, crawler: Crawler, spider: Spider
    ) -> None:
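        # DummyStatsCollector silently discards every write, so the stats
        # dict stays empty no matter what the extension records.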
        crawler.stats = DummyStatsCollector(crawler)
        ext = CoreStats.from_crawler(crawler)
        ext.spider_opened(spider)
        ext.item_scraped({}, spider)
        ext.response_received(spider)
        ext.item_dropped({}, spider, ZeroDivisionError())
        ext.spider_closed(spider, "finished")
        assert ext.stats._stats == {}


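# Exercises the StatsCollector API directly: get/set with defaults, plus the
# inc/max/min helpers.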
class TestStatsCollector:
    def test_collector(self, crawler: Crawler) -> None:
        stats = StatsCollector(crawler)
        assert stats.get_stats() == {}
        assert stats.get_value("anything") is None
        assert stats.get_value("anything", "default") == "default"
        stats.set_value("test", "value")
        assert stats.get_stats() == {"test": "value"}
        stats.set_value("test2", 23)
        assert stats.get_stats() == {"test": "value", "test2": 23}
        assert stats.get_value("test2") == 23
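        # inc_value() adds the given amount, defaulting to 1.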
        stats.inc_value("test2")
        assert stats.get_value("test2") == 24
        stats.inc_value("test2", 6)
        assert stats.get_value("test2") == 30
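        # max_value() keeps the larger of the stored and new values, and
        # initializes missing keys.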
        stats.max_value("test2", 6)
        assert stats.get_value("test2") == 30
        stats.max_value("test2", 40)
        assert stats.get_value("test2") == 40
        stats.max_value("test3", 1)
        assert stats.get_value("test3") == 1
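        # min_value() mirrors max_value(), keeping the smaller value.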
        stats.min_value("test2", 60)
        assert stats.get_value("test2") == 40
        stats.min_value("test2", 35)
        assert stats.get_value("test2") == 35
        stats.min_value("test4", 7)
        assert stats.get_value("test4") == 7

    def test_dummy_collector(self, crawler: Crawler) -> None:
        stats = DummyStatsCollector(crawler)
        assert stats.get_stats() == {}
        assert stats.get_value("anything") is None
        assert stats.get_value("anything", "default") == "default"
        stats.set_value("test", "value")
        stats.inc_value("v1")
        stats.max_value("v2", 100)
        stats.min_value("v3", 100)
        stats.open_spider()
        stats.set_value("test", "value")
        assert stats.get_stats() == {}

    def test_deprecated_spider_arg(self, crawler: Crawler, spider: Spider) -> None:
        stats = StatsCollector(crawler)
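        # The legacy 'spider' argument is still accepted but emits a
        # ScrapyDeprecationWarning; behaviour is otherwise unchanged.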
        with pytest.warns(
            ScrapyDeprecationWarning,
            match=r"Passing a 'spider' argument to StatsCollector.set_value\(\) is deprecated",
        ):
            stats.set_value("test", "value", spider=spider)
        assert stats.get_stats() == {"test": "value"}
        with pytest.warns(
            ScrapyDeprecationWarning,
            match=r"Passing a 'spider' argument to StatsCollector.get_stats\(\) is deprecated",
        ):
            assert stats.get_stats(spider) == {"test": "value"}
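

# A minimal sketch, not part of the upstream test file: in a real project the
# active collector is chosen via the STATS_CLASS setting rather than by
# assigning to crawler.stats directly. This assumes get_crawler() applies
# settings eagerly, as it does in recent Scrapy versions.
def _sketch_disable_stats_via_setting() -> None:
    crawler = get_crawler(
        Spider,
        settings_dict={"STATS_CLASS": "scrapy.statscollectors.DummyStatsCollector"},
    )
    assert isinstance(crawler.stats, DummyStatsCollector)
    # With DummyStatsCollector installed, all writes are no-ops.
    crawler.stats.set_value("test", "value")
    assert crawler.stats.get_stats() == {}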