File: test_addons.py

Package: python-scrapy 2.13.3-1
import itertools
from typing import Any
from unittest.mock import patch

from twisted.internet.defer import inlineCallbacks
from twisted.trial import unittest

from scrapy import Spider
from scrapy.crawler import Crawler, CrawlerRunner
from scrapy.exceptions import NotConfigured
from scrapy.settings import BaseSettings, Settings
from scrapy.utils.test import get_crawler, get_reactor_settings


class SimpleAddon:
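    """Bare-minimum addon: the only hook the addon framework requires is
    update_settings()."""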
    def update_settings(self, settings):
        pass


def get_addon_cls(config: dict[str, Any]) -> type:
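    """Return an addon class whose update_settings() applies *config* at
    "addon" priority."""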
    class AddonWithConfig:
        def update_settings(self, settings: BaseSettings):
            settings.update(config, priority="addon")

    return AddonWithConfig


class CreateInstanceAddon:
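    """Addon built via from_crawler(), so it can read its own configuration
    from crawler.settings before update_settings() runs."""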
    def __init__(self, crawler: Crawler) -> None:
        super().__init__()
        self.crawler = crawler
        self.config = crawler.settings.getdict("MYADDON")

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        return cls(crawler)

    def update_settings(self, settings):
        settings.update(self.config, priority="addon")


class TestAddon:
    def test_update_settings(self):
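        """Settings applied at "addon" priority override "default" but lose
        to "project": addon priority sits between the two."""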
        settings = BaseSettings()
        settings.set("KEY1", "default", priority="default")
        settings.set("KEY2", "project", priority="project")
        addon_config = {"KEY1": "addon", "KEY2": "addon", "KEY3": "addon"}
        testaddon = get_addon_cls(addon_config)()
        testaddon.update_settings(settings)
        assert settings["KEY1"] == "addon"
        assert settings["KEY2"] == "project"
        assert settings["KEY3"] == "addon"


class TestAddonManager(unittest.TestCase):
    def test_load_settings(self):
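        """Addon classes listed in the ADDONS setting are instantiated and
        exposed through the crawler's addon manager."""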
        settings_dict = {
            "ADDONS": {"tests.test_addons.SimpleAddon": 0},
        }
        crawler = get_crawler(settings_dict=settings_dict)
        manager = crawler.addons
        assert isinstance(manager.addons[0], SimpleAddon)

    def test_notconfigured(self):
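        """An addon that raises NotConfigured in update_settings() is
        skipped by the addon manager."""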
        class NotConfiguredAddon:
            def update_settings(self, settings):
                raise NotConfigured

        settings_dict = {
            "ADDONS": {NotConfiguredAddon: 0},
        }
        crawler = get_crawler(settings_dict=settings_dict)
        manager = crawler.addons
        assert not manager.addons

    def test_load_settings_order(self):
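        """Addons are applied in increasing order of their ADDONS value;
        when several addons set the same key at the same priority, the
        last one applied wins."""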
        # Create three addon classes, each setting KEY1 to a different value
        addonlist = []
        for i in range(3):
            addon = get_addon_cls({"KEY1": i})
            addon.number = i
            addonlist.append(addon)
        # Test for every possible ordering
        for ordered_addons in itertools.permutations(addonlist):
            expected_order = [a.number for a in ordered_addons]
            settings = {"ADDONS": {a: i for i, a in enumerate(ordered_addons)}}
            crawler = get_crawler(settings_dict=settings)
            manager = crawler.addons
            assert [a.number for a in manager.addons] == expected_order
            assert crawler.settings.getint("KEY1") == expected_order[-1]

    def test_build_from_crawler(self):
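        """If an addon defines from_crawler(), the manager builds it through
        that classmethod, giving it access to the crawler."""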
        settings_dict = {
            "ADDONS": {"tests.test_addons.CreateInstanceAddon": 0},
            "MYADDON": {"MYADDON_KEY": "val"},
        }
        crawler = get_crawler(settings_dict=settings_dict)
        manager = crawler.addons
        assert isinstance(manager.addons[0], CreateInstanceAddon)
        assert crawler.settings.get("MYADDON_KEY") == "val"

    def test_settings_priority(self):
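        """A setting applied at "addon" priority overrides the default value
        but is itself overridden by a project-level value."""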
        config = {
            "KEY": 15,  # priority=addon
        }
        settings_dict = {
            "ADDONS": {get_addon_cls(config): 1},
            **get_reactor_settings(),
        }
        crawler = get_crawler(settings_dict=settings_dict)
        assert crawler.settings.getint("KEY") == 15

        settings = Settings(settings_dict)
        settings.set("KEY", 0, priority="default")
        runner = CrawlerRunner(settings)
        crawler = runner.create_crawler(Spider)
        crawler._apply_settings()
        assert crawler.settings.getint("KEY") == 15

        settings_dict = {
            "KEY": 20,  # priority=project
            "ADDONS": {get_addon_cls(config): 1},
            **get_reactor_settings(),
        }
        settings = Settings(settings_dict)
        settings.set("KEY", 0, priority="default")
        runner = CrawlerRunner(settings)
        crawler = runner.create_crawler(Spider)
        assert crawler.settings.getint("KEY") == 20

    def test_fallback_workflow(self):
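        """An addon can preserve the previously configured download handler
        in a fallback setting before installing its own, whether the
        previous value came from Scrapy's defaults or from the user."""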
        FALLBACK_SETTING = "MY_FALLBACK_DOWNLOAD_HANDLER"

        class AddonWithFallback:
            def update_settings(self, settings):
                if not settings.get(FALLBACK_SETTING):
                    settings.set(
                        FALLBACK_SETTING,
                        settings.getwithbase("DOWNLOAD_HANDLERS")["https"],
                        "addon",
                    )
                settings["DOWNLOAD_HANDLERS"]["https"] = "AddonHandler"

        settings_dict = {
            "ADDONS": {AddonWithFallback: 1},
        }
        crawler = get_crawler(settings_dict=settings_dict)
        assert (
            crawler.settings.getwithbase("DOWNLOAD_HANDLERS")["https"] == "AddonHandler"
        )
        assert (
            crawler.settings.get(FALLBACK_SETTING)
            == "scrapy.core.downloader.handlers.http.HTTPDownloadHandler"
        )

        settings_dict = {
            "ADDONS": {AddonWithFallback: 1},
            "DOWNLOAD_HANDLERS": {"https": "UserHandler"},
        }
        crawler = get_crawler(settings_dict=settings_dict)
        assert (
            crawler.settings.getwithbase("DOWNLOAD_HANDLERS")["https"] == "AddonHandler"
        )
        assert crawler.settings.get(FALLBACK_SETTING) == "UserHandler"

    def test_logging_message(self):
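        """The list of enabled addons is logged once at crawler creation."""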
        class LoggedAddon:
            def update_settings(self, settings):
                pass

        with (
            patch("scrapy.addons.logger") as logger_mock,
            patch("scrapy.addons.build_from_crawler") as build_from_crawler_mock,
        ):
            settings_dict = {
                "ADDONS": {LoggedAddon: 1},
            }
            addon = LoggedAddon()
            build_from_crawler_mock.return_value = addon
            crawler = get_crawler(settings_dict=settings_dict)
            logger_mock.info.assert_called_once_with(
                "Enabled addons:\n%(addons)s",
                {"addons": [addon]},
                extra={"crawler": crawler},
            )

    @inlineCallbacks
    def test_enable_addon_in_spider(self):
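        """Addons can be enabled from Spider.from_crawler() by updating the
        ADDONS setting at "spider" priority; they take effect when the
        crawl starts."""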
        class MySpider(Spider):
            name = "myspider"

            @classmethod
            def from_crawler(cls, crawler, *args, **kwargs):
                spider = super().from_crawler(crawler, *args, **kwargs)
                addon_config = {"KEY": "addon"}
                addon_cls = get_addon_cls(addon_config)
                spider.settings.set("ADDONS", {addon_cls: 1}, priority="spider")
                return spider

        settings = Settings()
        settings.setdict(get_reactor_settings())
        settings.set("KEY", "default", priority="default")
        runner = CrawlerRunner(settings)
        crawler = runner.create_crawler(MySpider)
        assert crawler.settings.get("KEY") == "default"
        yield crawler.crawl()
        assert crawler.settings.get("KEY") == "addon"