File: asyncio_deferred_signal.py

Package: python-scrapy 2.13.3-1
from __future__ import annotations

import asyncio
import sys

from scrapy import Spider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.defer import deferred_from_coro


class UppercasePipeline:
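    # open_spider starts a coroutine and returns it as a Deferred, exercising
    # coroutine support under the asyncio Twisted reactor; process_item
    # upper-cases the scraped URL.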
    async def _open_spider(self, spider):
        spider.logger.info("async pipeline opened!")
        await asyncio.sleep(0.1)

    def open_spider(self, spider):
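        # deferred_from_coro wraps the coroutine in a Twisted Deferred, so the
        # asyncio work can be awaited by Scrapy when the spider is opened.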
        return deferred_from_coro(self._open_spider(spider))

    def process_item(self, item, spider):
        return {"url": item["url"].upper()}


class UrlSpider(Spider):
    name = "url_spider"
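    # "data:," is an empty data URL, so the crawl needs no network access.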
    start_urls = ["data:,"]
    custom_settings = {
        "ITEM_PIPELINES": {UppercasePipeline: 100},
    }

    def parse(self, response):
        yield {"url": response.url}


if __name__ == "__main__":
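    # An asyncio event loop class import path may be passed as the first
    # command-line argument; otherwise the default event loop is used.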
    ASYNCIO_EVENT_LOOP: str | None
    try:
        ASYNCIO_EVENT_LOOP = sys.argv[1]
    except IndexError:
        ASYNCIO_EVENT_LOOP = None

    process = CrawlerProcess(
        settings={
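            # Run Twisted on top of the asyncio event loop.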
            "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
            "ASYNCIO_EVENT_LOOP": ASYNCIO_EVENT_LOOP,
        }
    )
    process.crawl(UrlSpider)
    process.start()