File: asyncio_deferred_signal.py

Package: python-scrapy 2.4.1-2+deb11u1 (Debian bullseye, main)

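"""Crawl a single data: URI with an item pipeline whose open_spider hook is
a coroutine, using the asyncio Twisted reactor and, optionally, a custom
asyncio event loop class named on the command line."""
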
import asyncio
import sys

from scrapy import Spider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.defer import deferred_from_coro


class UppercasePipeline:
    async def _open_spider(self, spider):
        spider.logger.info("async pipeline opened!")
        await asyncio.sleep(0.1)

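    # Scrapy allows open_spider to return a Deferred; deferred_from_coro
    # bridges the coroutine above into one, so the crawl waits for it.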
    def open_spider(self, spider):
        return deferred_from_coro(self._open_spider(spider))

    def process_item(self, item, spider):
        return {"url": item["url"].upper()}


class UrlSpider(Spider):
    name = "url_spider"
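    # "data:," is an empty data: URI, so the crawl needs no network access.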
    start_urls = ["data:,"]
    custom_settings = {
        "ITEM_PIPELINES": {UppercasePipeline: 100},
    }

    def parse(self, response):
        yield {"url": response.url}


if __name__ == "__main__":
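    # Optional argv[1]: import path of an asyncio event loop class
    # (e.g. "uvloop.Loop") for the ASYNCIO_EVENT_LOOP setting.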
    try:
        ASYNCIO_EVENT_LOOP = sys.argv[1]
    except IndexError:
        ASYNCIO_EVENT_LOOP = None

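    # The asyncio reactor is required for coroutine support; ASYNCIO_EVENT_LOOP
    # selects which loop implementation it runs on (the default loop when None).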
    process = CrawlerProcess(settings={
        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
        "ASYNCIO_EVENT_LOOP": ASYNCIO_EVENT_LOOP,
    })
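    # start() runs the Twisted reactor and blocks until the crawl finishes.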
    process.crawl(UrlSpider)
    process.start()