1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
"""
SpiderCloseDelay is an extension that keeps idle spiders open until a
configurable amount of idle time has elapsed.
"""
from time import time
from scrapy.xlib.pydispatch import dispatcher
from collections import defaultdict
from scrapy.core import signals
from scrapy.core.engine import scrapyengine
from scrapy.core.exceptions import NotConfigured, DontCloseSpider
from scrapy.conf import settings
class SpiderCloseDelay(object):
    """Extension that postpones closing an idle spider.

    The number of seconds to wait is read from the ``SPIDER_CLOSE_DELAY``
    setting. While an idle spider is still inside that window, the
    ``spider_idle`` handler raises ``DontCloseSpider``.
    """

    def __init__(self):
        """Read the delay setting and hook up the signal handlers.

        Raises ``NotConfigured`` when ``SPIDER_CLOSE_DELAY`` is unset or zero.
        """
        close_delay = settings.getint('SPIDER_CLOSE_DELAY')
        self.delay = close_delay
        if not close_delay:
            raise NotConfigured

        # Maps spider -> timestamp. An entry is created with the current time
        # the first time a spider is looked up (see spider_idle), and is used
        # as the reference point when the downloader has no ``lastseen``.
        self.opened_at = defaultdict(time)

        handlers = (
            (self.spider_idle, signals.spider_idle),
            (self.spider_closed, signals.spider_closed),
        )
        for handler, signal in handlers:
            dispatcher.connect(handler, signal=signal)

    def spider_idle(self, spider):
        """Keep ``spider`` open while its idle window has not elapsed.

        Raises ``DontCloseSpider`` if fewer than ``self.delay`` seconds have
        passed since the reference timestamp for this spider.
        """
        try:
            last_activity = scrapyengine.downloader.sites[spider].lastseen
        except KeyError:
            # The downloader has no site entry for this spider.
            last_activity = None

        # Fall back to the timestamp tracked by this extension when the
        # downloader reports nothing usable (missing, None or otherwise falsy).
        reference = last_activity or self.opened_at[spider]

        now = time()
        if now < reference + self.delay:
            raise DontCloseSpider

    def spider_closed(self, spider):
        """Forget the timestamp tracked for ``spider``, if there is one."""
        try:
            del self.opened_at[spider]
        except KeyError:
            # Nothing was recorded for this spider; nothing to clean up.
            pass
|