1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
import inspect
import logging
from scrapy.spiders import Spider
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.misc import arg_to_iter
# Optional helper for async generators. The import is allowed to fail with
# SyntaxError -- presumably because scrapy.utils.py36 uses syntax that older
# interpreters cannot parse (TODO confirm). In that case the name is bound to
# None, which iterate_spider_output() checks before using it.
try:
    from scrapy.utils.py36 import collect_asyncgen
except SyntaxError:
    collect_asyncgen = None

# Module-level logger, used by spidercls_for_request() for its optional
# error messages.
logger = logging.getLogger(__name__)
def iterate_spider_output(result):
    """Normalize *result* into something that can be iterated.

    * An async generator (only checked when ``collect_asyncgen`` was imported
      and ``inspect`` offers ``isasyncgen``) is first passed through
      ``collect_asyncgen``.
    * A coroutine is used as-is.

    In both of those cases the awaitable is handed to ``deferred_from_coro``
    and this function is attached as a callback on the returned object, so
    the eventual value is normalized again; that object is what gets
    returned. Any other *result* is returned via ``arg_to_iter``.
    """
    asyncgen_supported = collect_asyncgen and hasattr(inspect, 'isasyncgen')
    if asyncgen_supported and inspect.isasyncgen(result):
        awaitable = collect_asyncgen(result)
    elif inspect.iscoroutine(result):
        awaitable = result
    else:
        # Plain (synchronous) value: no deferred machinery needed.
        return arg_to_iter(result)

    deferred = deferred_from_coro(awaitable)
    # Re-run the normalization on whatever the awaitable resolves to.
    deferred.addCallback(iterate_spider_output)
    return deferred
def iter_spider_classes(module):
    """Yield every instantiable spider class defined in *module*.

    A module attribute is yielded when it is a class, is a subclass of
    ``Spider``, was defined in *module* itself (its ``__module__`` equals the
    module's ``__name__``, so classes merely imported into the module are
    skipped) and has a truthy ``name`` attribute.
    """
    # Kept as a function-local import until the spider manager singleton in
    # scrapy.spider.spiders is removed.
    from scrapy.spiders import Spider

    for candidate in vars(module).values():
        if not inspect.isclass(candidate):
            continue
        if not issubclass(candidate, Spider):
            continue
        if candidate.__module__ != module.__name__:
            # Defined elsewhere and only imported into this module.
            continue
        if getattr(candidate, 'name', None):
            yield candidate
def spidercls_for_request(spider_loader, request, default_spidercls=None,
                          log_none=False, log_multiple=False):
    """Pick the spider class that should handle *request*.

    The spider loader is asked (``find_by_request``) for the names of all
    spiders able to handle the request. When exactly one name comes back,
    the corresponding class is loaded (``spider_loader.load``) and returned.

    In every other case -- no match or several matches -- *default_spidercls*
    is returned instead. An error is logged for the "several matches" case
    when *log_multiple* is true, and for the "no match" case when *log_none*
    is true.
    """
    matches = spider_loader.find_by_request(request)
    match_count = len(matches)

    if match_count == 1:
        return spider_loader.load(matches[0])

    # Ambiguous or missing match: optionally report it, then fall back.
    if log_multiple and match_count > 1:
        logger.error('More than one spider can handle: %(request)s - %(snames)s',
                     {'request': request, 'snames': ', '.join(matches)})
    elif log_none and match_count == 0:
        logger.error('Unable to find spider that handles: %(request)s',
                     {'request': request})

    return default_spidercls
class DefaultSpider(Spider):
    """Minimal ``Spider`` subclass whose only customization is its name.

    NOTE(review): presumably meant as a fallback spider class (e.g. for the
    ``default_spidercls`` argument of ``spidercls_for_request``) -- confirm
    against callers.
    """

    name = 'default'
|