1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
"""
This module provides some commonly used processors for Item Loaders.
See documentation in docs/topics/loaders.rst
"""
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.datatypes import MergeDict
from .common import wrap_loader_context
class MapCompose(object):
def __init__(self, *functions, **default_loader_context):
self.functions = functions
self.default_loader_context = default_loader_context
def __call__(self, value, loader_context=None):
values = arg_to_iter(value)
if loader_context:
context = MergeDict(loader_context, self.default_loader_context)
else:
context = self.default_loader_context
wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
for func in wrapped_funcs:
next_values = []
for v in values:
next_values += arg_to_iter(func(v))
values = next_values
return values
class Compose(object):
def __init__(self, *functions, **default_loader_context):
self.functions = functions
self.stop_on_none = default_loader_context.get('stop_on_none', True)
self.default_loader_context = default_loader_context
def __call__(self, value, loader_context=None):
if loader_context:
context = MergeDict(loader_context, self.default_loader_context)
else:
context = self.default_loader_context
wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
for func in wrapped_funcs:
if value is None and self.stop_on_none:
break
value = func(value)
return value
class TakeFirst(object):
def __call__(self, values):
for value in values:
if value is not None and value != '':
return value
class Identity(object):
def __call__(self, values):
return values
class Join(object):
def __init__(self, separator=u' '):
self.separator = separator
def __call__(self, values):
return self.separator.join(values)
|