1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
|
from w3lib.url import parse_data_uri
from scrapy.http import TextResponse
from scrapy.responsetypes import responsetypes
from scrapy.utils.decorators import defers
class DataURIDownloadHandler(object):
    """Download handler that builds a response straight from a ``data:`` URI.

    The payload is taken from the request URL itself via
    ``parse_data_uri``; the response class is chosen from the URI's media
    type.
    """

    def __init__(self, settings):
        # ``settings`` is accepted but not read by this handler.
        super(DataURIDownloadHandler, self).__init__()

    @defers
    def download_request(self, request, spider):
        """Return a response built from the data URI in ``request.url``.

        The response class comes from ``responsetypes.from_mimetype``.
        When that class is a ``TextResponse`` subclass and the media
        type's top-level part is ``text``, the URI's ``charset``
        parameter (``None`` when absent) is passed as ``encoding``.

        ``spider`` is not used here.
        NOTE(review): the method is wrapped by ``defers``; presumably the
        caller receives the result through a Deferred -- confirm against
        ``scrapy.utils.decorators``.
        """
        parsed = parse_data_uri(request.url)
        response_class = responsetypes.from_mimetype(parsed.media_type)

        # Only textual payloads carry an explicit encoding.
        is_textual = (
            issubclass(response_class, TextResponse)
            and parsed.media_type.split('/')[0] == 'text'
        )
        if not is_textual:
            return response_class(url=request.url, body=parsed.data)

        encoding = parsed.media_type_parameters.get('charset')
        return response_class(
            url=request.url,
            body=parsed.data,
            encoding=encoding,
        )
|