# Tests for the downloader middleware manager (DownloaderMiddlewareManager).
from twisted.trial.unittest import TestCase
from twisted.python.failure import Failure
from scrapy.http import Request, Response
from scrapy.spider import Spider
from scrapy.core.downloader.middleware import DownloaderMiddlewareManager
from scrapy.utils.test import get_crawler
class ManagerTestCase(TestCase):
    """Base fixture that drives requests through a downloader middleware manager.

    ``setUp`` builds a crawler from ``settings_dict``, attaches a spider named
    ``'foo'`` to it and creates a ``DownloaderMiddlewareManager`` from that
    crawler.  Test methods use ``_download`` to run a request/response pair
    through the manager and inspect what comes out.
    """

    # Settings passed to ``get_crawler``; read through ``self`` so a subclass
    # can replace it with its own value.
    settings_dict = None

    def setUp(self):
        """Create the crawler, spider and manager, then open the spider.

        Returns the result of ``mwman.open_spider`` so the test runner
        receives it.
        """
        self.crawler = get_crawler(self.settings_dict)
        self.spider = Spider('foo')
        self.spider.set_crawler(self.crawler)
        self.mwman = DownloaderMiddlewareManager.from_crawler(self.crawler)
        # some mw depends on stats collector
        self.crawler.stats.open_spider(self.spider)
        return self.mwman.open_spider(self.spider)

    def tearDown(self):
        """Close the stats collector and the manager for the test spider.

        Returns the result of ``mwman.close_spider``.
        """
        self.crawler.stats.close_spider(self.spider, '')
        return self.mwman.close_spider(self.spider)

    def _download(self, request, response=None):
        """Run ``request`` through the manager's ``download`` method.

        :param request: the request handed to the middleware manager.
        :param response: the object the fake download function returns.  When
            omitted (``None``), a bare ``Response`` for ``request.url`` is
            used instead.
        :returns: whatever the manager's deferred fired with (the tests
            expect a ``Request`` or a ``Response``).
        :raises: the original exception, when the deferred fired with a
            ``Failure``.
        """
        # Compare against the sentinel explicitly: only a missing response is
        # replaced, never a supplied object that happens to be falsy.
        if response is None:
            response = Response(request.url)

        def download_func(**kwargs):
            # Stand-in for the real downloader: ignores its arguments and
            # always yields the prepared response.
            return response

        dfd = self.mwman.download(download_func, request, self.spider)
        # catch deferred result and return the value
        results = []
        dfd.addBoth(results.append)
        self._wait(dfd)
        ret = results[0]
        if isinstance(ret, Failure):
            ret.raiseException()
        return ret
class DefaultsTest(ManagerTestCase):
    """Checks the middleware manager as built from the default settings."""

    def test_request_response(self):
        """A 200 response passed through the manager yields a Response."""
        request = Request('http://example.com/index.html')
        response = Response(request.url, status=200)
        result = self._download(request, response)
        self.assertTrue(isinstance(result, Response), "Non-response returned")

    def test_3xx_and_invalid_gzipped_body_must_redirect(self):
        """Regression: a redirect that claims gzip encoding must still redirect.

        Some websites answer with a 30x status and a
        'Content-Encoding: gzip' header while the body is not compressed at
        all.  If the httpcompression middleware runs before the redirect
        middleware and tries to decompress that body, the download fails
        with:

            exceptions.IOError: Not a gzipped file

        The expected outcome is a new Request pointing at the Location
        header.
        """
        request = Request('http://example.com')
        payload = '<p>You are being redirected</p>'
        headers = {
            'Content-Length': len(payload),
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
            'Location': 'http://example.com/login',
        }
        response = Response(request.url, status=302, body=payload,
                            headers=headers)

        result = self._download(request=request, response=response)

        self.assertTrue(isinstance(result, Request),
                        "Not redirected: {0!r}".format(result))
        self.assertEqual(result.url, response.headers['Location'],
                         "Not redirected to location header")

    def test_200_and_invalid_gzipped_body_must_fail(self):
        """A 200 response with a bogus gzip body must raise IOError."""
        request = Request('http://example.com')
        payload = '<p>You are being redirected</p>'
        headers = {
            'Content-Length': len(payload),
            'Content-Type': 'text/html',
            'Content-Encoding': 'gzip',
            'Location': 'http://example.com/login',
        }
        response = Response(request.url, status=200, body=payload,
                            headers=headers)

        self.assertRaises(IOError, self._download,
                          request=request, response=response)