1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
from __future__ import with_statement
from unittest import TestCase
from os.path import join, abspath, dirname
from scrapy.spider import BaseSpider
from scrapy.http import Response, Request
from scrapy.contrib.downloadermiddleware.httpcompression import HttpCompressionMiddleware
from scrapy.tests import tests_datadir
# Directory (under the shared test data dir) holding the compressed samples.
SAMPLEDIR = join(tests_datadir, 'compressed')

# Sample name -> (file name inside SAMPLEDIR, Content-Encoding header value
# that is sent along with that file's contents).
FORMAT = dict(
    gzip=('html-gzip.bin', 'gzip'),
    rawdeflate=('html-rawdeflate.bin', 'deflate'),
    zlibdeflate=('html-zlibdeflate.bin', 'deflate'),
)
class HttpCompressionTest(TestCase):
    """Exercise HttpCompressionMiddleware on requests and responses.

    Checks use the TestCase assertion methods rather than bare ``assert``
    statements so that they are still evaluated when Python runs with ``-O``.
    """

    def setUp(self):
        """Create a fresh spider and middleware instance for each test."""
        self.spider = BaseSpider()
        self.mw = HttpCompressionMiddleware()

    def _getresponse(self, coding):
        """Build a Response whose body is the stored sample for ``coding``.

        ``coding`` must be a key of FORMAT. The matching sample file is read
        in binary mode from SAMPLEDIR and used as the body, and the
        associated value is sent as the Content-Encoding header. The
        response is linked to a request that carries
        ``Accept-Encoding: gzip,deflate``.

        Raises ValueError if ``coding`` is not a key of FORMAT.
        """
        if coding not in FORMAT:
            raise ValueError('unknown sample coding: %r' % (coding,))

        samplefile, contentencoding = FORMAT[coding]
        with open(join(SAMPLEDIR, samplefile), 'rb') as sample:
            body = sample.read()

        headers = {
            'Server': 'Yaws/1.49 Yet Another Web Server',
            'Date': 'Sun, 08 Mar 2009 00:41:03 GMT',
            'Content-Length': len(body),
            'Content-Type': 'text/html',
            'Content-Encoding': contentencoding,
        }

        response = Response('http://scrapytest.org/', body=body, headers=headers)
        response.request = Request('http://scrapytest.org',
                                   headers={'Accept-Encoding': 'gzip,deflate'})
        return response

    def _assert_decoded(self, coding, contentencoding):
        """Check that the ``coding`` sample gets decoded by the middleware.

        The sample response must announce ``contentencoding``; after
        ``process_response`` a different response object is expected, with a
        body starting with ``<!DOCTYPE`` and no Content-Encoding header.
        """
        response = self._getresponse(coding)
        request = response.request
        self.assertEqual(response.headers['Content-Encoding'], contentencoding)

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertTrue(newresponse is not response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))
        self.assertTrue('Content-Encoding' not in newresponse.headers)

    def test_process_request(self):
        """A request without Accept-Encoding gets 'gzip,deflate' added."""
        request = Request('http://scrapytest.org')
        self.assertTrue('Accept-Encoding' not in request.headers)
        self.mw.process_request(request, self.spider)
        self.assertEqual(request.headers.get('Accept-Encoding'), 'gzip,deflate')

    def test_process_response_gzip(self):
        """The gzip sample is decoded and its encoding header removed."""
        self._assert_decoded('gzip', 'gzip')

    def test_process_response_rawdeflate(self):
        """The 'rawdeflate' sample (sent as 'deflate') is decoded."""
        self._assert_decoded('rawdeflate', 'deflate')

    # NOTE: the method name is misspelled ("zlibdelate"); it is kept as-is so
    # that selecting this test by name keeps working.
    def test_process_response_zlibdelate(self):
        """The 'zlibdeflate' sample (sent as 'deflate') is decoded."""
        self._assert_decoded('zlibdeflate', 'deflate')

    def test_process_response_plain(self):
        """A response without Content-Encoding is returned untouched."""
        response = Response('http://scrapytest.org', body='<!DOCTYPE...')
        request = Request('http://scrapytest.org')
        self.assertFalse(response.headers.get('Content-Encoding'))

        newresponse = self.mw.process_response(request, response, self.spider)
        # The very same object must come back, not a copy.
        self.assertTrue(newresponse is response)
        self.assertTrue(newresponse.body.startswith('<!DOCTYPE'))

    def test_multipleencodings(self):
        """With two encodings listed, 'uuencode' is left in the header."""
        response = self._getresponse('gzip')
        response.headers['Content-Encoding'] = ['uuencode', 'gzip']
        request = response.request

        newresponse = self.mw.process_response(request, response, self.spider)
        self.assertTrue(newresponse is not response)
        self.assertEqual(newresponse.headers.getlist('Content-Encoding'),
                         ['uuencode'])
|