1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
import os
import sys
from twisted.trial.unittest import TestCase, SkipTest
from scrapy.contrib.downloadermiddleware.httpproxy import HttpProxyMiddleware
from scrapy.core.exceptions import NotConfigured
from scrapy.http import Response, Request
from scrapy.spider import BaseSpider
from scrapy.conf import settings
# Shared spider instance handed to every mw.process_request() call in the tests below.
spider = BaseSpider()
class TestDefaultHeadersMiddleware(TestCase):
    """Tests for HttpProxyMiddleware driven by ``*_proxy`` environment variables.

    Every test manipulates ``os.environ`` (sometimes by rebinding it to a
    plain dict), so setUp/tearDown snapshot and restore the environment
    around each test.
    """

    # Report bare ``assert`` statements as test failures.
    failureException = AssertionError

    def setUp(self):
        """Remember the real environment mapping and a snapshot of its contents."""
        # Keep the real mapping object: some tests rebind ``os.environ`` to a
        # plain dict, and the original object must be put back afterwards.
        self._environ = os.environ
        self._oldenv = os.environ.copy()

    def tearDown(self):
        """Put the real ``os.environ`` object back and restore its contents."""
        # Rebinding ``os.environ`` to the snapshot (a plain dict) would leave
        # the process without its real environment mapping for the rest of
        # the run, so restore the original object and fix it up in place.
        os.environ = self._environ
        for key in list(os.environ.keys()):
            if key not in self._oldenv:
                del os.environ[key]
        os.environ.update(self._oldenv)

    def test_no_proxies(self):
        """With an empty environment the middleware raises NotConfigured."""
        os.environ = {}
        self.assertRaises(NotConfigured, HttpProxyMiddleware)

    def test_no_enviroment_proxies(self):
        """Requests are left untouched when no proxy matches their scheme."""
        # The lone 'dummy_proxy' entry is presumably there so construction
        # does not raise NotConfigured while no real scheme has a proxy.
        os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'}
        mw = HttpProxyMiddleware()

        for url in ('http://e.com', 'https://e.com', 'file:///tmp/a'):
            req = Request(url)
            assert mw.process_request(req, spider) is None
            self.assertEqual(req.url, url)
            self.assertEqual(req.meta, {})

    def test_enviroment_proxies(self):
        """Each request gets the proxy configured for its URL scheme, if any."""
        os.environ['http_proxy'] = http_proxy = 'https://proxy.for.http:3128'
        os.environ['https_proxy'] = https_proxy = 'http://proxy.for.https:8080'
        os.environ.pop('file_proxy', None)
        mw = HttpProxyMiddleware()

        expectations = [
            ('http://e.com', http_proxy),
            ('https://e.com', https_proxy),
            ('file://tmp/a', None),
        ]
        for url, proxy in expectations:
            req = Request(url)
            assert mw.process_request(req, spider) is None
            self.assertEqual(req.url, url)
            self.assertEqual(req.meta.get('proxy'), proxy)

    def test_proxy_auth(self):
        """Proxy credentials move from the proxy URL into Proxy-Authorization."""
        os.environ['http_proxy'] = 'https://user:pass@proxy:3128'
        mw = HttpProxyMiddleware()

        req = Request('http://scrapytest.org')
        assert mw.process_request(req, spider) is None
        self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
        # 'dXNlcjpwYXNz' is the base64 encoding of 'user:pass'.
        self.assertEqual(req.headers.get('Proxy-Authorization'),
                         'Basic dXNlcjpwYXNz')

    def test_proxy_already_seted(self):
        """An explicit ``proxy`` entry in request meta (even None) is kept."""
        os.environ['http_proxy'] = 'https://proxy.for.http:3128'
        mw = HttpProxyMiddleware()

        req = Request('http://noproxy.com', meta={'proxy': None})
        assert mw.process_request(req, spider) is None
        assert 'proxy' in req.meta and req.meta['proxy'] is None

    def test_no_proxy(self):
        """The ``no_proxy`` variable exempts matching hosts from proxying."""
        if sys.version_info < (2, 6):
            raise SkipTest('no_proxy is not supported in python < 2.6')

        os.environ['http_proxy'] = 'https://proxy.for.http:3128'
        mw = HttpProxyMiddleware()

        # ``no_proxy`` is changed only after the middleware has been built,
        # so each case checks the value in effect when the request is handled.
        cases = (
            ('*', False),
            ('other.com', True),
            ('other.com,noproxy.com', False),
        )
        for no_proxy, expect_proxy in cases:
            os.environ['no_proxy'] = no_proxy
            req = Request('http://noproxy.com')
            assert mw.process_request(req, spider) is None
            assert ('proxy' in req.meta) == expect_proxy
|