File: http.py

package info (click to toggle)
python-scrapy 0.14.4-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: python: 19,468; xml: 199; sh: 134; makefile: 67
file content (36 lines) | stat: -rw-r--r-- 1,224 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""Download handlers for http and https schemes"""

from twisted.internet import reactor

from scrapy.exceptions import NotSupported
from scrapy.utils.misc import load_object
from scrapy.conf import settings
from scrapy import optional_features

# HTTPS support is enabled only when 'ssl' is listed in scrapy's
# optional_features; ClientContextFactory is imported solely in that case,
# so the name is undefined when ssl_supported is False.
ssl_supported = 'ssl' in optional_features
if ssl_supported:
    from twisted.internet.ssl import ClientContextFactory

# Default HTTP client factory, looked up once at import time from the
# DOWNLOADER_HTTPCLIENTFACTORY setting.
# NOTE(review): load_object presumably resolves a dotted path to the object
# it names -- confirm against scrapy.utils.misc.
HTTPClientFactory = load_object(settings['DOWNLOADER_HTTPCLIENTFACTORY'])


class HttpDownloadHandler(object):
    """Download handler that opens reactor connections for http/https requests"""

    def __init__(self, httpclientfactory=HTTPClientFactory):
        """Store the callable used to build a client factory per request"""
        self.httpclientfactory = httpclientfactory

    def download_request(self, request, spider):
        """Start the download of `request` and return the factory's deferred.

        A client factory is built from the request, a connection is opened
        for it, and the factory's ``deferred`` attribute is handed back to
        the caller. The `spider` argument is accepted but not used here.
        """
        client_factory = self.httpclientfactory(request)
        self._connect(client_factory)
        return client_factory.deferred

    def _connect(self, factory):
        """Open the reactor connection matching the factory's scheme.

        Returns whatever ``reactor.connectTCP`` / ``reactor.connectSSL``
        returns. Raises NotSupported when the scheme is 'https' but SSL
        support is unavailable.
        """
        # Host and port are read before the scheme is inspected.
        host = factory.host
        port = factory.port

        # Anything that is not https goes over a plain TCP connection.
        if factory.scheme != 'https':
            return reactor.connectTCP(host, port, factory)

        if not ssl_supported:
            raise NotSupported("HTTPS not supported: install pyopenssl library")

        return reactor.connectSSL(host, port, factory, ClientContextFactory())