File: http.py

Package: python-scrapy 0.8-3
"""Download handlers for http and https schemes"""

from twisted.internet import reactor

from scrapy.core import signals
from scrapy.core.exceptions import NotSupported
from scrapy.utils.signal import send_catch_log
from scrapy.utils.misc import load_object
from scrapy.conf import settings
from scrapy import optional_features

# pyOpenSSL is optional; only import the SSL context factory when it is available
ssl_supported = 'ssl' in optional_features
if ssl_supported:
    from twisted.internet.ssl import ClientContextFactory


# The HTTP client factory class and the default timeout are configured via settings
HTTPClientFactory = load_object(settings['DOWNLOADER_HTTPCLIENTFACTORY'])
default_timeout = settings.getint('DOWNLOAD_TIMEOUT')

def _create_factory(request, spider):
    """Build an HTTP client factory for the request, firing the download
    signals once the response arrives."""
    def _download_signals(response):
        send_catch_log(signal=signals.request_uploaded,
                sender='download_http', request=request, spider=spider)
        send_catch_log(signal=signals.response_downloaded,
                sender='download_http', response=response, spider=spider)
        return response

    # A spider-level download_timeout overrides the global DOWNLOAD_TIMEOUT setting
    timeout = getattr(spider, "download_timeout", None) or default_timeout
    factory = HTTPClientFactory(request, timeout)
    factory.deferred.addCallback(_download_signals)
    return factory


def _connect(factory):
    """Open a TCP or SSL connection for the given client factory."""
    host, port = factory.host, factory.port
    if factory.scheme == 'https':
        if ssl_supported:
            return reactor.connectSSL(host, port, factory, ClientContextFactory())
        raise NotSupported("HTTPS not supported: install pyopenssl library")
    else:
        return reactor.connectTCP(host, port, factory)


def download_http(request, spider):
    """Return a deferred for the HTTP download"""
    factory = _create_factory(request, spider)
    _connect(factory)
    return factory.deferred
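

A minimal usage sketch: the deferred returned by download_http is a regular Twisted
deferred, so callbacks can be attached to it directly. This assumes request is a
scrapy.http.Request and spider is a BaseSpider instance created elsewhere;
handle_response and handle_error are illustrative names, not part of this module.

    from twisted.internet import reactor

    def handle_response(response):
        # The deferred fires with the downloaded Response object
        print response.status, response.url
        reactor.stop()

    def handle_error(failure):
        # Connection errors, timeouts, etc. arrive as a Twisted Failure
        print failure.getErrorMessage()
        reactor.stop()

    d = download_http(request, spider)
    d.addCallbacks(handle_response, handle_error)
    reactor.run()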