1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
|
"""Integration with Python standard library module urllib2: OpenerDirector
class.
Copyright 2004-2006 John J Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
try: True
except NameError:
True = 1
False = 0
import urllib2, string, bisect, urlparse
from _Util import startswith, isstringlike
from _Request import Request
def methnames(obj):
    """List the names of the callable attributes of an instance.

    The names are gathered by methnames_of_instance_as_dict rather than by
    relying on dir(obj) alone, which does not behave the same across Python
    versions.
    """
    names_as_dict = methnames_of_instance_as_dict(obj)
    return names_as_dict.keys()
def methnames_of_instance_as_dict(inst):
    """Return a dict keyed by the names of inst's callable attributes.

    The result starts from the names found on inst's class (and its base
    classes), then adds every name reported by dir(inst) whose attribute is
    callable.  All values are None; only the keys carry information.
    """
    found = {}
    found.update(methnames_of_class_as_dict(inst.__class__))
    instance_callables = [attr_name for attr_name in dir(inst)
                          if callable(getattr(inst, attr_name))]
    for attr_name in instance_callables:
        found[attr_name] = None
    return found
def methnames_of_class_as_dict(klass):
    """Return a dict keyed by the names of klass's callable attributes.

    Names reported by dir(klass) are collected first, then the same is done
    recursively for every class in klass.__bases__ and the results are
    merged in.  All values are None; only the keys carry information.
    """
    own_callables = [attr_name for attr_name in dir(klass)
                     if callable(getattr(klass, attr_name))]
    found = {}
    for attr_name in own_callables:
        found[attr_name] = None
    # Walk the bases explicitly instead of trusting dir() to include
    # inherited names.
    inherited = [methnames_of_class_as_dict(base) for base in klass.__bases__]
    for base_names in inherited:
        found.update(base_names)
    return found
class OpenerMixin:
    """Mixin providing request coercion and a urlretrieve-style download.

    The class this is mixed into must supply an open(req) method; retrieve()
    calls read(), info() and close() on the object that method returns.
    """

    def _request(self, url_or_req, data):
        """Return a request object for url_or_req.

        url_or_req -- a string-like URL (as judged by isstringlike), from
            which a new Request is built, or an existing request object,
            which is returned as-is
        data -- request body or None; when not None it is attached to an
            existing request object with add_data()
        """
        if isstringlike(url_or_req):
            req = Request(url_or_req, data)
        else:
            # already a urllib2.Request or ClientCookie.Request instance
            req = url_or_req
            if data is not None:
                req.add_data(data)
        return req

    def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
        """Fetch fullurl into a local file and return (filename, headers).

        fullurl -- URL string or request object (see _request)
        filename -- path to write to; when omitted, a named temporary file
            is created (its suffix copied from the URL path) and its name
            is returned.  The caller is responsible for removing it.
        reporthook -- optional callable invoked as
            reporthook(block_number, block_size, total_size): once with
            block number 0 before the first read, then after every read,
            including the final empty one.  total_size is -1 when the
            response carries no Content-Length header.
        data -- optional request body, passed on to _request

        For a file: URL with no filename given, nothing is copied: the
        local path derived from the request selector is returned instead.

        Raises IOError if fewer bytes arrive than Content-Length announced.
        """
        # Function-scope imports: the module's import block does not
        # provide these names.
        import os
        import tempfile

        req = self._request(fullurl, data)
        type_ = req.get_type()
        fp = self.open(req)
        bs = 1024*8
        size = -1
        read = 0
        try:
            headers = fp.info()
            if filename is None and type_ == 'file':
                # Already a local file: hand back its path, no copy needed.
                from urllib import url2pathname
                return url2pathname(req.get_selector()), headers

            if filename:
                tfp = open(filename, 'wb')
            else:
                # fullurl may be a request object, so take the path from
                # the request's URL rather than from the argument.
                path = urlparse.urlparse(req.get_full_url())[2]
                suffix = os.path.splitext(path)[1]
                # mkstemp yields a named file that survives close(), so
                # the filename handed back to the caller is usable.
                fd, filename = tempfile.mkstemp(suffix)
                tfp = os.fdopen(fd, 'wb')
            try:
                # Parse Content-Length whether or not a reporthook was
                # supplied; otherwise the truncation check below is
                # silently disabled for hook-less calls.
                content_length = headers.get("content-length")
                if content_length is not None:
                    size = int(content_length)
                if reporthook:
                    reporthook(0, bs, size)
                blocknum = 1
                while 1:
                    block = fp.read(bs)
                    read += len(block)
                    if reporthook:
                        reporthook(blocknum, bs, size)
                    blocknum = blocknum + 1
                    if not block:
                        break
                    tfp.write(block)
            finally:
                tfp.close()
        finally:
            # Close the response on every path, including the file: early
            # return and any error raised while copying.
            fp.close()

        if size >= 0 and read < size:
            raise IOError("incomplete retrieval error",
                          "got only %d bytes out of %d" % (read, size))
        return filename, headers
class OpenerDirector(urllib2.OpenerDirector, OpenerMixin):
    """urllib2.OpenerDirector extended with request/response processors.

    Besides the <protocol>_open and <protocol>_error_<kind> methods that
    add_handler files under handle_open and handle_error, handlers may
    define <protocol>_request and <protocol>_response methods.  open()
    runs the request through the former before opening it and the response
    through the latter afterwards.
    """

    def __init__(self):
        urllib2.OpenerDirector.__init__(self)
        # Each maps a protocol name to the sorted list of handlers that
        # define <protocol>_response / <protocol>_request.
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register handler under every hook its method names advertise.

        Each method name is split at its first underscore into
        <protocol>_<condition>.  Depending on the condition the handler is
        filed under handle_error, handle_open, process_request or
        process_response.  A handler that matched at least one hook is
        also inserted into self.handlers and told about its parent.
        """
        added = False
        for meth in methnames(handler):
            i = meth.find("_")
            if i < 0:
                # No <protocol>_<condition> structure.  Without this guard
                # the -1 index would chop the last character off the name
                # and could register e.g. "open" under protocol "ope".
                continue
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # The kind is whatever follows the next underscore, e.g.
                # "404" or "default" in http_error_404 / http_error_default.
                k = condition.find("_")
                kind = condition[k+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.setdefault(protocol, {})
            elif (condition == "open" and
                  protocol not in ["do", "proxy"]):  # hack -- see below
                kind = protocol
                lookup = self.handle_open
            elif (condition in ["response", "request"] and
                  protocol != "redirect"):  # yucky hack
                # hack above is to fix HTTPRedirectHandler problem, which
                # would otherwise look like a processor because of its
                # redirect_request method :-((
                kind = protocol
                lookup = getattr(self, "process_" + condition)
            else:
                continue

            if kind in lookup:
                bisect.insort(lookup[kind], handler)
            else:
                lookup[kind] = [handler]
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def open(self, fullurl, data=None):
        """Open fullurl, applying request and response processors.

        fullurl -- URL string or request object (see OpenerMixin._request)
        data -- optional request body

        Returns the response after every <type>_response processor
        registered for the request's type has been applied to it.
        """
        req = self._request(fullurl, data)
        type_ = req.get_type()

        # pre-process request
        # XXX should we allow a Processor to change the type (URL
        #   scheme) of the request?
        meth_name = type_ + "_request"
        for processor in self.process_request.get(type_, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = urllib2.OpenerDirector.open(self, req, data)

        # post-process response
        meth_name = type_ + "_response"
        for processor in self.process_response.get(type_, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def error(self, proto, *args):
        """Dispatch an error to the handler chain registered for proto.

        For 'http' and 'https' the chain for the specific kind taken from
        args[2] is tried first; if it yields no result, the
        http_error_default chain is tried with the original arguments.
        For any other protocol the <proto>_error chain is called.

        Returns the first true result from the chain, else None.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            # https is not different than http
            handlers = self.handle_error['http']
            # YUCK!  NOTE(review): presumably the HTTP status code in
            # urllib2's calling convention -- confirm against callers.
            proto = args[2]
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            handlers = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (handlers, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (handlers, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
|