File: aws.py

package info (click to toggle)
python-scrapy 0.8-3
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 2,904 kB
  • ctags: 2,981
  • sloc: python: 15,349; xml: 199; makefile: 68; sql: 64; sh: 34
file content (72 lines) | stat: -rw-r--r-- 2,493 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Helper function for working with Amazon Web Services"""

import re
import time
import hmac
import base64
import hashlib
from urlparse import urlsplit

AMAZON_HEADER_PREFIX = 'x-amz-'


def canonical_string(method, path, headers, expires=None):
    """Generate the AWS canonical string (the string to sign) for a request.

    The result is made of newline-separated lines, in this order: the HTTP
    method, the values of the ``content-md5``, ``content-type`` and ``date``
    headers, every ``x-amz-*`` header as ``name:value`` (sorted by lowercased
    name), and finally the resource.

    method  -- HTTP verb; becomes the first line.
    path    -- resource path, optionally followed by ``?query``. The query
               is dropped, except that the first of ``acl``, ``logging``,
               ``torrent`` or ``location`` found in it is re-appended as a
               bare ``?name`` sub-resource.
    headers -- mapping of header names to string values. Names are matched
               case-insensitively and values are stripped of surrounding
               whitespace; all other headers are ignored.
    expires -- optional expiry used for query string authentication. When
               truthy, ``str(expires)`` replaces the date value.

    Returns the canonical string. ``content-md5`` and ``content-type``
    contribute an empty line when absent; ``date`` contributes a line only
    if a ``Date`` or ``x-amz-date`` header, or ``expires``, is supplied.
    """
    # Built once instead of on every loop iteration.
    positional_headers = ('content-md5', 'content-type', 'date')

    interesting_headers = {}
    for key in headers:
        lk = key.lower()
        if lk in positional_headers or lk.startswith(AMAZON_HEADER_PREFIX):
            interesting_headers[lk] = headers[key].strip()

    # These keys get empty strings if they don't exist.
    interesting_headers.setdefault('content-type', '')
    interesting_headers.setdefault('content-md5', '')

    # When x-amz-date is given it is signed as an x-amz-* header, so the
    # positional date line must be left blank.
    if 'x-amz-date' in interesting_headers:
        interesting_headers['date'] = ''

    # For query string auth, expires trumps date (and x-amz-date).
    if expires:
        interesting_headers['date'] = str(expires)

    lines = ["%s" % method]
    # sorted() works whether keys() returns a list or a view object, unlike
    # calling .sort() on the result of keys().
    for key in sorted(interesting_headers):
        value = interesting_headers[key]
        if key.startswith(AMAZON_HEADER_PREFIX):
            lines.append("%s:%s" % (key, value))
        else:
            lines.append("%s" % value)

    # Don't include anything after the first ? in the resource...
    resource = "%s" % path.split('?')[0]

    # ...unless there is a sub-resource parameter; the first match in this
    # priority order wins and only one is ever appended.
    for subresource in ('acl', 'logging', 'torrent', 'location'):
        if re.search("[&?]%s($|=|&)" % subresource, path):
            resource += "?%s" % subresource
            break

    lines.append(resource)
    return "\n".join(lines)

def sign_request(req, accesskey, secretkey):
    """Sign ``req`` in place by setting its AWS ``Authorization`` header.

    A ``Date`` header holding the current GMT time is added first when the
    request has none. The resource to sign is derived from the request URL:

    * ``<bucket>.s3.amazonaws.com`` hosts give ``/<bucket>`` + path;
    * the bare ``s3.amazonaws.com`` host (path-style URL) uses the path as
      is, because the bucket is already its first segment;
    * any other host is used as the bucket name itself.

    req       -- request object exposing ``url``, ``method`` and a mutable
                 ``headers`` mapping.
    accesskey -- AWS access key id, embedded in the header value.
    secretkey -- AWS secret key, used as the HMAC-SHA1 key.

    Returns nothing; ``req.headers`` is modified.
    """
    if 'Date' not in req.headers:
        req.headers['Date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())

    parsed = urlsplit(req.url)
    endpoint = 's3.amazonaws.com'
    hostname = parsed.hostname
    # An empty URL path is requested as "/", so that is what must be signed.
    path = parsed.path or '/'

    if hostname == endpoint:
        # Path-style URL: prefixing the host here would sign a resource
        # that does not match the one being requested.
        fqkey = path
    else:
        suffix = '.' + endpoint
        if hostname.endswith(suffix):
            # Strip the endpoint only as a suffix, never from the middle
            # of the host name.
            bucket = hostname[:-len(suffix)]
        else:
            bucket = hostname
        fqkey = '/%s%s' % (bucket, path)

    # canonical_string() decides which query parameters are kept.
    if parsed.query:
        fqkey = '%s?%s' % (fqkey, parsed.query)

    c_string = canonical_string(req.method, fqkey, req.headers)
    digest = hmac.new(secretkey, c_string, hashlib.sha1).digest()
    # b64encode never inserts line breaks, unlike the legacy encodestring.
    b64_hmac = base64.b64encode(digest).strip()
    req.headers['Authorization'] = "AWS %s:%s" % (accesskey, b64_hmac)