File: form.py

package info (click to toggle)
python-scrapy 0.14.4-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: python: 19,468; xml: 199; sh: 134; makefile: 67
file content (75 lines) | stat: -rw-r--r-- 2,770 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
This module implements the FormRequest class which is a more convenient class
(than Request) to generate Requests based on form data.

See documentation in docs/topics/request-response.rst
"""

import urllib
from cStringIO import StringIO

from scrapy.xlib.ClientForm import ParseFile

from scrapy.http.request import Request
from scrapy.utils.python import unicode_to_str

def _unicode_to_str(string, encoding):
    """Apply ``unicode_to_str`` to a single value or to each item of a collection.

    Objects exposing ``__iter__`` are treated as collections and converted
    item by item into a new list; anything else is converted directly and
    returned as a single value.
    """
    if not hasattr(string, '__iter__'):
        # Scalar case: convert the value itself.
        return unicode_to_str(string, encoding)
    return [unicode_to_str(item, encoding) for item in string]


class FormRequest(Request):
    """Request subclass that builds its body from HTML form data.

    Pass ``formdata`` to the constructor to send url-encoded fields, or use
    ``from_response`` to pre-populate the request from a ``<form>`` element
    parsed out of a response.
    """

    def __init__(self, *args, **kwargs):
        """Create the request, optionally url-encoding ``formdata`` as its body.

        All positional and keyword arguments are forwarded to ``Request``
        except ``formdata``, which may be a dict or an iterable of
        ``(name, value)`` pairs. A value may itself be a collection of values
        (anything exposing ``__iter__``), in which case the field is emitted
        once per value.

        When ``formdata`` is non-empty the method is set to ``POST``, the body
        is set to the url-encoded fields and the ``Content-Type`` header is
        set to ``application/x-www-form-urlencoded``.
        """
        formdata = kwargs.pop('formdata', None)
        super(FormRequest, self).__init__(*args, **kwargs)

        if formdata:
            items = formdata.iteritems() if isinstance(formdata, dict) else formdata
            query = [(unicode_to_str(k, self.encoding), _unicode_to_str(v, self.encoding))
                    for k, v in items]
            self.method = 'POST'
            # doseq=1 expands list values into one "name=value" pair each.
            self._set_body(urllib.urlencode(query, doseq=1))
            self.headers['Content-Type'] = 'application/x-www-form-urlencoded'

    @classmethod
    def from_response(cls, response, formname=None, formnumber=0, formdata=None,
                      clickdata=None, dont_click=False, **kwargs):
        """Build a request that submits a form found in ``response``.

        The forms are parsed from ``response.body``. The form named
        ``formname`` is used when one exists; otherwise (including when
        ``formname`` is given but matches nothing) the form at position
        ``formnumber`` is used.

        ``formdata`` may be a dict or an iterable of ``(name, value)`` pairs,
        the same shapes the constructor accepts. Every existing control whose
        name appears in ``formdata`` is dropped and replaced by new text
        controls holding the given value(s).

        Unless ``dont_click`` is true, the request data comes from the form's
        ``click_request_data``, called with ``clickdata`` as keyword
        arguments. Remaining keyword arguments are forwarded to the class
        constructor. Headers produced by the form are merged over any
        ``headers`` supplied by the caller; the merge happens on a copy, so
        the caller's object is left untouched.

        Raises ``ValueError`` if no form is found in the response and
        ``IndexError`` if ``formnumber`` is out of range.
        """
        encoding = getattr(response, 'encoding', 'utf-8')
        forms = ParseFile(StringIO(response.body), response.url,
                          encoding=encoding, backwards_compat=False)
        if not forms:
            raise ValueError("No <form> element found in %s" % response)

        form = None

        if formname:
            for f in forms:
                if f.name == formname:
                    form = f
                    break

        # Identity check: the fallback must only depend on whether a named
        # form was selected, not on how a form object evaluates as a boolean.
        if form is None:
            try:
                form = forms[formnumber]
            except IndexError:
                raise IndexError("Form number %d not found in %s" % (formnumber, response))

        if formdata:
            # Accept the same shapes as the constructor: a dict or an
            # iterable of (name, value) pairs. Materialise it because it is
            # traversed twice below.
            if isinstance(formdata, dict):
                items = formdata.items()
            else:
                items = list(formdata)
            names = set(k for k, _ in items)

            # remove all existing fields with the same name first, so that
            # formdata fields properly override existing ones, which is the
            # desired behaviour
            form.controls = [c for c in form.controls if c.name not in names]
            for k, v in items:
                values = v if hasattr(v, '__iter__') else [v]
                for v2 in values:
                    form.new_control('text', k, {'value': v2})

        if dont_click:
            # NOTE(review): private ClientForm hook, presumably yielding the
            # request data without clicking any control -- confirm.
            url, body, headers = form._switch_click('request_data')
        else:
            url, body, headers = form.click_request_data(**(clickdata or {}))

        # Merge into a copy so a headers object shared by the caller (for
        # example a module-level dict reused across requests) is not
        # polluted with this form's headers.
        caller_headers = kwargs.get('headers')
        merged_headers = caller_headers.copy() if caller_headers is not None else {}
        merged_headers.update(headers)
        kwargs['headers'] = merged_headers

        return cls(url, method=form.method, body=body, **kwargs)