File: default.py

package info (click to toggle)
python-scrapy 0.24.2-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 3,240 kB
  • ctags: 4,259
  • sloc: python: 21,170; xml: 199; makefile: 67; sh: 44
file content (89 lines) | stat: -rw-r--r-- 2,310 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from scrapy.item import BaseItem
from scrapy.http import Request
from scrapy.exceptions import ContractFail

from . import Contract


# contracts
class UrlContract(Contract):
    """ Contract to set the url of the request (mandatory)
        @url http://scrapy.org
    """

    name = 'url'

    def adjust_request_args(self, args):
        args['url'] = self.args[0]
        return args


class ReturnsContract(Contract):
    """ Contract to check the output of a callback

        general form:
        @returns request(s)/item(s) [min=1 [max]]

        e.g.:
        @returns request
        @returns request 2
        @returns request 2 10
        @returns request 0 10
    """

    name = 'returns'
    objects = {
        'request': Request,
        'requests': Request,
        'item': BaseItem,
        'items': BaseItem,
    }

    def __init__(self, *args, **kwargs):
        super(ReturnsContract, self).__init__(*args, **kwargs)

        assert len(self.args) in [1, 2, 3]
        self.obj_name = self.args[0] or None
        self.obj_type = self.objects[self.obj_name]

        try:
            self.min_bound = int(self.args[1])
        except IndexError:
            self.min_bound = 1

        try:
            self.max_bound = int(self.args[2])
        except IndexError:
            self.max_bound = float('inf')

    def post_process(self, output):
        occurrences = 0
        for x in output:
            if isinstance(x, self.obj_type):
                occurrences += 1

        assertion = (self.min_bound <= occurrences <= self.max_bound)

        if not assertion:
            if self.min_bound == self.max_bound:
                expected = self.min_bound
            else:
                expected = '%s..%s' % (self.min_bound, self.max_bound)

            raise ContractFail("Returned %s %s, expected %s" % \
                (occurrences, self.obj_name, expected))


class ScrapesContract(Contract):
    """ Contract to check presence of fields in scraped items
        @scrapes page_name page_body
    """

    name = 'scrapes'

    def post_process(self, output):
        for x in output:
            if isinstance(x, BaseItem):
                for arg in self.args:
                    if not arg in x:
                        raise ContractFail("'%s' field is missing" % arg)