File: default.py

package info (click to toggle)
python-scrapy 2.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,308 kB
  • sloc: python: 55,321; xml: 199; makefile: 25; sh: 7
file content (130 lines) | stat: -rw-r--r-- 3,618 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any

from itemadapter import ItemAdapter, is_item

from scrapy.contracts import Contract
from scrapy.exceptions import ContractFail
from scrapy.http import Request

if TYPE_CHECKING:
    from collections.abc import Callable


# contracts
class UrlContract(Contract):
    """Mandatory contract that supplies the URL of the request.

    Example:
    @url http://scrapy.org
    """

    name = "url"

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Store the first contract argument under ``args["url"]``.

        The mapping is modified in place and the same object is returned.
        """
        target_url = self.args[0]
        args["url"] = target_url
        return args


class CallbackKeywordArgumentsContract(Contract):
    """Contract that supplies the callback keyword arguments of the request.

    The contract value must be a JSON-encoded dictionary, for example:

    @cb_kwargs {"arg1": "some value"}
    """

    name = "cb_kwargs"

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Decode the contract arguments as JSON into ``args["cb_kwargs"]``.

        The mapping is modified in place and the same object is returned.
        """
        # The contract arguments are joined with single spaces and decoded
        # as a single JSON document.
        encoded = " ".join(self.args)
        args["cb_kwargs"] = json.loads(encoded)
        return args


class MetadataContract(Contract):
    """Contract that supplies the metadata arguments of the request.

    The contract value must be a JSON-encoded dictionary, for example:

    @meta {"arg1": "some value"}
    """

    name = "meta"

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Decode the contract arguments as JSON into ``args["meta"]``.

        The mapping is modified in place and the same object is returned.
        """
        # The contract arguments are joined with single spaces and decoded
        # as a single JSON document.
        encoded = " ".join(self.args)
        args["meta"] = json.loads(encoded)
        return args


class ReturnsContract(Contract):
    """Contract that bounds how many objects of one kind a callback returns.

    General form:
    @returns request(s)/item(s) [min=1 [max]]

    Examples:
    @returns request
    @returns request 2
    @returns request 2 10
    @returns request 0 10
    """

    name = "returns"
    # Maps the object name given in the contract to the predicate used to
    # recognise matching entries of the callback output.
    object_type_verifiers: dict[str | None, Callable[[Any], bool]] = {
        "request": lambda x: isinstance(x, Request),
        "requests": lambda x: isinstance(x, Request),
        "item": is_item,
        "items": is_item,
    }

    def __init__(self, *args: Any, **kwargs: Any):
        """Parse the object name and the optional lower and upper bounds.

        Raises:
            ValueError: if the contract does not have 1, 2 or 3 arguments,
                or a bound cannot be converted with ``int()``.
            KeyError: if the object name has no entry in
                ``object_type_verifiers``.
        """
        super().__init__(*args, **kwargs)

        arg_count = len(self.args)
        if not 1 <= arg_count <= 3:
            raise ValueError(
                f"Incorrect argument quantity: expected 1, 2 or 3, got {len(self.args)}"
            )

        # An empty object name is normalised to None before the lookup.
        self.obj_name = self.args[0] or None
        self.obj_type_verifier = self.object_type_verifiers[self.obj_name]

        # Missing bounds fall back to "at least one" and "no upper limit".
        self.min_bound: float = int(self.args[1]) if arg_count > 1 else 1
        self.max_bound: float = (
            int(self.args[2]) if arg_count > 2 else float("inf")
        )

    def post_process(self, output: list[Any]) -> None:
        """Count the matching entries of ``output`` and check the bounds.

        Raises:
            ContractFail: if the count lies outside
                ``[min_bound, max_bound]``.
        """
        occurrences = sum(1 for entry in output if self.obj_type_verifier(entry))

        if self.min_bound <= occurrences <= self.max_bound:
            return

        # Report a single number when both bounds coincide, a range otherwise.
        expected = (
            str(self.min_bound)
            if self.min_bound == self.max_bound
            else f"{self.min_bound}..{self.max_bound}"
        )
        raise ContractFail(
            f"Returned {occurrences} {self.obj_name}, expected {expected}"
        )


class ScrapesContract(Contract):
    """Contract to check presence of fields in scraped items
    @scrapes page_name page_body
    """

    name = "scrapes"

    def post_process(self, output: list[Any]) -> None:
        """Check that every item in ``output`` contains all contract fields.

        Entries for which ``is_item`` is false are skipped. The check stops
        at the first item that lacks one or more fields.

        Raises:
            ContractFail: naming the fields missing from the first
                incomplete item, in the order given in the contract.
        """
        for x in output:
            if not is_item(x):
                continue
            # Wrap the item once instead of once per required field.
            adapter = ItemAdapter(x)
            missing = [arg for arg in self.args if arg not in adapter]
            if missing:
                missing_fields = ", ".join(missing)
                raise ContractFail(f"Missing fields: {missing_fields}")