File: default.py

package info (click to toggle)
python-scrapy 2.13.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,664 kB
  • sloc: python: 52,028; xml: 199; makefile: 25; sh: 7
file content (127 lines) | stat: -rw-r--r-- 3,553 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from __future__ import annotations

import json
from typing import Any, Callable

from itemadapter import ItemAdapter, is_item

from scrapy.contracts import Contract
from scrapy.exceptions import ContractFail
from scrapy.http import Request


# contracts
class UrlContract(Contract):
    """Mandatory contract that sets the url of the request, e.g.:

    @url http://scrapy.org
    """

    name = "url"

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Store the first contract argument as ``args["url"]`` and return ``args``."""
        target_url = self.args[0]
        args["url"] = target_url
        return args


class CallbackKeywordArgumentsContract(Contract):
    """Contract that sets the callback keyword arguments of the request.

    The value must be a JSON-encoded dictionary, e.g.:

    @cb_kwargs {"arg1": "some value"}
    """

    name = "cb_kwargs"

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Decode the contract value as JSON into ``args["cb_kwargs"]``."""
        # self.args holds the value in pieces; rejoin them with single
        # spaces to rebuild the JSON text before decoding it.
        raw_json = " ".join(self.args)
        args["cb_kwargs"] = json.loads(raw_json)
        return args


class MetadataContract(Contract):
    """Contract that sets the metadata (``meta``) of the request.

    The value must be a JSON-encoded dictionary, e.g.:

    @meta {"arg1": "some value"}
    """

    name = "meta"

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Decode the contract value as JSON into ``args["meta"]``."""
        # self.args holds the value in pieces; rejoin them with single
        # spaces to rebuild the JSON text before decoding it.
        raw_json = " ".join(self.args)
        args["meta"] = json.loads(raw_json)
        return args


class ReturnsContract(Contract):
    """Contract that checks how many objects of one kind a callback outputs

    general form:
    @returns request(s)/item(s) [min=1 [max]]

    e.g.:
    @returns request
    @returns request 2
    @returns request 2 10
    @returns request 0 10
    """

    name = "returns"
    # Maps the object name given in the contract to the predicate that
    # recognises such objects; singular and plural names are equivalent.
    object_type_verifiers: dict[str | None, Callable[[Any], bool]] = {
        "request": lambda obj: isinstance(obj, Request),
        "requests": lambda obj: isinstance(obj, Request),
        "item": is_item,
        "items": is_item,
    }

    def __init__(self, *args: Any, **kwargs: Any):
        """Parse the object name and the optional min/max bounds.

        Raises ValueError when the number of contract arguments is not
        1, 2 or 3, and KeyError when the object name has no verifier.
        """
        super().__init__(*args, **kwargs)

        arg_count = len(self.args)
        if not 1 <= arg_count <= 3:
            raise ValueError(
                f"Incorrect argument quantity: expected 1, 2 or 3, got {arg_count}"
            )

        # An empty first argument is looked up as None
        self.obj_name = self.args[0] or None
        self.obj_type_verifier = self.object_type_verifiers[self.obj_name]

        # Missing bounds default to "at least one" and "no upper limit"
        self.min_bound: float = int(self.args[1]) if arg_count > 1 else 1
        self.max_bound: float = int(self.args[2]) if arg_count > 2 else float("inf")

    def post_process(self, output: list[Any]) -> None:
        """Raise ContractFail unless the number of matching objects is in bounds."""
        matches = sum(1 for obj in output if self.obj_type_verifier(obj))

        if self.min_bound <= matches <= self.max_bound:
            return

        expected = (
            str(self.min_bound)
            if self.min_bound == self.max_bound
            else f"{self.min_bound}..{self.max_bound}"
        )
        raise ContractFail(f"Returned {matches} {self.obj_name}, expected {expected}")


class ScrapesContract(Contract):
    """Contract to check the presence of fields in scraped items, e.g.:

    @scrapes page_name page_body
    """

    name = "scrapes"

    def post_process(self, output: list[Any]) -> None:
        """Check every item in ``output`` for the fields named in the contract.

        Objects for which ``is_item`` is false are skipped. Raises
        ContractFail, listing the missing field names, for the first item
        that lacks at least one of them.
        """
        for x in output:
            if not is_item(x):
                continue
            # Wrap the item once instead of once per field name
            adapter = ItemAdapter(x)
            missing = [arg for arg in self.args if arg not in adapter]
            if missing:
                missing_fields = ", ".join(missing)
                raise ContractFail(f"Missing fields: {missing_fields}")