1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
from __future__ import annotations
import re
import sys
from collections.abc import AsyncGenerator, Iterable
from functools import wraps
from inspect import getmembers
from types import CoroutineType
from typing import TYPE_CHECKING, Any, cast
from unittest import TestCase, TestResult
from scrapy.http import Request, Response
from scrapy.utils.python import get_spec
from scrapy.utils.spider import iterate_spider_output
if TYPE_CHECKING:
from collections.abc import Callable
from twisted.python.failure import Failure
from scrapy import Spider
class Contract:
    """Abstract class for contracts.

    A subclass sets ``name`` and may define ``pre_process(response)`` and/or
    ``post_process(output)``. ``add_pre_hook`` and ``add_post_hook`` only wrap
    the request callback when the corresponding method exists on the contract.
    """

    # Optional Request subclass this contract wants the test request built
    # with; ``None`` means the contract has no preference.
    request_cls: type[Request] | None = None
    # Contract name; used in the descriptions of the test cases created below.
    name: str

    def __init__(self, method: Callable, *args: Any):
        """Create the pre/post-hook test cases for *method* and store *args*."""
        self.testcase_pre = _create_testcase(method, f"@{self.name} pre-hook")
        self.testcase_post = _create_testcase(method, f"@{self.name} post-hook")
        self.args: tuple[Any, ...] = args

    def add_pre_hook(self, request: Request, results: TestResult) -> Request:
        """Wrap ``request.callback`` so that ``pre_process`` runs before it.

        Does nothing when the contract defines no ``pre_process``. The hook
        outcome is reported to *results*: ``AssertionError`` as a failure, any
        other ``Exception`` as an error, otherwise a success. The original
        callback is invoked afterwards whatever the hook outcome was, and its
        output is returned as a list.

        Returns the same *request* object. The installed wrapper raises
        ``TypeError`` when the callback returns a coroutine or an async
        generator.
        """
        if hasattr(self, "pre_process"):
            cb = request.callback
            assert cb is not None

            @wraps(cb)
            def wrapper(response: Response, **cb_kwargs: Any) -> list[Any]:
                try:
                    results.startTest(self.testcase_pre)
                    self.pre_process(response)
                except AssertionError:
                    results.addFailure(self.testcase_pre, sys.exc_info())
                except Exception:
                    results.addError(self.testcase_pre, sys.exc_info())
                else:
                    results.addSuccess(self.testcase_pre)
                finally:
                    # stopTest must follow the outcome report and must also
                    # run when the hook failed, so every startTest is paired.
                    results.stopTest(self.testcase_pre)
                cb_result = cb(response, **cb_kwargs)
                if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                    raise TypeError("Contracts don't support async callbacks")
                return list(cast(Iterable[Any], iterate_spider_output(cb_result)))

            request.callback = wrapper
        return request

    def add_post_hook(self, request: Request, results: TestResult) -> Request:
        """Wrap ``request.callback`` so that ``post_process`` runs after it.

        Does nothing when the contract defines no ``post_process``. The
        callback output is materialised into a list, handed to
        ``post_process``, and returned unchanged. The hook outcome is reported
        to *results*: ``AssertionError`` as a failure, any other ``Exception``
        as an error, otherwise a success.

        Returns the same *request* object. The installed wrapper raises
        ``TypeError`` when the callback returns a coroutine or an async
        generator.
        """
        if hasattr(self, "post_process"):
            cb = request.callback
            assert cb is not None

            @wraps(cb)
            def wrapper(response: Response, **cb_kwargs: Any) -> list[Any]:
                cb_result = cb(response, **cb_kwargs)
                if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                    raise TypeError("Contracts don't support async callbacks")
                output = list(cast(Iterable[Any], iterate_spider_output(cb_result)))
                try:
                    results.startTest(self.testcase_post)
                    self.post_process(output)
                except AssertionError:
                    results.addFailure(self.testcase_post, sys.exc_info())
                except Exception:
                    results.addError(self.testcase_post, sys.exc_info())
                else:
                    results.addSuccess(self.testcase_post)
                finally:
                    # stopTest must follow the outcome report and must also
                    # run when the hook failed, so every startTest is paired.
                    results.stopTest(self.testcase_post)
                return output

            request.callback = wrapper
        return request

    def adjust_request_args(self, args: dict[str, Any]) -> dict[str, Any]:
        """Return the request keyword arguments; the base class returns
        *args* unchanged."""
        return args
class ContractsManager:
    """Registry of contract classes that turns documented spider callbacks
    into contract-checking requests."""

    # Class-level defaults (contract name -> contract class). Each instance
    # works on its own copy, so registrations never leak between managers.
    contracts: dict[str, type[Contract]] = {}

    def __init__(self, contracts: Iterable[type[Contract]]):
        """Register every class in *contracts* under its ``name``."""
        # Copy instead of mutating the shared class attribute in place.
        self.contracts = dict(type(self).contracts)
        for contract in contracts:
            self.contracts[contract.name] = contract

    def tested_methods_from_spidercls(self, spidercls: type[Spider]) -> list[str]:
        """Return the names of the callable members of *spidercls* whose
        docstring has a line starting (after optional whitespace) with ``@``."""
        has_contract = re.compile(r"^\s*@", re.MULTILINE).search
        return [
            key
            for key, value in getmembers(spidercls)
            if callable(value) and value.__doc__ and has_contract(value.__doc__)
        ]

    def extract_contracts(self, method: Callable) -> list[Contract]:
        """Instantiate one contract per ``@name args...`` docstring line.

        Lines starting with ``@`` that do not match ``@<word>`` are skipped.
        The text after the name is split on whitespace and passed as
        positional string arguments; a line without arguments yields a
        single empty-string argument.

        Raises ``KeyError`` when a name is not registered.
        """
        contracts: list[Contract] = []
        assert method.__doc__ is not None
        for line in method.__doc__.split("\n"):
            line = line.strip()
            if not line.startswith("@"):
                continue
            m = re.match(r"@(\w+)\s*(.*)", line)
            if m is None:
                continue
            name, raw_args = m.groups()
            contracts.append(self.contracts[name](method, *re.split(r"\s+", raw_args)))
        return contracts

    def from_spider(self, spider: Spider, results: TestResult) -> list[Request | None]:
        """Build a request for every contract-bearing method of *spider*.

        An exception raised while building a request is recorded on
        *results* as an error and that method contributes no entry to the
        returned list.
        """
        requests: list[Request | None] = []
        for method in self.tested_methods_from_spidercls(type(spider)):
            bound_method = getattr(spider, method)
            try:
                requests.append(self.from_method(bound_method, results))
            except Exception:
                case = _create_testcase(bound_method, "contract")
                results.addError(case, sys.exc_info())
        return requests

    def from_method(self, method: Callable, results: TestResult) -> Request | None:
        """Build the contract-checking request for *method*.

        Returns ``None`` when the docstring yields no contracts, or when the
        contracts do not supply every positional argument of the request
        class constructor.
        """
        contracts = self.extract_contracts(method)
        if not contracts:
            return None

        # The last contract that declares a request class wins.
        request_cls = Request
        for contract in contracts:
            if contract.request_cls is not None:
                request_cls = contract.request_cls

        # calculate request args
        args, kwargs = get_spec(request_cls.__init__)

        # Don't filter requests to allow
        # testing different callbacks on the same URL.
        kwargs["dont_filter"] = True
        kwargs["callback"] = method

        for contract in contracts:
            kwargs = contract.adjust_request_args(kwargs)

        args.remove("self")

        # check if all positional arguments are defined in kwargs
        if not set(args).issubset(set(kwargs)):
            return None

        request = request_cls(**kwargs)

        # Each hook wraps the current callback, so the wrapper added last is
        # the outermost. Adding pre-hooks in reverse makes the first
        # contract's pre-hook run first; post-hooks added in order make the
        # first contract's post-hook run first after the callback returns.
        for contract in reversed(contracts):
            request = contract.add_pre_hook(request, results)
        for contract in contracts:
            request = contract.add_post_hook(request, results)

        self._clean_req(request, method, results)
        return request

    def _clean_req(
        self, request: Request, method: Callable, results: TestResult
    ) -> None:
        """stop the request from returning objects and records any errors"""
        cb = request.callback
        assert cb is not None

        @wraps(cb)
        def cb_wrapper(response: Response, **cb_kwargs: Any) -> None:
            try:
                # Consume the whole output so lazy callbacks actually run;
                # the items themselves are discarded.
                list(
                    cast(
                        Iterable[Any],
                        iterate_spider_output(cb(response, **cb_kwargs)),
                    )
                )
            except Exception:
                case = _create_testcase(method, "callback")
                results.addError(case, sys.exc_info())

        def eb_wrapper(failure: Failure) -> None:
            case = _create_testcase(method, "errback")
            exc_info = failure.type, failure.value, failure.getTracebackObject()
            results.addError(case, exc_info)

        request.callback = cb_wrapper
        request.errback = eb_wrapper
def _create_testcase(method: Callable, desc: str) -> TestCase:
spider = method.__self__.name # type: ignore[attr-defined]
class ContractTestCase(TestCase):
def __str__(_self) -> str: # pylint: disable=no-self-argument
return f"[{spider}] {method.__name__} ({desc})"
name = f"{spider}_{method.__name__}"
setattr(ContractTestCase, name, lambda x: x)
return ContractTestCase(name)
|