1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
|
"""
This module implements the Response class which is used to represent HTTP
responses in Scrapy.
See documentation in docs/topics/request-response.rst
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, AnyStr, TypeVar, overload
from urllib.parse import urljoin
from scrapy.exceptions import NotSupported
from scrapy.http.headers import Headers
from scrapy.http.request import Request
from scrapy.link import Link
from scrapy.utils.trackref import object_ref
if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Mapping
from ipaddress import IPv4Address, IPv6Address
from twisted.internet.ssl import Certificate
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.http.request import CallbackT, CookiesT
from scrapy.selector import SelectorList
# Type variable bound to Response. It is used by the ``Response.replace``
# overload that takes an explicit ``cls`` so the return type is that class.
ResponseTypeVar = TypeVar("ResponseTypeVar", bound="Response")
class Response(object_ref):
    """An object that represents an HTTP response, which is usually
    downloaded (by the Downloader) and fed to the Spiders for processing.
    """

    attributes: tuple[str, ...] = (
        "url",
        "status",
        "headers",
        "body",
        "flags",
        "request",
        "certificate",
        "ip_address",
        "protocol",
    )
    """A tuple of :class:`str` objects containing the name of all public
    attributes of the class that are also keyword parameters of the
    ``__init__()`` method.

    Currently used by :meth:`Response.replace`.
    """

    def __init__(
        self,
        url: str,
        status: int = 200,
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes = b"",
        flags: list[str] | None = None,
        request: Request | None = None,
        certificate: Certificate | None = None,
        ip_address: IPv4Address | IPv6Address | None = None,
        protocol: str | None = None,
    ) -> None:
        """Build a response from its URL, status, headers and body.

        ``headers`` is wrapped in a :class:`Headers` object (an empty one when
        omitted), ``status`` is coerced with :func:`int` and ``flags`` is
        copied into a new list. The remaining arguments are stored as given.

        :raises TypeError: if ``url`` is not a :class:`str` or ``body`` is
            neither :class:`bytes` nor ``None``.
        """
        self.headers: Headers = Headers(headers or {})
        self.status: int = int(status)
        # Body and URL go through their setters so that type validation
        # happens in one place.
        self._set_body(body)
        self._set_url(url)
        self.request: Request | None = request
        # Copy the list so later mutations of the caller's list do not leak in.
        self.flags: list[str] = [] if flags is None else list(flags)
        self.certificate: Certificate | None = certificate
        self.ip_address: IPv4Address | IPv6Address | None = ip_address
        self.protocol: str | None = protocol

    @property
    def cb_kwargs(self) -> dict[str, Any]:
        """The ``cb_kwargs`` of the request tied to this response.

        :raises AttributeError: if the attribute cannot be read from
            ``self.request`` (e.g. the response has no request).
        """
        try:
            return self.request.cb_kwargs  # type: ignore[union-attr]
        except AttributeError:
            # Reached when ``self.request`` is None (or lacks the attribute).
            # ``from None`` hides the low-level error so callers only see the
            # explanatory message below.
            raise AttributeError(
                "Response.cb_kwargs not available, this response "
                "is not tied to any request"
            ) from None

    @property
    def meta(self) -> dict[str, Any]:
        """The ``meta`` of the request tied to this response.

        :raises AttributeError: if the attribute cannot be read from
            ``self.request`` (e.g. the response has no request).
        """
        try:
            return self.request.meta  # type: ignore[union-attr]
        except AttributeError:
            # Same rationale as in ``cb_kwargs``: surface only the
            # explanatory error, not the internal attribute lookup failure.
            raise AttributeError(
                "Response.meta not available, this response is not tied to any request"
            ) from None

    @property
    def url(self) -> str:
        """The URL of this response (read-only)."""
        return self._url

    def _set_url(self, url: str) -> None:
        """Store ``url``, raising :exc:`TypeError` unless it is a :class:`str`."""
        if isinstance(url, str):
            self._url: str = url
        else:
            raise TypeError(
                f"{type(self).__name__} url must be str, got {type(url).__name__}"
            )

    @property
    def body(self) -> bytes:
        """The body of this response as :class:`bytes` (read-only)."""
        return self._body

    def _set_body(self, body: bytes | None) -> None:
        """Store ``body``; ``None`` is stored as ``b""``.

        :raises TypeError: if ``body`` is neither ``None`` nor :class:`bytes`.
        """
        if body is None:
            self._body = b""
        elif not isinstance(body, bytes):
            raise TypeError(
                "Response body must be bytes. "
                "If you want to pass unicode body use TextResponse "
                "or HtmlResponse."
            )
        else:
            self._body = body

    def __repr__(self) -> str:
        return f"<{self.status} {self.url}>"

    def copy(self) -> Self:
        """Return a copy of this Response"""
        return self.replace()

    @overload
    def replace(
        self, *args: Any, cls: type[ResponseTypeVar], **kwargs: Any
    ) -> ResponseTypeVar: ...

    @overload
    def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self: ...

    def replace(
        self, *args: Any, cls: type[Response] | None = None, **kwargs: Any
    ) -> Response:
        """Create a new Response with the same attributes except for those given new values

        Every name listed in :attr:`attributes` that is missing from
        ``kwargs`` is filled in from this instance. The new object is built
        with ``cls`` (the class of this instance when ``cls`` is ``None``),
        and positional ``args`` are forwarded to it unchanged.
        """
        for x in self.attributes:
            kwargs.setdefault(x, getattr(self, x))
        if cls is None:
            cls = self.__class__
        return cls(*args, **kwargs)

    def urljoin(self, url: str) -> str:
        """Join this Response's url with a possible relative url to form an
        absolute interpretation of the latter."""
        return urljoin(self.url, url)

    @property
    def text(self) -> str:
        """For subclasses of TextResponse, this will return the body
        as str

        On this base class it always raises :exc:`AttributeError`.
        """
        raise AttributeError("Response content isn't text")

    def css(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).

        On this base class it always raises ``NotSupported``.
        """
        raise NotSupported("Response content isn't text")

    def jmespath(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).

        On this base class it always raises ``NotSupported``.
        """
        raise NotSupported("Response content isn't text")

    def xpath(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).

        On this base class it always raises ``NotSupported``.
        """
        raise NotSupported("Response content isn't text")

    def follow(
        self,
        url: str | Link,
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
    ) -> Request:
        """
        Return a :class:`~.Request` instance to follow a link ``url``.
        It accepts the same arguments as ``Request.__init__()`` method,
        but ``url`` can be a relative URL or a :class:`~scrapy.link.Link` object,
        not only an absolute URL.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        A :exc:`ValueError` is raised if ``encoding`` or ``url`` is ``None``.

        .. versionadded:: 2.0
           The *flags* parameter.
        """
        if encoding is None:
            raise ValueError("encoding can't be None")
        if isinstance(url, Link):
            # Only the target URL of a Link object is used.
            url = url.url
        elif url is None:
            raise ValueError("url can't be None")
        # Resolve a possibly relative URL against this response's URL.
        url = self.urljoin(url)

        return Request(
            url=url,
            callback=callback,
            method=method,
            headers=headers,
            body=body,
            cookies=cookies,
            meta=meta,
            encoding=encoding,
            priority=priority,
            dont_filter=dont_filter,
            errback=errback,
            cb_kwargs=cb_kwargs,
            flags=flags,
        )

    def follow_all(
        self,
        urls: Iterable[str | Link],
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
    ) -> Iterable[Request]:
        """
        .. versionadded:: 2.0

        Return an iterable of :class:`~.Request` instances to follow all links
        in ``urls``. It accepts the same arguments as ``Request.__init__()`` method,
        but elements of ``urls`` can be relative URLs or :class:`~scrapy.link.Link` objects,
        not only absolute URLs.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow_all`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        A :exc:`TypeError` is raised immediately if ``urls`` is not iterable.
        The requests themselves are created lazily, one per element, as the
        returned iterable is consumed.
        """
        if not hasattr(urls, "__iter__"):
            raise TypeError("'urls' argument must be an iterable")
        # Generator expression: each Request is built by ``follow`` only when
        # the caller iterates, so per-URL errors surface at iteration time.
        return (
            self.follow(
                url=url,
                callback=callback,
                method=method,
                headers=headers,
                body=body,
                cookies=cookies,
                meta=meta,
                encoding=encoding,
                priority=priority,
                dont_filter=dont_filter,
                errback=errback,
                cb_kwargs=cb_kwargs,
                flags=flags,
            )
            for url in urls
        )
|