"""
HTTP basic auth downloader middleware
See documentation in docs/topics/downloader-middleware.rst
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from w3lib.http import basic_auth_header
from scrapy import Request, Spider, signals
from scrapy.utils.url import url_is_from_any_domain
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Response
class HttpAuthMiddleware:
    """Inject a Basic HTTP ``Authorization`` header into outgoing requests.

    Credentials come from the ``http_user`` and ``http_pass`` spider class
    attributes; ``http_auth_domain`` restricts which hosts receive them.
    """

    @classmethod
    def from_crawler(cls, crawler: Crawler) -> Self:
        """Build the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def spider_opened(self, spider: Spider) -> None:
        """Read credentials off the spider once it opens.

        ``self.auth`` / ``self.domain`` are only set when credentials exist,
        so an unconfigured spider leaves the middleware inert.
        """
        user = getattr(spider, "http_user", "")
        password = getattr(spider, "http_pass", "")
        if not (user or password):
            return
        self.auth = basic_auth_header(user, password)
        # Deliberately no getattr default: a spider that sets credentials
        # without http_auth_domain should fail loudly here.
        self.domain = spider.http_auth_domain  # type: ignore[attr-defined]

    def process_request(
        self, request: Request, spider: Spider
    ) -> Request | Response | None:
        """Attach the cached auth header when the request qualifies."""
        credentials = getattr(self, "auth", None)
        if not credentials:
            # No credentials were configured for this spider.
            return None
        if b"Authorization" in request.headers:
            # Never clobber an explicitly set Authorization header.
            return None
        if self.domain and not url_is_from_any_domain(request.url, [self.domain]):
            # Credentials are scoped to self.domain; skip other hosts.
            return None
        request.headers[b"Authorization"] = credentials
        return None