from itertools import chain, product

import pytest

from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
from scrapy.downloadermiddlewares.redirect import (
    MetaRefreshMiddleware,
    RedirectMiddleware,
)
from scrapy.exceptions import IgnoreRequest
from scrapy.http import HtmlResponse, Request, Response
from scrapy.spiders import Spider
from scrapy.utils.misc import set_environ
from scrapy.utils.test import get_crawler


class Base:
    class Test:
        def test_priority_adjust(self):
            """A redirect request must be scheduled ahead of the original."""
            original = Request("http://a.com")
            response = self.get_response(original, "http://a.com/redirected")
            redirected = self.mw.process_response(original, response, self.spider)
            assert redirected.priority > original.priority

        def test_dont_redirect(self):
            """The ``dont_redirect`` meta key toggles redirect handling."""
            url = "http://www.example.com/301"
            target = "http://www.example.com/redirected"

            # With dont_redirect=True, the response passes through untouched.
            request = Request(url, meta={"dont_redirect": True})
            response = self.get_response(request, target)
            result = self.mw.process_response(request, response, self.spider)
            assert isinstance(result, Response)
            assert result is response

            # With dont_redirect=False, a redirect request is produced.
            request = Request(url, meta={"dont_redirect": False})
            response = self.get_response(request, target)
            result = self.mw.process_response(request, response, self.spider)
            assert isinstance(result, Request)

        def test_post(self):
            """A redirected POST becomes a bodiless GET without entity headers."""
            url = "http://www.example.com/302"
            target = "http://www.example.com/redirected2"
            request = Request(
                url,
                method="POST",
                body="test",
                headers={"Content-Type": "text/plain", "Content-length": "4"},
            )
            response = self.get_response(request, target)

            redirected = self.mw.process_response(request, response, self.spider)
            assert isinstance(redirected, Request)
            assert redirected.url == target
            assert redirected.method == "GET"
            # Entity headers describe the dropped POST body and must go too.
            for header in ("Content-Type", "Content-Length"):
                assert header not in redirected.headers, (
                    f"{header} header must not be present in redirected request"
                )
            assert not redirected.body, (
                f"Redirected body must be empty, not '{redirected.body}'"
            )

        def test_max_redirect_times(self):
            """Exceeding ``max_redirect_times`` raises IgnoreRequest."""
            self.mw.max_redirect_times = 1
            request = Request("http://scrapytest.org/302")
            response = self.get_response(request, "/redirected")

            # First redirect is allowed and counted.
            redirected = self.mw.process_response(request, response, self.spider)
            assert isinstance(redirected, Request)
            assert redirected.meta.get("redirect_times") == 1
            # A second redirect exceeds the limit of 1.
            with pytest.raises(IgnoreRequest):
                self.mw.process_response(redirected, response, self.spider)

        def test_ttl(self):
            """``redirect_ttl`` meta caps redirects independently of the global limit."""
            self.mw.max_redirect_times = 100
            request = Request("http://scrapytest.org/302", meta={"redirect_ttl": 1})
            response = self.get_response(request, "/a")

            redirected = self.mw.process_response(request, response, self.spider)
            assert isinstance(redirected, Request)
            # The per-request TTL of 1 is exhausted despite the high global limit.
            with pytest.raises(IgnoreRequest):
                self.mw.process_response(redirected, response, self.spider)

        def test_redirect_urls(self):
            """Each redirect hop appends the previous URL to ``redirect_urls`` meta."""
            req1 = Request("http://scrapytest.org/first")
            rsp1 = self.get_response(req1, "/redirected")
            req2 = self.mw.process_response(req1, rsp1, self.spider)
            assert isinstance(req2, Request)
            # Build the second response from req2 (the request that would have
            # been sent), not req1, so a relative Location resolves against the
            # correct base URL; this matches test_redirect_reasons.
            rsp2 = self.get_response(req2, "/redirected2")
            req3 = self.mw.process_response(req2, rsp2, self.spider)
            assert isinstance(req3, Request)

            assert req2.url == "http://scrapytest.org/redirected"
            assert req2.meta["redirect_urls"] == ["http://scrapytest.org/first"]
            assert req3.url == "http://scrapytest.org/redirected2"
            assert req3.meta["redirect_urls"] == [
                "http://scrapytest.org/first",
                "http://scrapytest.org/redirected",
            ]

        def test_redirect_reasons(self):
            """Every redirect hop records ``self.reason`` in ``redirect_reasons`` meta."""
            request = Request("http://scrapytest.org/first")
            hop1 = self.mw.process_response(
                request, self.get_response(request, "/redirected1"), self.spider
            )
            hop2 = self.mw.process_response(
                hop1, self.get_response(hop1, "/redirected2"), self.spider
            )
            assert hop1.meta["redirect_reasons"] == [self.reason]
            assert hop2.meta["redirect_reasons"] == [self.reason] * 2

        def test_cross_origin_header_dropping(self):
            """Verify which headers survive a redirect, per target origin.

            Authorization is kept only for same-origin targets (same scheme,
            domain and port). Cookie is additionally kept for same-domain
            targets, except on an https → http scheme downgrade.
            """
            safe = {"A": "B"}
            cookie = {"Cookie": "a=b"}
            auth = {"Authorization": "Bearer 123456"}

            def redirect(request, location):
                # Process a redirect of *request* to *location* and return the
                # resulting follow-up request.
                response = self.get_response(request, location)
                result = self.mw.process_response(request, response, self.spider)
                assert isinstance(result, Request)
                return result

            https_request = Request(
                "https://example.com", headers={**safe, **cookie, **auth}
            )
            http_request = Request(
                "http://example.com", headers={**safe, **cookie, **auth}
            )

            # Same-origin redirects keep every header, for https and http alike.
            same_https = redirect(https_request, "https://example.com/a")
            assert https_request.headers == same_https.headers
            same_http = redirect(http_request, "http://example.com/a")
            assert http_request.headers == same_http.headers

            # An explicit default port does not change the origin, whether the
            # redirect adds it or removes it.
            to_explicit_port = redirect(https_request, "https://example.com:443/a")
            assert https_request.headers == to_explicit_port.headers
            to_implicit_port = redirect(https_request, "https://example.com/a")
            assert https_request.headers == to_implicit_port.headers

            # A non-default port changes the origin: Authorization is dropped,
            # Cookie is kept because the domain is unchanged.
            different_port = redirect(https_request, "https://example.com:8080/a")
            assert {**safe, **cookie} == different_port.headers.to_unicode_dict()

            # A domain change drops both Authorization and Cookie.
            external = redirect(https_request, "https://example.org/a")
            assert safe == external.headers.to_unicode_dict()

            # A scheme upgrade (http → https) changes the origin but not the
            # domain: Authorization dropped, Cookie kept.
            upgrade = redirect(http_request, "https://example.com/a")
            assert {**safe, **cookie} == upgrade.headers.to_unicode_dict()

            # A scheme downgrade (https → http) drops Authorization and also
            # Cookie, since its value cannot tell secure (HTTPS-only) cookies
            # apart. When the cookie management middleware sets the Cookie
            # header, as the docs recommend, this is not an issue: it re-adds
            # the header to the new request when appropriate.
            downgrade = redirect(https_request, "http://example.com/a")
            assert safe == downgrade.headers.to_unicode_dict()

        def test_meta_proxy_http_absolute(self):
            """A meta-set proxy survives absolute http→http redirects."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request(
                "http://example.com", meta={"proxy": "https://a:@a.example"}
            )
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two redirect hops; the proxy state must survive both the
            # redirect middleware and a re-run of the proxy middleware.
            for location in ("http://example.com", "http://example.com"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_meta_proxy_http_relative(self):
            """A meta-set proxy survives relative redirects of an http request."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request(
                "http://example.com", meta={"proxy": "https://a:@a.example"}
            )
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two relative redirect hops; the proxy state must survive both
            # the redirect middleware and a re-run of the proxy middleware.
            for location in ("/a", "/a"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_meta_proxy_https_absolute(self):
            """A meta-set proxy survives absolute https→https redirects."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request(
                "https://example.com", meta={"proxy": "https://a:@a.example"}
            )
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two redirect hops; the proxy state must survive both the
            # redirect middleware and a re-run of the proxy middleware.
            for location in ("https://example.com", "https://example.com"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_meta_proxy_https_relative(self):
            """A meta-set proxy survives relative redirects of an https request."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request(
                "https://example.com", meta={"proxy": "https://a:@a.example"}
            )
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two relative redirect hops; the proxy state must survive both
            # the redirect middleware and a re-run of the proxy middleware.
            for location in ("/a", "/a"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_meta_proxy_http_to_https(self):
            """A meta-set proxy survives an http→https (and back) redirect."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # A proxy set through meta is scheme-independent and must
                # remain intact across scheme changes.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request(
                "http://example.com", meta={"proxy": "https://a:@a.example"}
            )
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Hop 1 upgrades to https, hop 2 downgrades back to http.
            for location in ("https://example.com", "http://example.com"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_meta_proxy_https_to_http(self):
            """A meta-set proxy survives an https→http (and back) redirect."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # A proxy set through meta is scheme-independent and must
                # remain intact across scheme changes.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request(
                "https://example.com", meta={"proxy": "https://a:@a.example"}
            )
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Hop 1 downgrades to http, hop 2 upgrades back to https.
            for location in ("http://example.com", "https://example.com"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_system_proxy_http_absolute(self):
            """An env-configured http proxy survives absolute http→http redirects."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            # The proxy middleware reads the environment at construction time.
            with set_environ(http_proxy="https://a:@a.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request("http://example.com")
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two redirect hops; the proxy state must survive both the
            # redirect middleware and a re-run of the proxy middleware.
            for location in ("http://example.com", "http://example.com"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_system_proxy_http_relative(self):
            """An env-configured http proxy survives relative redirects."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            # The proxy middleware reads the environment at construction time.
            with set_environ(http_proxy="https://a:@a.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request("http://example.com")
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two relative redirect hops; the proxy state must survive both
            # the redirect middleware and a re-run of the proxy middleware.
            for location in ("/a", "/a"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_system_proxy_https_absolute(self):
            """An env-configured https proxy survives absolute https→https redirects."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            # The proxy middleware reads the environment at construction time.
            with set_environ(https_proxy="https://a:@a.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request("https://example.com")
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two redirect hops; the proxy state must survive both the
            # redirect middleware and a re-run of the proxy middleware.
            for location in ("https://example.com", "https://example.com"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_system_proxy_https_relative(self):
            """An env-configured https proxy survives relative redirects."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            # The proxy middleware reads the environment at construction time.
            with set_environ(https_proxy="https://a:@a.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxied(request):
                # Proxy credentials and metadata must remain intact.
                assert request.headers["Proxy-Authorization"] == b"Basic YTo="
                assert request.meta["_auth_proxy"] == "https://a.example"
                assert request.meta["proxy"] == "https://a.example"

            request = Request("https://example.com")
            proxy_mw.process_request(request, spider)
            assert_proxied(request)

            # Two relative redirect hops; the proxy state must survive both
            # the redirect middleware and a re-run of the proxy middleware.
            for location in ("/a", "/a"):
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_proxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxied(request)

        def test_system_proxy_proxied_http_to_proxied_https(self):
            """Cross-scheme redirects clear proxy state so the per-scheme proxy applies.

            The redirect middleware must drop the proxy data of the old scheme,
            and a subsequent run of the proxy middleware must install the proxy
            configured for the new scheme.
            """
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            # The proxy middleware reads the environment at construction time.
            with set_environ(
                http_proxy="https://a:@a.example",
                https_proxy="https://b:@b.example",
            ):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)
            spider = None

            def assert_proxy(request, credentials, proxy_url):
                assert request.headers["Proxy-Authorization"] == credentials
                assert request.meta["_auth_proxy"] == proxy_url
                assert request.meta["proxy"] == proxy_url

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            request = Request("http://example.com")
            proxy_mw.process_request(request, spider)
            assert_proxy(request, b"Basic YTo=", "https://a.example")

            # Hop 1: http → https picks up the https proxy (b);
            # hop 2: https → http switches back to the http proxy (a).
            hops = (
                ("https://example.com", b"Basic Yjo=", "https://b.example"),
                ("http://example.com", b"Basic YTo=", "https://a.example"),
            )
            for location, credentials, proxy_url in hops:
                response = self.get_response(request, location)
                request = redirect_mw.process_response(request, response, spider)
                assert isinstance(request, Request)
                assert_unproxied(request)
                proxy_mw.process_request(request, spider)
                assert_proxy(request, credentials, proxy_url)

        def test_system_proxy_proxied_http_to_unproxied_https(self):
            """Only http is proxied: proxy state must be dropped on each
            redirect and re-added only for http hops."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            with set_environ(http_proxy="https://a:@a.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_proxied(request, auth, proxy_url):
                assert request.headers["Proxy-Authorization"] == auth
                assert request.meta["_auth_proxy"] == proxy_url
                assert request.meta["proxy"] == proxy_url

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("http://example.com")
            proxy_mw.process_request(request1, spider)
            assert_proxied(request1, b"Basic YTo=", "https://a.example")

            response1 = self.get_response(request1, "https://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            # No https proxy is configured, so request2 stays unproxied.
            proxy_mw.process_request(request2, spider)
            assert_unproxied(request2)

            response2 = self.get_response(request2, "http://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_proxied(request3, b"Basic YTo=", "https://a.example")

        def test_system_proxy_unproxied_http_to_proxied_https(self):
            """Only https is proxied: http hops stay unproxied while the
            https hop in the middle picks up the https proxy."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            with set_environ(https_proxy="https://b:@b.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_proxied(request, auth, proxy_url):
                assert request.headers["Proxy-Authorization"] == auth
                assert request.meta["_auth_proxy"] == proxy_url
                assert request.meta["proxy"] == proxy_url

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("http://example.com")
            # No http proxy is configured, so request1 stays unproxied.
            proxy_mw.process_request(request1, spider)
            assert_unproxied(request1)

            response1 = self.get_response(request1, "https://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            proxy_mw.process_request(request2, spider)
            assert_proxied(request2, b"Basic Yjo=", "https://b.example")

            response2 = self.get_response(request2, "http://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_unproxied(request3)

        def test_system_proxy_unproxied_http_to_unproxied_https(self):
            """Without any system proxies, no proxy state ever appears on
            the original request or on any redirect."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("http://example.com")
            proxy_mw.process_request(request1, spider)
            assert_unproxied(request1)

            response1 = self.get_response(request1, "https://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            proxy_mw.process_request(request2, spider)
            assert_unproxied(request2)

            response2 = self.get_response(request2, "http://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_unproxied(request3)

        def test_system_proxy_proxied_https_to_proxied_http(self):
            """Starting from https: each redirect clears proxy state, and the
            proxy middleware re-applies the proxy matching the new scheme."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            with set_environ(
                http_proxy="https://a:@a.example",
                https_proxy="https://b:@b.example",
            ):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_proxied(request, auth, proxy_url):
                assert request.headers["Proxy-Authorization"] == auth
                assert request.meta["_auth_proxy"] == proxy_url
                assert request.meta["proxy"] == proxy_url

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("https://example.com")
            proxy_mw.process_request(request1, spider)
            assert_proxied(request1, b"Basic Yjo=", "https://b.example")

            response1 = self.get_response(request1, "http://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            proxy_mw.process_request(request2, spider)
            assert_proxied(request2, b"Basic YTo=", "https://a.example")

            response2 = self.get_response(request2, "https://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_proxied(request3, b"Basic Yjo=", "https://b.example")

        def test_system_proxy_proxied_https_to_unproxied_http(self):
            """Only https is proxied: the http hop in the middle must stay
            unproxied while the https hops pick up the https proxy."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            with set_environ(https_proxy="https://b:@b.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_proxied(request, auth, proxy_url):
                assert request.headers["Proxy-Authorization"] == auth
                assert request.meta["_auth_proxy"] == proxy_url
                assert request.meta["proxy"] == proxy_url

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("https://example.com")
            proxy_mw.process_request(request1, spider)
            assert_proxied(request1, b"Basic Yjo=", "https://b.example")

            response1 = self.get_response(request1, "http://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            # No http proxy is configured, so request2 stays unproxied.
            proxy_mw.process_request(request2, spider)
            assert_unproxied(request2)

            response2 = self.get_response(request2, "https://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_proxied(request3, b"Basic Yjo=", "https://b.example")

        def test_system_proxy_unproxied_https_to_proxied_http(self):
            """Only http is proxied: https hops stay unproxied while the
            http hop in the middle picks up the http proxy."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            with set_environ(http_proxy="https://a:@a.example"):
                proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_proxied(request, auth, proxy_url):
                assert request.headers["Proxy-Authorization"] == auth
                assert request.meta["_auth_proxy"] == proxy_url
                assert request.meta["proxy"] == proxy_url

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("https://example.com")
            # No https proxy is configured, so request1 stays unproxied.
            proxy_mw.process_request(request1, spider)
            assert_unproxied(request1)

            response1 = self.get_response(request1, "http://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            proxy_mw.process_request(request2, spider)
            assert_proxied(request2, b"Basic YTo=", "https://a.example")

            response2 = self.get_response(request2, "https://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_unproxied(request3)

        def test_system_proxy_unproxied_https_to_unproxied_http(self):
            """Without any system proxies, an https request and all of its
            redirects stay free of proxy state."""
            crawler = get_crawler()
            redirect_mw = self.mwcls.from_crawler(crawler)
            proxy_mw = HttpProxyMiddleware.from_crawler(crawler)

            def assert_unproxied(request):
                assert "Proxy-Authorization" not in request.headers
                assert "_auth_proxy" not in request.meta
                assert "proxy" not in request.meta

            spider = None
            request1 = Request("https://example.com")
            proxy_mw.process_request(request1, spider)
            assert_unproxied(request1)

            response1 = self.get_response(request1, "http://example.com")
            request2 = redirect_mw.process_response(request1, response1, spider)
            assert isinstance(request2, Request)
            assert_unproxied(request2)

            proxy_mw.process_request(request2, spider)
            assert_unproxied(request2)

            response2 = self.get_response(request2, "https://example.com")
            request3 = redirect_mw.process_response(request2, response2, spider)
            assert isinstance(request3, Request)
            assert_unproxied(request3)

            proxy_mw.process_request(request3, spider)
            assert_unproxied(request3)


class TestRedirectMiddleware(Base.Test):
    """Tests for RedirectMiddleware, which follows Location-header redirects."""

    mwcls = RedirectMiddleware
    reason = 302

    def setup_method(self):
        self.crawler = get_crawler(Spider)
        self.spider = self.crawler._create_spider("foo")
        self.mw = self.mwcls.from_crawler(self.crawler)

    def get_response(self, request, location, status=302):
        # Build a redirect response for *request* pointing at *location*.
        return Response(request.url, status=status, headers={"Location": location})

    def test_redirect_3xx_permanent(self):
        def _test(method, status=301):
            source = f"http://www.example.com/{status}"
            target = "http://www.example.com/redirected"
            request = Request(source, method=method)
            response = Response(source, headers={"Location": target}, status=status)

            redirected = self.mw.process_response(request, response, self.spider)
            assert isinstance(redirected, Request)
            assert redirected.url == target
            # 301/307/308 must preserve the original HTTP method.
            assert redirected.method == method

            # A 3xx response without a Location header is passed through.
            del response.headers["Location"]
            assert self.mw.process_response(request, response, self.spider) is response

        for status in (301, 307, 308):
            for method in ("GET", "POST", "HEAD"):
                _test(method, status=status)

    def test_redirect_302_head(self):
        source = "http://www.example.com/302"
        target = "http://www.example.com/redirected2"
        request = Request(source, method="HEAD")
        response = Response(source, headers={"Location": target}, status=302)

        redirected = self.mw.process_response(request, response, self.spider)
        assert isinstance(redirected, Request)
        assert redirected.url == target
        # HEAD requests keep their method across a 302.
        assert redirected.method == "HEAD"

    def test_redirect_302_relative(self):
        source = "http://www.example.com/302"
        location = "///i8n.example2.com/302"
        resolved = "http://i8n.example2.com/302"
        request = Request(source, method="HEAD")
        response = Response(source, headers={"Location": location}, status=302)

        redirected = self.mw.process_response(request, response, self.spider)
        assert isinstance(redirected, Request)
        # The network-path-like Location is resolved against the request URL.
        assert redirected.url == resolved
        assert redirected.method == "HEAD"

    def test_spider_handling(self):
        # A spider that handles the 301 status itself sees the raw response.
        smartspider = self.crawler._create_spider("smarty")
        smartspider.handle_httpstatus_list = [404, 301, 302]
        source = "http://www.example.com/301"
        target = "http://www.example.com/redirected"
        request = Request(source)
        response = Response(source, headers={"Location": target}, status=301)
        assert self.mw.process_response(request, response, smartspider) is response

    def test_request_meta_handling(self):
        source = "http://www.example.com/301"
        target = "http://www.example.com/redirected"

        def _test_passthrough(request):
            # Requests that opt into status handling bypass the redirect.
            response = Response(
                source, headers={"Location": target}, status=301, request=request
            )
            assert self.mw.process_response(request, response, self.spider) is response

        _test_passthrough(
            Request(source, meta={"handle_httpstatus_list": [404, 301, 302]})
        )
        _test_passthrough(Request(source, meta={"handle_httpstatus_all": True}))

    def test_latin1_location(self):
        request = Request("http://scrapytest.org/first")
        latin1_location = "/ação".encode("latin1")  # HTTP historically supports latin1
        response = Response(
            "http://scrapytest.org/first",
            headers={"Location": latin1_location},
            status=302,
        )
        redirected = self.mw.process_response(request, response, self.spider)
        # Latin-1 bytes end up percent-encoded in the redirect URL.
        assert redirected.url == "http://scrapytest.org/a%E7%E3o"

    def test_utf8_location(self):
        request = Request("http://scrapytest.org/first")
        utf8_location = "/ação".encode()  # header using UTF-8 encoding
        response = Response(
            "http://scrapytest.org/first",
            headers={"Location": utf8_location},
            status=302,
        )
        redirected = self.mw.process_response(request, response, self.spider)
        # UTF-8 bytes survive as percent-encoded UTF-8 in the redirect URL.
        assert redirected.url == "http://scrapytest.org/a%C3%A7%C3%A3o"

    def test_no_location(self):
        # A 302 without a Location header is returned unchanged.
        request = Request("https://example.com")
        response = Response(request.url, status=302)
        assert self.mw.process_response(request, response, self.spider) is response


SCHEME_PARAMS = ("url", "location", "target")
HTTP_SCHEMES = ("http", "https")
NON_HTTP_SCHEMES = ("data", "file", "ftp", "s3", "foo")
# Shared (url, location, target) parametrize cases; target is None when the
# middleware is expected NOT to redirect.
REDIRECT_SCHEME_CASES = (
    # http/https → http/https redirects
    *(
        (
            f"{src}://example.com/a",
            f"{dst}://example.com/b",
            f"{dst}://example.com/b",
        )
        for src, dst in product(HTTP_SCHEMES, repeat=2)
    ),
    # http/https → data/file/ftp/s3/foo does not redirect
    *(
        (
            f"{src}://example.com/a",
            f"{dst}://example.com/b",
            None,
        )
        for src in HTTP_SCHEMES
        for dst in NON_HTTP_SCHEMES
    ),
    # http/https → relative redirects
    *(
        (
            f"{src}://example.com/a",
            loc,
            f"{src}://example.com/b",
        )
        for src in HTTP_SCHEMES
        for loc in ("//example.com/b", "/b")
    ),
    # Note: We do not test data/file/ftp/s3 schemes for the initial URL
    # because their download handlers cannot return a status code of 3xx.
)


@pytest.mark.parametrize(SCHEME_PARAMS, REDIRECT_SCHEME_CASES)
def test_redirect_schemes(url, location, target):
    """RedirectMiddleware follows http(s) → http(s) Location headers only."""
    crawler = get_crawler(Spider)
    spider = crawler._create_spider("foo")
    middleware = RedirectMiddleware.from_crawler(crawler)
    response = Response(url, headers={"Location": location}, status=301)
    result = middleware.process_response(Request(url), response, spider)
    if target is None:
        # Unsupported target schemes leave the response untouched.
        assert result == response
    else:
        assert isinstance(result, Request)
        assert result.url == target


def meta_refresh_body(url, interval=5):
    """Return HTML (as UTF-8 bytes) with a <meta> refresh tag pointing at
    *url* after *interval* seconds."""
    markup = (
        f'<html><head><meta http-equiv="refresh" '
        f'content="{interval};url={url}"/></head></html>'
    )
    return markup.encode("utf-8")


class TestMetaRefreshMiddleware(Base.Test):
    """Tests for MetaRefreshMiddleware, which follows <meta> refresh tags."""

    mwcls = MetaRefreshMiddleware
    reason = "meta refresh"

    def setup_method(self):
        crawler = get_crawler(Spider)
        self.spider = crawler._create_spider("foo")
        self.mw = self.mwcls.from_crawler(crawler)

    def _body(self, interval=5, url="http://example.org/newpage"):
        # Convenience wrapper around the module-level helper.
        return meta_refresh_body(url, interval)

    def get_response(self, request, location):
        return HtmlResponse(request.url, body=self._body(url=location))

    def test_meta_refresh(self):
        request = Request(url="http://example.org")
        response = HtmlResponse(request.url, body=self._body())
        redirected = self.mw.process_response(request, response, self.spider)
        assert isinstance(redirected, Request)
        assert redirected.url == "http://example.org/newpage"

    def test_meta_refresh_with_high_interval(self):
        # meta-refresh with high intervals don't trigger redirects
        request = Request(url="http://example.org")
        response = HtmlResponse(
            url="http://example.org", body=self._body(interval=1000), encoding="utf-8"
        )
        assert self.mw.process_response(request, response, self.spider) is response

    def test_meta_refresh_trough_posted_request(self):
        request = Request(
            url="http://example.org",
            method="POST",
            body="test",
            headers={"Content-Type": "text/plain", "Content-length": "4"},
        )
        response = HtmlResponse(request.url, body=self._body())
        redirected = self.mw.process_response(request, response, self.spider)

        assert isinstance(redirected, Request)
        assert redirected.url == "http://example.org/newpage"
        # The POST is downgraded to a body-less GET on redirect.
        assert redirected.method == "GET"
        assert "Content-Type" not in redirected.headers, (
            "Content-Type header must not be present in redirected request"
        )
        assert "Content-Length" not in redirected.headers, (
            "Content-Length header must not be present in redirected request"
        )
        assert not redirected.body, (
            f"Redirected body must be empty, not '{redirected.body}'"
        )

    def test_ignore_tags_default(self):
        # By default, a meta refresh inside <noscript> is ignored.
        request = Request(url="http://example.org")
        markup = (
            """<noscript><meta http-equiv="refresh" """
            """content="0;URL='http://example.org/newpage'"></noscript>"""
        )
        rendered = HtmlResponse(request.url, body=markup.encode())
        result = self.mw.process_response(request, rendered, self.spider)
        assert isinstance(result, Response)

    def test_ignore_tags_1_x_list(self):
        """Test that Scrapy 1.x behavior remains possible"""
        settings = {"METAREFRESH_IGNORE_TAGS": ["script", "noscript"]}
        crawler = get_crawler(Spider, settings)
        mw = MetaRefreshMiddleware.from_crawler(crawler)
        request = Request(url="http://example.org")
        markup = (
            """<noscript><meta http-equiv="refresh" """
            """content="0;URL='http://example.org/newpage'"></noscript>"""
        )
        rendered = HtmlResponse(request.url, body=markup.encode())
        result = mw.process_response(request, rendered, self.spider)
        assert isinstance(result, Response)


@pytest.mark.parametrize(
    SCHEME_PARAMS,
    [
        *REDIRECT_SCHEME_CASES,
        # data/file/ftp/s3/foo → * does not redirect
        *(
            (
                f"{src}://example.com/a",
                f"{dst}://example.com/b",
                None,
            )
            for src in NON_HTTP_SCHEMES
            for dst in chain(HTTP_SCHEMES, NON_HTTP_SCHEMES)
        ),
        # data/file/ftp/s3/foo → relative does not redirect
        *(
            (
                f"{src}://example.com/a",
                loc,
                None,
            )
            for src in NON_HTTP_SCHEMES
            for loc in ("//example.com/b", "/b")
        ),
    ],
)
def test_meta_refresh_schemes(url, location, target):
    """MetaRefreshMiddleware only follows http(s) → http(s) refreshes."""
    crawler = get_crawler(Spider)
    spider = crawler._create_spider("foo")
    middleware = MetaRefreshMiddleware.from_crawler(crawler)
    response = HtmlResponse(url, body=meta_refresh_body(location))
    result = middleware.process_response(Request(url), response, spider)
    if target is None:
        # Unsupported schemes leave the response untouched.
        assert result == response
    else:
        assert isinstance(result, Request)
        assert result.url == target
