import logging
from urllib.parse import urljoin, urlparse

from w3lib.url import safe_url_string

from scrapy.http import HtmlResponse
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.response import get_meta_refresh
from scrapy.exceptions import IgnoreRequest, NotConfigured


logger = logging.getLogger(__name__)


def _build_redirect_request(source_request, *, url, **kwargs):
    redirect_request = source_request.replace(
        url=url,
        **kwargs,
        cookies=None,
    )
    if 'Cookie' in redirect_request.headers:
        source_request_netloc = urlparse_cached(source_request).netloc
        redirect_request_netloc = urlparse_cached(redirect_request).netloc
        if source_request_netloc != redirect_request_netloc:
            del redirect_request.headers['Cookie']
    return redirect_request


class BaseRedirectMiddleware:

    enabled_setting = 'REDIRECT_ENABLED'

    def __init__(self, settings):
        if not settings.getbool(self.enabled_setting):
            raise NotConfigured

        self.max_redirect_times = settings.getint('REDIRECT_MAX_TIMES')
        self.priority_adjust = settings.getint('REDIRECT_PRIORITY_ADJUST')

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings)

    def _redirect(self, redirected, request, spider, reason):
        ttl = request.meta.setdefault('redirect_ttl', self.max_redirect_times)
        redirects = request.meta.get('redirect_times', 0) + 1

        if ttl and redirects <= self.max_redirect_times:
            redirected.meta['redirect_times'] = redirects
            redirected.meta['redirect_ttl'] = ttl - 1
            redirected.meta['redirect_urls'] = request.meta.get('redirect_urls', []) + [request.url]
            redirected.meta['redirect_reasons'] = request.meta.get('redirect_reasons', []) + [reason]
            redirected.dont_filter = request.dont_filter
            redirected.priority = request.priority + self.priority_adjust
            logger.debug("Redirecting (%(reason)s) to %(redirected)s from %(request)s",
                         {'reason': reason, 'redirected': redirected, 'request': request},
                         extra={'spider': spider})
            return redirected
        else:
            logger.debug("Discarding %(request)s: max redirections reached",
                         {'request': request}, extra={'spider': spider})
            raise IgnoreRequest("max redirections reached")

    def _redirect_request_using_get(self, request, redirect_url):
        redirect_request = _build_redirect_request(
            request,
            url=redirect_url,
            method='GET',
            body='',
        )
        redirect_request.headers.pop('Content-Type', None)
        redirect_request.headers.pop('Content-Length', None)
        return redirect_request


class RedirectMiddleware(BaseRedirectMiddleware):
    """
    Handle redirection of requests based on response status
    and meta-refresh html tag.
    """

    def process_response(self, request, response, spider):
        if (
            request.meta.get('dont_redirect', False)
            or response.status in getattr(spider, 'handle_httpstatus_list', [])
            or response.status in request.meta.get('handle_httpstatus_list', [])
            or request.meta.get('handle_httpstatus_all', False)
        ):
            return response

        allowed_status = (301, 302, 303, 307, 308)
        if 'Location' not in response.headers or response.status not in allowed_status:
            return response

        location = safe_url_string(response.headers['Location'])
        if response.headers['Location'].startswith(b'//'):
            request_scheme = urlparse(request.url).scheme
            location = request_scheme + '://' + location.lstrip('/')

        redirected_url = urljoin(request.url, location)

        if response.status in (301, 307, 308) or request.method == 'HEAD':
            redirected = _build_redirect_request(request, url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        redirected = self._redirect_request_using_get(request, redirected_url)
        return self._redirect(redirected, request, spider, response.status)


class MetaRefreshMiddleware(BaseRedirectMiddleware):

    enabled_setting = 'METAREFRESH_ENABLED'

    def __init__(self, settings):
        super().__init__(settings)
        self._ignore_tags = settings.getlist('METAREFRESH_IGNORE_TAGS')
        self._maxdelay = settings.getint('METAREFRESH_MAXDELAY')

    def process_response(self, request, response, spider):
        if (
            request.meta.get('dont_redirect', False)
            or request.method == 'HEAD'
            or not isinstance(response, HtmlResponse)
        ):
            return response

        interval, url = get_meta_refresh(response,
                                         ignore_tags=self._ignore_tags)
        if url and interval < self._maxdelay:
            redirected = self._redirect_request_using_get(request, url)
            return self._redirect(redirected, request, spider, 'meta refresh')

        return response
