File: url_helper.py

package info (click to toggle)
cloud-init 25.3-2
links: PTS, VCS
area: main
in suites: forky, sid
size: 12,412 kB
sloc: python: 135,894; sh: 3,883; makefile: 141; javascript: 30; xml: 22
file content (1141 lines) | stat: -rw-r--r-- 39,164 bytes
# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This file is part of cloud-init. See LICENSE file for license information.

import copy
import ftplib
import io
import json
import logging
import os
import threading
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed
from email.utils import parsedate
from functools import partial
from http.client import NOT_FOUND
from itertools import count
from ssl import create_default_context
from typing import (
    Any,
    Callable,
    Iterator,
    List,
    Mapping,
    NamedTuple,
    Optional,
    Tuple,
    Union,
)
from urllib.parse import quote, urlparse, urlsplit, urlunparse

import requests
from requests import exceptions

from cloudinit import performance, util, version

LOG = logging.getLogger(__name__)

REDACTED = "REDACTED"
ExceptionCallback = Optional[Callable[["UrlError"], bool]]


def _cleanurl(url):
    parsed_url = list(urlparse(url, scheme="http"))
    if not parsed_url[1] and parsed_url[2]:
        # Swap these since this seems to be a common
        # occurrence when given urls like 'www.google.com'
        parsed_url[1] = parsed_url[2]
        parsed_url[2] = ""
    return urlunparse(parsed_url)


def combine_url(base, *add_ons):
    def combine_single(url, add_on):
        url_parsed = list(urlparse(url))
        path = url_parsed[2]
        if path and not path.endswith("/"):
            path += "/"
        path += quote(str(add_on), safe="/:")
        url_parsed[2] = path
        return urlunparse(url_parsed)

    url = base
    for add_on in add_ons:
        url = combine_single(url, add_on)
    return url


def ftp_get_return_code_from_exception(exc) -> int:
    """helper for read_ftps to map return codes to a number"""
    # ftplib doesn't expose error codes, so use this lookup table
    ftp_error_codes = {
        ftplib.error_reply: 300,  # unexpected [123]xx reply
        ftplib.error_temp: 400,  # 4xx errors
        ftplib.error_perm: 500,  # 5xx errors
        ftplib.error_proto: 600,  # response does not begin with [1-5]
        EOFError: 700,  # made up
        # OSError is also possible. Use OSError.errno for that.
    }
    code = ftp_error_codes.get(type(exc))  # pyright: ignore
    if not code:
        if isinstance(exc, OSError) and exc.errno:
            code = exc.errno
        else:
            LOG.warning(
                "Unexpected exception type while connecting to ftp server."
            )
            code = -99
    return code


def read_ftps(url: str, timeout: float = 5.0, **kwargs: dict) -> "FtpResponse":
    """connect to URL using ftp over TLS and read a file

    when using strict mode (ftps://), raise exception in event of failure
    when not using strict mode (ftp://), fall back to using unencrypted ftp

    url: string containing the desination to read a file from. The url is
        parsed with urllib.urlsplit to identify username, password, host,
        path, and port in the following format:
            ftps://[username:password@]host[:port]/[path]
        host is the only required component
    timeout: maximum time for the connection to take
    kwargs: unused, for compatibility with read_url
    returns: UrlResponse
    """

    url_parts = urlsplit(url)
    if not url_parts.hostname:
        raise UrlError(
            cause="Invalid url provided", code=NOT_FOUND, headers=None, url=url
        )
    with io.BytesIO() as buffer:
        port = url_parts.port or 21
        user = url_parts.username or "anonymous"
        if "ftps" == url_parts.scheme:
            try:
                ftp_tls = ftplib.FTP_TLS(
                    context=create_default_context(),
                )
                LOG.debug(
                    "Attempting to connect to %s via port [%s] over tls.",
                    url,
                    port,
                )
                ftp_tls.connect(
                    host=url_parts.hostname,
                    port=port,
                    timeout=timeout or 5.0,  # uses float internally
                )
            except ftplib.all_errors as e:
                code = ftp_get_return_code_from_exception(e)
                raise UrlError(
                    cause=(
                        "Reading file from server over tls "
                        f"failed for url {url} [{code}]"
                    ),
                    code=code,
                    headers=None,
                    url=url,
                ) from e
            LOG.debug("Attempting to login with user [%s]", user)
            try:
                ftp_tls.login(
                    user=user,
                    passwd=url_parts.password or "",
                )
                LOG.debug("Creating a secure connection")
                ftp_tls.prot_p()
            except ftplib.error_perm as e:
                LOG.warning(
                    "Attempted to connect to an insecure ftp server but used "
                    "a scheme of ftps://, which is not allowed. Use ftp:// "
                    "to allow connecting to insecure ftp servers."
                )
                raise UrlError(
                    cause=(
                        "Attempted to connect to an insecure ftp server but "
                        "used a scheme of ftps://, which is not allowed. Use "
                        "ftp:// to allow connecting to insecure ftp servers."
                    ),
                    code=500,
                    headers=None,
                    url=url,
                ) from e
            try:
                LOG.debug("Reading file: %s", url_parts.path)
                ftp_tls.retrbinary(
                    f"RETR {url_parts.path}", callback=buffer.write
                )

                return FtpResponse(buffer.getvalue(), url)
            except ftplib.all_errors as e:
                code = ftp_get_return_code_from_exception(e)
                raise UrlError(
                    cause=(
                        "Reading file from ftp server"
                        f" failed for url {url} [{code}]"
                    ),
                    code=code,
                    headers=None,
                    url=url,
                ) from e
            finally:
                LOG.debug("Closing connection")
                ftp_tls.close()
        else:
            try:
                ftp = ftplib.FTP()
                LOG.debug(
                    "Attempting to connect to %s via port %s.", url, port
                )
                ftp.connect(
                    host=url_parts.hostname,
                    port=port,
                    timeout=timeout or 5.0,  # uses float internally
                )
                LOG.debug("Attempting to login with user [%s]", user)
                ftp.login(
                    user=user,
                    passwd=url_parts.password or "",
                )
                LOG.debug("Reading file: %s", url_parts.path)
                ftp.retrbinary(f"RETR {url_parts.path}", callback=buffer.write)
                return FtpResponse(buffer.getvalue(), url)
            except ftplib.all_errors as e:
                code = ftp_get_return_code_from_exception(e)
                raise UrlError(
                    cause=(
                        "Reading file from ftp server"
                        f" failed for url {url} [{code}]"
                    ),
                    code=code,
                    headers=None,
                    url=url,
                ) from e
            finally:
                LOG.debug("Closing connection")
                ftp.close()


def _read_file(path: str, **kwargs) -> "FileResponse":
    """read a binary file and return a FileResponse

    matches function signature with read_ftps and read_url
    """
    if kwargs.get("data"):
        LOG.warning("Unable to post data to file resource %s", path)
    try:
        contents = util.load_binary_file(path)
        return FileResponse(contents, path)
    except FileNotFoundError as e:
        raise UrlError(cause=e, code=NOT_FOUND, headers=None, url=path) from e
    except IOError as e:
        raise UrlError(cause=e, code=e.errno, headers=None, url=path) from e


def read_file_or_url(
    url, **kwargs
) -> Union["FileResponse", "UrlResponse", "FtpResponse"]:
    """Wrapper function around readurl to allow passing a file path as url.

    When url is not a local file path, passthrough any kwargs to readurl.

    In the case of parameter passthrough to readurl, default values for some
    parameters. See: call-signature of readurl in this module for param docs.
    """
    url = url.lstrip()
    try:
        parsed = urlparse(url)
    except ValueError as e:
        raise UrlError(cause=e, url=url) from e
    scheme = parsed.scheme
    if scheme == "file" or (url and "/" == url[0]):
        return _read_file(parsed.path, **kwargs)
    elif scheme in ("ftp", "ftps"):
        return read_ftps(url, **kwargs)
    elif scheme in ("http", "https"):
        return readurl(url, **kwargs)
    else:
        LOG.warning("Attempting unknown protocol %s", scheme)
        return readurl(url, **kwargs)


# Made to have same accessors as UrlResponse so that the
# read_file_or_url can return this or that object and the
# 'user' of those objects will not need to know the difference.
class StringResponse:
    def __init__(self, contents, url, code=200):
        self.code = code
        self.headers = {}
        self.contents = contents
        self.url = url

    def ok(self, *args, **kwargs):
        return self.code == 200

    def __str__(self):
        return self.contents.decode("utf-8")


class FileResponse(StringResponse):
    def __init__(self, contents: bytes, url: str, code=200):
        super().__init__(contents, url, code=code)


class FtpResponse(StringResponse):
    def __init__(self, contents: bytes, url: str):
        super().__init__(contents, url)


class UrlResponse:
    def __init__(self, response: requests.Response):
        self._response = response

    @property
    def contents(self) -> bytes:
        if self._response.content is None:
            return b""
        return self._response.content

    @property
    def url(self) -> str:
        return self._response.url

    def ok(self, redirects_ok=False) -> bool:
        upper = 300
        if redirects_ok:
            upper = 400
        if 200 <= self.code < upper:
            return True
        else:
            return False

    @property
    def headers(self):
        return self._response.headers

    @property
    def code(self) -> int:
        return self._response.status_code

    def __str__(self):
        return self._response.text

    def iter_content(
        self, chunk_size: Optional[int] = 1, decode_unicode: bool = False
    ) -> Iterator[bytes]:
        """Iterates over the response data.

        When stream=True is set on the request, this avoids reading the content
        at once into memory for large responses.

        :param chunk_size: Number of bytes it should read into memory.
        :param decode_unicode: If True, content will be decoded using the best
        available encoding based on the response.
        """
        yield from self._response.iter_content(chunk_size, decode_unicode)


class UrlError(IOError):
    def __init__(
        self,
        cause: Any,  # This SHOULD be an exception to wrap, but can be anything
        code: Optional[int] = None,
        headers: Optional[Mapping] = None,
        url: Optional[str] = None,
    ):
        IOError.__init__(self, str(cause))
        self.cause = cause
        self.code = code
        self.headers: Mapping = {} if headers is None else headers
        self.url = url


def _get_ssl_args(url, ssl_details):
    ssl_args = {}
    scheme = urlparse(url).scheme
    if scheme == "https" and ssl_details:
        if "ca_certs" in ssl_details and ssl_details["ca_certs"]:
            ssl_args["verify"] = ssl_details["ca_certs"]
        else:
            ssl_args["verify"] = True
        if "cert_file" in ssl_details and "key_file" in ssl_details:
            ssl_args["cert"] = [
                ssl_details["cert_file"],
                ssl_details["key_file"],
            ]
        elif "cert_file" in ssl_details:
            ssl_args["cert"] = str(ssl_details["cert_file"])
    return ssl_args


def _get_retry_after(retry_after: str) -> float:
    """Parse a Retry-After header value into an integer.

    : param retry_after: The value of the Retry-After header.
        https://www.rfc-editor.org/rfc/rfc9110.html#section-10.2.3
        https://www.rfc-editor.org/rfc/rfc2616#section-3.3
    : return: The number of seconds to wait before retrying the request.
    """
    try:
        to_wait = float(retry_after)
    except ValueError as exc:
        # Translate a date such as "Fri, 31 Dec 1999 23:59:59 GMT"
        # into seconds to wait
        try:
            time_tuple = parsedate(retry_after)
            if not time_tuple:
                raise ValueError(
                    "Failed to parse Retry-After header value"
                ) from exc
            to_wait = float(time.mktime(time_tuple) - time.time())
        except ValueError:
            LOG.info(
                "Failed to parse Retry-After header value: %s. "
                "Waiting 1 second instead.",
                retry_after,
            )
            to_wait = 1
        if to_wait < 0:
            LOG.info(
                "Retry-After header value is in the past. "
                "Waiting 1 second instead."
            )
            to_wait = 1
    return to_wait


def _handle_error(
    error: UrlError,
    *,
    exception_cb: ExceptionCallback = None,
) -> Optional[float]:
    """Handle exceptions raised during request processing.

    If we have no exception callback or the callback handled the error or we
    got a 503, return with an optional timeout so the request can be retried.
    Otherwise, raise the error.

    :param error: The exception raised during the request.
    :param response: The response object.
    :param exception_cb: Callable to handle the exception.

    :return: Optional time to wait before retrying the request.
    """
    if exception_cb and exception_cb(error):
        return None
    if error.code and error.code == 503:
        LOG.warning(
            "Endpoint returned a 503 error. "
            "HTTP endpoint is overloaded. Retrying URL (%s).",
            error.url,
        )
        if error.headers:
            return _get_retry_after(error.headers.get("Retry-After", "1"))
        LOG.info("Unable to introspect response header. Waiting 1 second.")
        return 1
    if not exception_cb:
        return None
    # If exception_cb returned False and there's no 503
    raise error


def readurl(
    url,
    *,
    data=None,
    timeout=None,
    retries=0,
    sec_between=1,
    headers=None,
    headers_cb=None,
    headers_redact=None,
    ssl_details=None,
    check_status=True,
    allow_redirects=True,
    exception_cb: ExceptionCallback = None,
    session=None,
    infinite=False,
    log_req_resp=True,
    request_method="",
    stream: bool = False,
) -> UrlResponse:
    """Wrapper around requests.Session to read the url and retry if necessary

    :param url: Mandatory url to request.
    :param data: Optional form data to post the URL. Will set request_method
        to 'POST' if present.
    :param timeout: Timeout in seconds to wait for a response. May be a tuple
        if specifying (connection timeout, read timeout).
    :param retries: Number of times to retry on exception if exception_cb is
        None or exception_cb returns True for the exception caught. Default is
        to fail with 0 retries on exception.
    :param sec_between: Default 1: amount of seconds passed to time.sleep
        between retries. None or -1 means don't sleep.
    :param headers: Optional dict of headers to send during request
    :param headers_cb: Optional callable returning a dict of values to send as
        headers during request
    :param headers_redact: Optional list of header names to redact from the log
    :param ssl_details: Optional dict providing key_file, ca_certs, and
        cert_file keys for use on in ssl connections.
    :param check_status: Optional boolean set True to raise when HTTPError
        occurs. Default: True.
    :param allow_redirects: Optional boolean passed straight to Session.request
        as 'allow_redirects'. Default: True.
    :param exception_cb: Optional callable to handle exception and returns
        True if retries are permitted.
    :param session: Optional exiting requests.Session instance to reuse.
    :param infinite: Bool, set True to retry indefinitely. Default: False.
    :param log_req_resp: Set False to turn off verbose debug messages.
    :param request_method: String passed as 'method' to Session.request.
        Typically GET, or POST. Default: POST if data is provided, GET
        otherwise.
    :param stream: if False, the response content will be immediately
    downloaded.
    """
    url = _cleanurl(url)
    req_args = {
        "url": url,
        "stream": stream,
    }
    req_args.update(_get_ssl_args(url, ssl_details))
    req_args["allow_redirects"] = allow_redirects
    if not request_method:
        request_method = "POST" if data else "GET"
    req_args["method"] = request_method
    if timeout is not None:
        if isinstance(timeout, tuple):
            req_args["timeout"] = timeout
        else:
            req_args["timeout"] = max(float(timeout), 0)
    if headers_redact is None:
        headers_redact = []
    manual_tries = 1
    if retries:
        manual_tries = max(int(retries) + 1, 1)

    user_agent = "Cloud-Init/%s" % (version.version_string())
    if headers is not None:
        headers = headers.copy()
    else:
        headers = {}

    if data:
        req_args["data"] = data
    if sec_between is None:
        sec_between = -1

    if session is None:
        session = requests.Session()

    # Handle retrying ourselves since the built-in support
    # doesn't handle sleeping between tries...
    for i in count():
        if headers_cb:
            headers = headers_cb(url)

        if "User-Agent" not in headers:
            headers["User-Agent"] = user_agent

        req_args["headers"] = headers
        filtered_req_args = {}
        for k, v in req_args.items():
            if k == "data":
                continue
            if k == "headers" and headers_redact:
                matched_headers = [k for k in headers_redact if v.get(k)]
                if matched_headers:
                    filtered_req_args[k] = copy.deepcopy(v)
                    for key in matched_headers:
                        filtered_req_args[k][key] = REDACTED
            else:
                filtered_req_args[k] = v
        raised_exception: Exception
        try:
            if log_req_resp:
                LOG.debug(
                    "[%s/%s] open '%s' with %s configuration",
                    i,
                    "infinite" if infinite else manual_tries,
                    url,
                    filtered_req_args,
                )

            response = session.request(**req_args)

            if check_status:
                response.raise_for_status()
            LOG.debug(
                "Read from %s (%s, %sb) after %s attempts",
                url,
                response.status_code,
                len(response.content),
                (i + 1),
            )
            # Doesn't seem like we can make it use a different
            # subclass for responses, so add our own backward-compat
            # attrs
            return UrlResponse(response)
        except exceptions.SSLError as e:
            # ssl exceptions are not going to get fixed by waiting a
            # few seconds
            raise UrlError(e, url=url) from e
        except exceptions.HTTPError as e:
            url_error = UrlError(
                e,
                code=e.response.status_code,
                headers=e.response.headers,
                url=url,
            )
            raised_exception = e
        except exceptions.RequestException as e:
            url_error = UrlError(e, url=url)
            raised_exception = e
            response = None

        response_sleep_time = _handle_error(
            url_error,
            exception_cb=exception_cb,
        )
        # If our response tells us to wait, then wait even if we're
        # past the max tries
        if not response_sleep_time:
            will_retry = infinite or (i + 1 < manual_tries)
            if not will_retry:
                raise url_error from raised_exception
        sleep_time = response_sleep_time or sec_between

        if sec_between > 0:
            if log_req_resp:
                LOG.debug(
                    "Please wait %s seconds while we wait to try again",
                    sec_between,
                )
            time.sleep(sleep_time)

    raise RuntimeError("This path should be unreachable...")


def _run_func_with_delay(
    func: Callable[..., Any],
    addr: str,
    timeout: int,
    event: threading.Event,
    delay: Optional[float] = None,
) -> Any:
    """Execute func with optional delay"""
    if delay:

        # event returns True iff the flag is set to true: indicating that
        # another thread has already completed successfully, no need to try
        # again - exit early
        if event.wait(timeout=delay):
            return
    return func(addr, timeout)


def dual_stack(
    func: Callable[..., Any],
    addresses: List[str],
    stagger_delay: float = 0.150,
    timeout: int = 10,
) -> Tuple[Optional[str], Optional[UrlResponse]]:
    """execute multiple callbacks in parallel

    Run blocking func against two different addresses staggered with a
    delay. The first call to return successfully is returned from this
    function and remaining unfinished calls are cancelled if they have not
    yet started
    """
    return_result = None
    returned_address = None
    last_exception: Optional[BaseException] = None
    exceptions = []
    is_done = threading.Event()

    # future work: add cancel_futures to Python stdlib ThreadPoolExecutor
    # context manager implementation
    #
    # for now we don't use this feature since it only supports python >3.8
    # and doesn't provide a context manager and only marginal benefit
    executor = ThreadPoolExecutor(max_workers=len(addresses))
    try:
        futures = {
            executor.submit(
                _run_func_with_delay,
                func=func,
                addr=addr,
                timeout=timeout,
                event=is_done,
                delay=(i * stagger_delay),
            ): addr
            for i, addr in enumerate(addresses)
        }

        # handle returned requests in order of completion
        for future in as_completed(futures, timeout=timeout):

            returned_address = futures[future]
            return_exception = future.exception()
            if return_exception:
                last_exception = return_exception
                exceptions.append(last_exception)
            else:
                return_result = future.result()
                if return_result:

                    # communicate to other threads that they do not need to
                    # try: this thread has already succeeded
                    is_done.set()
                    return (returned_address, return_result)

        # No success, return the last exception but log them all for
        # debugging
        if last_exception:
            LOG.debug(
                "Exception(s) %s during request to "
                "%s, raising last exception",
                exceptions,
                returned_address,
            )
            raise last_exception
        else:
            LOG.error("Empty result for address %s", returned_address)
            raise ValueError("No result returned")

    # when max_wait expires, log but don't throw (retries happen)
    except TimeoutError:
        LOG.debug(
            "Timed out waiting for addresses: %s, "
            "exception(s) raised while waiting: %s",
            " ".join(addresses),
            " ".join(map(str, exceptions)),
        )
    finally:
        executor.shutdown(wait=False)

    return (returned_address, return_result)


class HandledResponse(NamedTuple):
    # Set when we have a response to return
    url: Optional[str]
    response: Optional[UrlResponse]

    # Possibly set if we need to try again
    wait_time: Optional[float]


def wait_for_url(
    urls,
    *,
    max_wait: float = float("inf"),
    timeout: Optional[float] = None,
    status_cb: Callable = LOG.debug,  # some sources use different log levels
    headers_cb: Optional[Callable] = None,
    headers_redact=None,
    sleep_time: Optional[float] = None,
    exception_cb: ExceptionCallback = None,
    sleep_time_cb: Optional[Callable[[Any, float], float]] = None,
    request_method: str = "",
    connect_synchronously: bool = True,
    async_delay: float = 0.150,
):
    """Wait for a response from one of the urls provided.

    :param urls: List of urls to try
    :param max_wait: Roughly the maximum time to wait before giving up
        The max time is *actually* len(urls)*timeout as each url will
        be tried once and given the timeout provided.
        a number <= 0 will always result in only one try
    :param timeout: Timeout provided to urlopen
    :param status_cb: Callable with string message when a url is not available
    :param headers_cb: Callable with single argument of url to get headers
        for request.
    :param headers_redact: List of header names to redact from the log
    :param sleep_time: Amount of time to sleep between retries. If this and
        sleep_time_cb are None, the default sleep time defaults to 1 second
        and increases by 1 seconds every 5 tries. Cannot be specified along
        with `sleep_time_cb`.
    :param exception_cb: Callable to handle exception and returns True if
        retries are permitted.
    :param sleep_time_cb: Callable with 2 arguments (response, loop_n) that
        generates the next sleep time. Cannot be specified
        along with 'sleep_time`.
    :param request_method: Indicates the type of HTTP request:
        GET, PUT, or POST
    :param connect_synchronously: If false, enables executing requests
        in parallel
    :param async_delay: Delay before parallel metadata requests, see RFC 6555

    :return: tuple of (url, response contents), on failure, (False, None)

    :raises: UrlError on unrecoverable error
    """

    def default_sleep_time(_, loop_number: int) -> float:
        return sleep_time if sleep_time is not None else loop_number // 5 + 1

    def timeup(max_wait: float, start_time: float, sleep_time: float = 0):
        """Check if time is up based on start time and max wait"""
        if max_wait in (float("inf"), None):
            return False
        return (max_wait <= 0) or (
            time.monotonic() - start_time + sleep_time > max_wait
        )

    def handle_url_response(
        response: Optional[UrlResponse], url: Optional[str]
    ) -> Tuple[Optional[UrlError], str]:
        """Map requests response code/contents to internal "UrlError" type"""
        reason = ""
        url_exc = None
        if not (response and url):
            reason = "Request timed out"
            url_exc = UrlError(ValueError(reason))
            return url_exc, reason
        try:
            # Do this first because it can provide more context for the
            # exception than what comes later
            response._response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            url_exc = UrlError(
                e,
                code=e.response.status_code,
                headers=e.response.headers,
                url=url,
            )
            return url_exc, str(e)
        if not response.contents:
            reason = "empty response [%s]" % (response.code)
            url_exc = UrlError(
                ValueError(reason),
                code=response.code,
                headers=response.headers,
                url=url,
            )
        elif not response.ok():
            # 3xx "errors" wouldn't be covered by the raise_for_status above
            reason = "bad status code [%s]" % (response.code)
            url_exc = UrlError(
                ValueError(reason),
                code=response.code,
                headers=response.headers,
                url=url,
            )
        return (url_exc, reason)

    def read_url_handle_exceptions(
        url_reader_cb: Callable[
            [Any], Tuple[Optional[str], Optional[UrlResponse]]
        ],
        urls: Union[str, List[str]],
        start_time: int,
        exc_cb: ExceptionCallback,
        log_cb: Callable,
    ) -> HandledResponse:
        """Execute request, handle response, optionally log exception"""
        reason = ""
        url = None
        url_exc: Optional[Exception]
        try:
            url, response = url_reader_cb(urls)
            url_exc, reason = handle_url_response(response, url)
            if not url_exc:
                return HandledResponse(url, response, wait_time=None)
        except UrlError as e:
            reason = "request error [%s]" % e
            url_exc = e
        except Exception as e:
            reason = "unexpected error [%s]" % e
            url_exc = e
        time_taken = int(time.monotonic() - start_time)
        max_wait_str = "%ss" % max_wait if max_wait else "unlimited"
        status_msg = "Calling '%s' failed [%s/%s]: %s" % (
            url or getattr(url_exc, "url", "url"),
            time_taken,
            max_wait_str,
            reason,
        )
        log_cb(status_msg)

        return HandledResponse(
            url=None,
            response=None,
            wait_time=(
                _handle_error(url_exc, exception_cb=exc_cb)
                if isinstance(url_exc, UrlError)
                else None
            ),
        )

    def read_url_cb(url: str, timeout: int) -> UrlResponse:
        return readurl(
            url,
            headers={} if headers_cb is None else headers_cb(url),
            headers_redact=headers_redact,
            timeout=timeout,
            check_status=False,
            request_method=request_method,
        )

    def read_url_serial(
        start_time, timeout, exc_cb, log_cb
    ) -> HandledResponse:
        """iterate over list of urls, request each one and handle responses
        and thrown exceptions individually per url
        """

        def url_reader_serial(url: str):
            return (url, read_url_cb(url, timeout))

        wait_times = []
        for url in urls:
            now = time.monotonic()
            if loop_n != 0 and not must_try_again:
                if timeup(max_wait, start_time):
                    return HandledResponse(
                        url=None, response=None, wait_time=None
                    )
                if (
                    max_wait is not None
                    and timeout
                    and (now + timeout > (start_time + max_wait))
                ):
                    # shorten timeout to not run way over max_time
                    timeout = int((start_time + max_wait) - now)

            out = read_url_handle_exceptions(
                url_reader_serial, url, start_time, exc_cb, log_cb
            )
            if out.response:
                return out
            elif out.wait_time:
                wait_times.append(out.wait_time)
        wait_time = max(wait_times) if wait_times else None
        return HandledResponse(url=None, response=None, wait_time=wait_time)

    def read_url_parallel(
        start_time, timeout, exc_cb, log_cb
    ) -> HandledResponse:
        """pass list of urls to dual_stack which sends requests in parallel
        handle response and exceptions of the first endpoint to respond
        """
        url_reader_parallel = partial(
            dual_stack,
            read_url_cb,
            stagger_delay=async_delay,
            timeout=timeout,
        )
        return read_url_handle_exceptions(
            url_reader_parallel, urls, start_time, exc_cb, log_cb
        )

    start_time = time.monotonic()
    if sleep_time and sleep_time_cb:
        raise ValueError("sleep_time and sleep_time_cb are mutually exclusive")

    # Dual-stack support factored out serial and parallel execution paths to
    # allow the retry loop logic to exist separately from the http calls.
    # Serial execution should be fundamentally the same as before, but with a
    # layer of indirection so that the parallel dual-stack path may use the
    # same max timeout logic.
    do_read_url = (
        read_url_serial if connect_synchronously else read_url_parallel
    )

    calculate_sleep_time = sleep_time_cb or default_sleep_time

    loop_n: int = 0
    response = None
    while True:
        resp = do_read_url(start_time, timeout, exception_cb, status_cb)
        must_try_again = False
        if resp.response:
            return resp.url, resp.response.contents
        elif resp.wait_time:
            time.sleep(resp.wait_time)
            loop_n = loop_n + 1
            must_try_again = True
            continue

        current_sleep_time = calculate_sleep_time(response, loop_n)
        if timeup(max_wait, start_time, current_sleep_time):
            break

        loop_n = loop_n + 1
        LOG.debug(
            "Please wait %s seconds while we wait to try again",
            current_sleep_time,
        )
        time.sleep(current_sleep_time)

        # shorten timeout to not run way over max_time
        current_time = time.monotonic()
        if timeout and current_time + timeout > start_time + max_wait:
            timeout = max_wait - (current_time - start_time)
            if timeout <= 0:
                # We've already exceeded our max_wait. Time to bail.
                break

    return False, None


class OauthUrlHelper:
    def __init__(
        self,
        consumer_key=None,
        token_key=None,
        token_secret=None,
        consumer_secret=None,
        skew_data_file="/run/oauth_skew.json",
    ):
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret or ""
        self.token_key = token_key
        self.token_secret = token_secret
        self.skew_data_file = skew_data_file
        self._do_oauth = True
        self.skew_change_limit = 5
        required = (self.token_key, self.token_secret, self.consumer_key)
        if not any(required):
            self._do_oauth = False
        elif not all(required):
            raise ValueError(
                "all or none of token_key, token_secret, or "
                "consumer_key can be set"
            )

        old = self.read_skew_file()
        self.skew_data = old or {}

    def read_skew_file(self):
        if self.skew_data_file and os.path.isfile(self.skew_data_file):
            with performance.Timed(f"Reading {self.skew_data_file}"), open(
                self.skew_data_file, mode="r"
            ) as fp:
                return json.load(fp)
        return None

    def update_skew_file(self, host, value):
        # this is not atomic
        if not self.skew_data_file:
            return
        cur = self.read_skew_file()
        if cur is None:
            cur = {}
        cur[host] = value
        with performance.Timed(f"Writing {self.skew_data_file}"), open(
            self.skew_data_file, mode="w"
        ) as fp:
            fp.write(json.dumps(cur))

    def exception_cb(self, exception):
        if not (
            isinstance(exception, UrlError)
            and (exception.code == 403 or exception.code == 401)
        ):
            return

        if "date" not in exception.headers:
            LOG.warning("Missing header 'date' in %s response", exception.code)
            return

        date = exception.headers["date"]
        try:
            remote_time = time.mktime(parsedate(date))
        except Exception as e:
            LOG.warning("Failed to convert datetime '%s': %s", date, e)
            return

        skew = int(remote_time - time.time())
        host = urlparse(exception.url).netloc
        old_skew = self.skew_data.get(host, 0)
        if abs(old_skew - skew) > self.skew_change_limit:
            self.update_skew_file(host, skew)
            LOG.warning("Setting oauth clockskew for %s to %d", host, skew)
        self.skew_data[host] = skew

        return

    def headers_cb(self, url):
        if not self._do_oauth:
            return {}

        timestamp = None
        host = urlparse(url).netloc
        if self.skew_data and host in self.skew_data:
            timestamp = int(time.time()) + self.skew_data[host]

        return oauth_headers(
            url=url,
            consumer_key=self.consumer_key,
            token_key=self.token_key,
            token_secret=self.token_secret,
            consumer_secret=self.consumer_secret,
            timestamp=timestamp,
        )

    def _wrapped(self, wrapped_func, args, kwargs):
        kwargs["headers_cb"] = partial(
            self._headers_cb, kwargs.get("headers_cb")
        )
        kwargs["exception_cb"] = partial(
            self._exception_cb, kwargs.get("exception_cb")
        )
        return wrapped_func(*args, **kwargs)

    def wait_for_url(self, *args, **kwargs):
        return self._wrapped(wait_for_url, args, kwargs)

    def readurl(self, *args, **kwargs):
        return self._wrapped(readurl, args, kwargs)

    def _exception_cb(self, extra_exception_cb, exception):
        ret = True
        try:
            if extra_exception_cb:
                ret = extra_exception_cb(exception)
        finally:
            self.exception_cb(exception)
        return ret

    def _headers_cb(self, extra_headers_cb, url):
        headers = {}
        if extra_headers_cb:
            headers = extra_headers_cb(url)
        headers.update(self.headers_cb(url))
        return headers


def oauth_headers(
    url, consumer_key, token_key, token_secret, consumer_secret, timestamp=None
):
    try:
        import oauthlib.oauth1 as oauth1
    except ImportError as e:
        raise NotImplementedError("oauth support is not available") from e

    if timestamp:
        timestamp = str(timestamp)
    else:
        timestamp = None

    client = oauth1.Client(
        consumer_key,
        client_secret=consumer_secret,
        resource_owner_key=token_key,
        resource_owner_secret=token_secret,
        signature_method=oauth1.SIGNATURE_PLAINTEXT,
        timestamp=timestamp,
    )
    _uri, signed_headers, _body = client.sign(url)
    return signed_headers