1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
from __future__ import annotations
from base64 import b64encode
from collections.abc import Mapping, MutableMapping, Sequence
from io import BytesIO
from typing import Any, TypeAlias, overload
from w3lib.util import to_bytes, to_unicode
# Parameter type of ``headers_dict_to_raw``: header names are bytes and each
# value is annotated as ``Any | Sequence[bytes]``.
HeadersDictInput: TypeAlias = Mapping[bytes, Any | Sequence[bytes]]
# Return type of ``headers_raw_to_dict``: each header name (bytes) maps to the
# list of values collected for that name.
HeadersDictOutput: TypeAlias = MutableMapping[bytes, list[bytes]]
@overload
def headers_raw_to_dict(headers_raw: bytes) -> HeadersDictOutput: ...
@overload
def headers_raw_to_dict(headers_raw: None) -> None: ...
def headers_raw_to_dict(headers_raw: bytes | None) -> HeadersDictOutput | None:
r"""
Convert raw headers (single multi-line bytestring)
to a dictionary.
For example:
>>> import w3lib.http
>>> w3lib.http.headers_raw_to_dict(b"Content-type: text/html\n\rAccept: gzip\n\n") # doctest: +SKIP
{'Content-type': ['text/html'], 'Accept': ['gzip']}
Incorrect input:
>>> w3lib.http.headers_raw_to_dict(b"Content-typt gzip\n\n")
{}
>>>
Argument is ``None`` (return ``None``):
>>> w3lib.http.headers_raw_to_dict(None)
>>>
"""
if headers_raw is None:
return None
if not headers_raw:
return {}
result_dict: HeadersDictOutput = {}
for header in BytesIO(headers_raw):
key, sep, value = header.partition(b":")
if not sep:
continue
key, value = key.strip(), value.strip()
if key in result_dict:
result_dict[key].append(value)
else:
result_dict[key] = [value]
return result_dict
@overload
def headers_dict_to_raw(headers_dict: HeadersDictInput) -> bytes: ...
@overload
def headers_dict_to_raw(headers_dict: None) -> None: ...
def headers_dict_to_raw(headers_dict: HeadersDictInput | None) -> bytes | None:
r"""
Returns a raw HTTP headers representation of headers
For example:
>>> import w3lib.http
>>> w3lib.http.headers_dict_to_raw({b'Content-type': b'text/html', b'Accept': b'gzip'}) # doctest: +SKIP
'Content-type: text/html\\r\\nAccept: gzip'
>>>
Note that keys and values must be bytes.
Argument is ``None`` (returns ``None``):
>>> w3lib.http.headers_dict_to_raw(None)
>>>
"""
if headers_dict is None:
return None
if not headers_dict:
return b""
parts = bytearray()
for key, value in headers_dict.items():
if isinstance(value, bytes):
if parts:
parts.extend(b"\r\n")
parts.extend(key + b": " + value)
elif isinstance(value, (list, tuple)):
for v in value:
if parts:
parts.extend(b"\r\n")
parts.extend(key + b": " + v)
return bytes(parts)
def basic_auth_header(
    username: str | bytes, password: str | bytes, encoding: str = "ISO-8859-1"
) -> bytes:
    """
    Build the value of an `Authorization` header for `HTTP Basic Access Authentication (RFC 2617)`_

    Both credentials are passed through ``to_unicode``, joined as
    ``username:password``, converted with ``to_bytes`` using *encoding*,
    base64-encoded and prefixed with ``Basic ``.

    >>> import w3lib.http
    >>> w3lib.http.basic_auth_header('someuser', 'somepass')
    b'Basic c29tZXVzZXI6c29tZXBhc3M='

    .. _HTTP Basic Access Authentication (RFC 2617): http://www.ietf.org/rfc/rfc2617.txt

    """
    user_text = to_unicode(username)
    password_text = to_unicode(password)
    # XXX: RFC 2617 leaves the credentials encoding undefined; ISO-8859-1
    # appears to be the most commonly used one, hence the default. See also:
    # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html
    credentials = to_bytes(f"{user_text}:{password_text}", encoding=encoding)
    return b"Basic " + b64encode(credentials)
|