1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
|
"""
Utility functions for encoding and decoding response bodies.
"""
from __future__ import absolute_import
import codecs
import collections
import six
from io import BytesIO
import gzip
import zlib
import brotli
from typing import Union # noqa
# encode() and decode() share one single-entry cache.
# This pays off in practice: for example,
#     flow.request.content = flow.request.content.replace(b"foo", b"bar")
# needs no .encode() call when content does not contain b"foo".
CachedDecode = collections.namedtuple(
    "CachedDecode",
    ["encoded", "encoding", "errors", "decoded"],
)
# Start out empty: no field can match a real lookup.
_cache = CachedDecode(encoded=None, encoding=None, errors=None, decoded=None)
def decode(encoded, encoding, errors='strict'):
    # type: (Union[str, bytes], str, str) -> Union[str, bytes]
    """
    Decode the given input object with the named encoding.

    Empty input is returned unchanged. Encodings present in custom_decode
    are handled by the function registered there; every other name is
    passed to codecs.decode() together with ``errors``. Results for
    "gzip", "deflate" and "br" are remembered in the shared single-entry
    cache, which is only consulted for bytes input.

    Returns:
        The decoded value
    Raises:
        ValueError, if decoding fails. A TypeError raised while decoding
        is propagated unchanged.
    """
    global _cache

    if not len(encoded):
        return encoded

    cache_hit = (
        isinstance(encoded, bytes)
        and _cache.encoded == encoded
        and _cache.encoding == encoding
        and _cache.errors == errors
    )
    if cache_hit:
        return _cache.decoded

    try:
        try:
            result = custom_decode[encoding](encoded)
        except KeyError:
            # Not one of our own encodings: defer to the codecs registry.
            result = codecs.decode(encoded, encoding, errors)
        if encoding in ("gzip", "deflate", "br"):
            _cache = CachedDecode(encoded, encoding, errors, result)
        return result
    except TypeError:
        # Type errors are passed through untouched.
        raise
    except Exception as exc:
        message = "{} when decoding {} with {}: {}".format(
            type(exc).__name__,
            repr(encoded)[:10],
            repr(encoding),
            repr(exc),
        )
        raise ValueError(message)
def encode(decoded, encoding, errors='strict'):
    # type: (Union[str, bytes], str, str) -> Union[str, bytes]
    """
    Encode the given input object with the named encoding.

    Empty input is returned unchanged. Encodings present in custom_encode
    are handled by the function registered there; on Python 3, text input
    is first converted to bytes with str.encode() because those functions
    operate on bytes. Every other encoding name is passed to
    codecs.encode() together with ``errors``, receiving the input object
    exactly as given.

    Results for "gzip", "deflate" and "br" are remembered in the shared
    single-entry cache, which is only consulted for bytes input.

    Args:
        decoded: The value to encode.
        encoding: Name of a custom encoding or of a registered codec.
        errors: Error handler forwarded to codecs.encode().
    Returns:
        The encoded value
    Raises:
        ValueError, if encoding fails. A TypeError raised while encoding
        is propagated unchanged.
    """
    if len(decoded) == 0:
        return decoded

    global _cache
    cached = (
        isinstance(decoded, bytes) and
        _cache.decoded == decoded and
        _cache.encoding == encoding and
        _cache.errors == errors
    )
    if cached:
        return _cache.encoded
    try:
        value = decoded
        try:
            # Resolve the encoder before touching the input, so that text
            # destined for a regular codec is never pre-encoded here (the
            # pre-encoding could fail on text the codec itself, with the
            # requested error handler, is able to encode).
            encoder = custom_encode[encoding]
        except KeyError:
            encoded = codecs.encode(decoded, encoding, errors)
        else:
            if not six.PY2 and isinstance(value, six.string_types):
                value = decoded.encode()
            encoded = encoder(value)
        if encoding in ("gzip", "deflate", "br"):
            # Cache the bytes that were actually compressed, so a later
            # decode() cache hit returns the same type as a real decode.
            _cache = CachedDecode(encoded, encoding, errors, value)
        return encoded
    except TypeError:
        raise
    except Exception as e:
        raise ValueError("{} when encoding {} with {}: {}".format(
            type(e).__name__,
            repr(decoded)[:10],
            repr(encoding),
            repr(e),
        ))
def identity(content):
    """
    No-op coding: hand back exactly the object that was passed in.

    Identity is the default value of Accept-Encoding headers.
    """
    return content
def decode_gzip(content):
    """
    Decompress gzip-compressed bytes and return the decompressed data.

    The reader is used as a context manager so that it is closed even
    when reading fails. Errors raised by the gzip module for malformed
    input propagate to the caller.
    """
    with gzip.GzipFile(fileobj=BytesIO(content)) as gfile:
        return gfile.read()
def encode_gzip(content):
    """
    Compress bytes with gzip and return the complete gzip stream.

    The writer is used as a context manager so that it is closed (and the
    gzip trailer flushed into the buffer) even when writing fails.
    """
    buf = BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as gf:
        gf.write(content)
    # GzipFile does not close a fileobj it was handed, so the buffer is
    # still readable once the writer has been closed.
    return buf.getvalue()
def decode_brotli(content):
    """
    Decompress Brotli-compressed content via brotli.decompress().
    """
    decompressed = brotli.decompress(content)
    return decompressed
def encode_brotli(content):
    """
    Compress content with Brotli via brotli.compress().
    """
    compressed = brotli.compress(content)
    return compressed
def decode_deflate(content):
    """
    Decompress DEFLATE data and return the result.

    A regular zlib stream (with header and checksum) is tried first.
    Some servers may send the compressed data without either, so when
    zlib rejects the input it is decompressed a second time as a raw
    stream, using an undocumented zlib feature that permits lenient
    decompression of data missing both values.

    http://bugs.python.org/issue5784
    """
    try:
        inflated = zlib.decompress(content)
    except zlib.error:
        # A negative window size tells zlib to expect no header/checksum.
        inflated = zlib.decompress(content, -zlib.MAX_WBITS)
    return inflated
def encode_deflate(content):
    """
    Compress content with zlib.compress(); the output always carries the
    zlib header and checksum.
    """
    deflated = zlib.compress(content)
    return deflated
# Decoders implemented in this module, keyed by encoding name.
# decode() hands any name not listed here to the codecs module.
custom_decode = dict(
    none=identity,
    identity=identity,
    gzip=decode_gzip,
    deflate=decode_deflate,
    br=decode_brotli,
)
# Encoders implemented in this module, keyed by encoding name.
# encode() hands any name not listed here to the codecs module.
custom_encode = dict(
    none=identity,
    identity=identity,
    gzip=encode_gzip,
    deflate=encode_deflate,
    br=encode_brotli,
)
# Public API: only the two generic entry points are exported.
__all__ = [
    "encode",
    "decode",
]
|