1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
|
"""Base classes for all cache backends

.. automodsumm:: requests_cache.backends.base
   :classes-only:
   :nosignatures:
"""
from __future__ import annotations
from abc import ABC
from collections import UserDict
from datetime import datetime
from logging import getLogger
from pickle import PickleError
from typing import (
TYPE_CHECKING,
Iterable,
Iterator,
List,
MutableMapping,
Optional,
TypeVar,
)
from warnings import warn
from requests import Request, Response
from ..cache_keys import create_key, redact_response
from ..models import AnyRequest, CachedResponse
from ..policy import DEFAULT_CACHE_NAME, CacheSettings, ExpirationTime
from ..serializers import SerializerType, init_serializer
# Exception types that are treated as "this stored value can't be deserialized" rather than as
# fatal errors; ``BaseStorage.deserialize()`` catches exactly this tuple.
DESERIALIZE_ERRORS = (
    AttributeError,
    ImportError,
    PickleError,
    TypeError,
    ValueError,
)

# Module-level logger, named after this module
logger = getLogger(__name__)
class BaseCache:
    """Base class for cache backends. Can be used as a non-persistent, in-memory cache.

    This manages higher-level cache operations, including:

    * Saving and retrieving responses
    * Managing redirect history
    * Convenience methods for general cache info
    * Dict-like wrapper methods around the underlying storage

    Notes:
        * Lower-level storage operations are handled by :py:class:`.BaseStorage`.
        * To extend this with your own custom backend, see :ref:`custom-backends`.

    Args:
        cache_name: Cache prefix or namespace, depending on backend
        serializer: Serializer name or instance
        kwargs: Additional backend-specific keyword arguments
    """

    def __init__(self, cache_name: str = DEFAULT_CACHE_NAME, **kwargs):
        # Note: ``kwargs`` are accepted for subclasses, but are not used by this base class
        self.cache_name = cache_name
        # Two separate stores: cached responses by cache key, and redirect aliases that map the
        # cache key of a redirected request to the cache key of its final response
        self.responses: BaseStorage[str, CachedResponse] = DictStorage()
        self.redirects: BaseStorage[str, str] = DictStorage()
        self._settings = CacheSettings()  # Init and public access is done in CachedSession

    # Main cache operations
    # ---------------------

    def get_response(self, key: str, default=None) -> Optional[CachedResponse]:
        """Retrieve a response from the cache, if it exists

        Args:
            key: Cache key for the response
            default: Value to return if `key` is not in the cache

        Returns:
            The response stored under ``key``, or the response that ``key`` redirects to;
            otherwise ``default``
        """
        try:
            response = self.responses.get(key)
            # Compare against None explicitly instead of relying on truthiness
            if response is None:  # Note: bool(requests.Response) is False if status > 400
                # Not stored directly; it may be an alias created by a redirect
                response = self.responses[self.redirects[key]]
            return response
        except (AttributeError, KeyError):
            return default

    def save_response(
        self,
        response: Response,
        cache_key: Optional[str] = None,
        expires: Optional[datetime] = None,
    ):
        """Save a response to the cache

        Args:
            response: Response to save
            cache_key: Cache key for this response; will otherwise be generated based on request
            expires: Absolute expiration time for this response
        """
        cache_key = cache_key or self.create_key(response.request)
        cached_response = CachedResponse.from_response(response, expires=expires)
        cached_response = redact_response(cached_response, self._settings.ignored_parameters)
        self.responses[cache_key] = cached_response

        # Save redirect aliases, unless this is a revalidation (i.e., it was saved previously)
        if response.history and not cached_response.revalidated:
            for r in response.history:
                self.redirects[self.create_key(r.request)] = cache_key

    def clear(self):
        """Delete all items from the cache"""
        logger.info('Clearing all items from the cache')
        self.responses.clear()
        self.redirects.clear()

    def close(self):
        """Close any open backend connections"""
        logger.debug('Closing backend connections')
        self.responses.close()
        self.redirects.close()

    def create_key(
        self,
        request: AnyRequest,
        match_headers: Optional[Iterable[str]] = None,
        **kwargs,
    ) -> str:
        """Create a normalized cache key from a request object

        Args:
            request: Request to create a key for
            match_headers: Headers to match on; falls back to the ``match_headers`` setting
            kwargs: Additional keyword arguments passed through to the key function

        Returns:
            The cache key produced by the custom ``key_fn`` setting if one is set, otherwise by
            the default key function
        """
        key_fn = self._settings.key_fn if self._settings.key_fn is not None else create_key
        return key_fn(
            request=request,
            ignored_parameters=self._settings.ignored_parameters,
            match_headers=match_headers or self._settings.match_headers,
            serializer=self.responses.serializer,
            **kwargs,
        )

    # Convenience methods
    # --------------------

    def contains(
        self,
        key: Optional[str] = None,
        request: Optional[AnyRequest] = None,
        url: Optional[str] = None,
    ) -> bool:
        """Check if the specified request is cached

        Args:
            key: Check for a specific cache key
            request: Check for a matching request, according to current request matching settings
            url: Check for a matching GET request with the specified URL

        Returns:
            ``True`` if the key is stored either as a response or as a redirect alias
        """
        # Precedence: an explicit ``key`` wins over ``request``, and ``url`` replaces ``request``
        if url:
            request = Request('GET', url)
        if request and not key:
            key = self.create_key(request)
        return key in self.responses or key in self.redirects

    def delete(
        self,
        *keys: str,
        expired: bool = False,
        invalid: bool = False,
        older_than: ExpirationTime = None,
        requests: Optional[Iterable[AnyRequest]] = None,
        urls: Optional[Iterable[str]] = None,
    ):
        """Remove responses from the cache according to one or more conditions.

        Args:
            keys: Remove responses with these cache keys
            expired: Remove all expired responses
            invalid: Remove all invalid responses (that can't be deserialized with current settings)
            older_than: Remove responses older than this value, relative to ``response.created_at``
            requests: Remove matching responses, according to current request matching settings
            urls: Remove matching GET requests for the specified URL(s)
        """
        delete_keys: List[str] = list(keys)
        if urls:
            requests = list(requests or []) + [Request('GET', url).prepare() for url in urls]
        if requests:
            delete_keys += [self.create_key(request) for request in requests]

        # ``valid=False`` restricts the scan to responses matching one of the given conditions
        for response in self.filter(
            valid=False, expired=expired, invalid=invalid, older_than=older_than
        ):
            delete_keys.append(response.cache_key)

        logger.debug(f'Deleting up to {len(delete_keys)} responses')
        # For some backends, we don't want to use bulk_delete if there's only one key
        if len(delete_keys) == 1:
            try:
                del self.responses[delete_keys[0]]
            except KeyError:
                pass
        else:
            self.responses.bulk_delete(delete_keys)
        self._prune_redirects()

    def _prune_redirects(self):
        """Remove any redirects that no longer point to an existing response"""
        invalid_redirects = [k for k, v in self.redirects.items() if v not in self.responses]
        self.redirects.bulk_delete(invalid_redirects)

    def filter(
        self,
        valid: bool = True,
        expired: bool = True,
        invalid: bool = False,
        older_than: ExpirationTime = None,
    ) -> Iterator[CachedResponse]:
        """Get responses from the cache, with optional filters for which responses to include:

        Args:
            valid: Include valid and unexpired responses; set to ``False`` to get **only**
                expired/invalid/old responses
            expired: Include expired responses
            invalid: Include invalid responses (as an empty ``CachedResponse``)
            older_than: Get responses older than this value, relative to ``response.created_at``

        Yields:
            Each cached response that matches at least one of the enabled conditions
        """
        # Nothing was requested, so skip scanning the storage entirely
        if not any([valid, expired, invalid, older_than]):
            return

        for key in self.responses.keys():
            response = self.get_response(key)

            # Use an empty response as a placeholder for an invalid response, if specified
            if invalid and response is None:
                response = CachedResponse(status_code=504)
                response.cache_key = key
                yield response
            elif response is not None and (
                (valid and not response.is_expired)
                or (expired and response.is_expired)
                or (older_than and response.is_older_than(older_than))
            ):
                yield response

    def recreate_keys(self):
        """Recreate cache keys for all previously cached responses

        Each stored response is re-keyed with :py:meth:`create_key`, using the current settings.
        Responses that can't be read back are left untouched, and redirect aliases that pointed to
        a re-keyed response are updated to point to its new key.
        """
        logger.debug('Recreating all cache keys')
        # Old key -> new key, for every response that was actually moved
        renamed = {}

        # Take a snapshot of the keys, since the storage is modified inside the loop
        for old_cache_key in list(self.responses.keys()):
            response = self.responses.get(old_cache_key)
            # Storage returns None for a value that can't be deserialized; there is no request to
            # build a new key from, so leave it in place rather than failing the whole operation
            if response is None:
                continue

            # Adjust empty request body for responses cached before 1.0
            if response.request.body == b'None':
                response.request.body = b''

            new_cache_key = self.create_key(response.request)
            if new_cache_key != old_cache_key:
                self.responses[new_cache_key] = response
                del self.responses[old_cache_key]
                renamed[old_cache_key] = new_cache_key

        # Re-point redirect aliases at the new keys, so they don't dangle (and later get pruned).
        # Collect first, then write, to avoid modifying the storage while iterating over it.
        if renamed:
            stale_aliases = [
                (alias, target) for alias, target in self.redirects.items() if target in renamed
            ]
            for alias, target in stale_aliases:
                self.redirects[alias] = renamed[target]

    # This is deprecated, but still appears in various examples online, so I'll postpone removing it
    # for awhile longer.
    def remove_expired_responses(self, expire_after: ExpirationTime = None):
        """Deprecated; use ``delete(expired=True)`` instead.

        Args:
            expire_after: If set, reset the expiration of existing responses to this value before
                removing expired and invalid responses
        """
        warn(
            'remove_expired_responses() is deprecated; please use delete(expired=True) instead',
            DeprecationWarning,
            stacklevel=2,
        )
        if expire_after:
            self.reset_expiration(expire_after)
        self.delete(expired=True, invalid=True)

    def reset_expiration(self, expire_after: ExpirationTime = None):
        """Set a new expiration value to set on existing cache items

        Args:
            expire_after: New expiration value, **relative to the current time**
        """
        logger.info(f'Resetting expiration with: {expire_after}')
        for response in self.filter():
            response.reset_expiration(expire_after)
            # Write the modified response back to storage
            self.responses[response.cache_key] = response

    def update(self, other: 'BaseCache'):
        """Update this cache with the contents of another cache"""
        logger.debug(f'Copying {len(other.responses)} responses from {repr(other)} to {repr(self)}')
        self.responses.update(other.responses)
        self.redirects.update(other.redirects)

    def urls(self, **kwargs) -> List[str]:
        """Get all unique cached URLs. Optionally takes keyword arguments for :py:meth:`.filter`."""
        return sorted({response.url for response in self.filter(**kwargs)})

    def __str__(self):
        return f'<{self.__class__.__name__}(name={self.cache_name})>'

    def __repr__(self):
        return str(self)
# Generic key and value types for storage classes
KT = TypeVar('KT')
VT = TypeVar('VT')


class BaseStorage(MutableMapping[KT, VT], ABC):
    """Base class for client-agnostic storage implementations. Notes:

    * This provides a common dictionary-like interface for the underlying storage operations
      (create, read, update, delete).
    * One ``BaseStorage`` instance corresponds to a single table/hash/collection, or whatever the
      backend-specific equivalent may be.
    * ``BaseStorage`` subclasses contain no behavior specific to ``requests``, which are handled by
      :py:class:`.BaseCache` subclasses.
    * ``BaseStorage`` also contains a serializer object (defaulting to :py:mod:`pickle`), which
      determines how :py:class:`.CachedResponse` objects are saved internally. See :ref:`serializers`
      for details.

    Args:
        serializer: Custom serializer that provides ``loads`` and ``dumps`` methods.
            If not provided, values will be written as-is.
        decode_content: Decode response body JSON or text into a human-readable format
        kwargs: Additional backend-specific keyword arguments
    """

    def __init__(
        self,
        serializer: Optional[SerializerType] = None,
        decode_content: bool = False,
        **kwargs,
    ):
        # Resolve the serializer argument into the object used by serialize()/deserialize()
        self.serializer = init_serializer(serializer, decode_content)
        logger.debug(f'Initialized {type(self).__name__} with serializer: {self.serializer}')

    def bulk_delete(self, keys: Iterable[KT]):
        """Delete multiple keys from the cache, without raising errors for missing keys.

        This is a naive, generic implementation that subclasses should override with a more
        efficient backend-specific implementation, if possible.
        """
        for key in keys:
            try:
                del self[key]
            except KeyError:
                # A key that is already gone is not an error here
                continue

    def close(self):
        """Close any open backend connections"""

    def serialize(self, value: VT):
        """Serialize a value, if a serializer is available"""
        if TYPE_CHECKING:
            assert hasattr(self.serializer, 'dumps')
        # Without a serializer, values are stored as-is
        if not self.serializer:
            return value
        return self.serializer.dumps(value)

    def deserialize(self, key, value: VT):
        """Deserialize a value, if a serializer is available.

        If deserialization fails (usually due to a value saved in an older requests-cache version),
        ``None`` will be returned.
        """
        if not self.serializer:
            return value
        if TYPE_CHECKING:
            assert hasattr(self.serializer, 'loads')

        try:
            result = self.serializer.loads(value)
            # Set cache key, if it's a response object. Objects that don't accept the attribute
            # are returned unchanged.
            try:
                result.cache_key = key
            except AttributeError:
                pass
        except DESERIALIZE_ERRORS as err:
            logger.error(f'Unable to deserialize response: {str(err)}')
            logger.debug(err, exc_info=True)
            return None
        return result

    def __str__(self):
        all_keys = list(self.keys())
        return str(all_keys)
class DictStorage(UserDict, BaseStorage):
    """A basic dict wrapper class for non-persistent, in-memory storage

    .. note::
        This is mostly a placeholder for when no other backends are available. For in-memory
        caching, either :py:class:`.SQLiteCache` (with `use_memory=True`) or :py:class:`.RedisCache`
        is recommended instead.
    """

    def __init__(self, *args, **kwargs):
        # NOTE(review): ``UserDict`` comes first in the bases, so these arguments appear to go to
        # ``UserDict.__init__`` (as initial dict contents) rather than ``BaseStorage.__init__``
        # -- confirm this is intended.
        super().__init__(*args, **kwargs)
        # Values are kept in memory as-is, so no serializer is used
        self.serializer = None

    def __getitem__(self, key):
        """Fetch a stored item, with an extra step for response data.

        A stored response is the same object that was originally saved; it has not been through a
        serialize/deserialize cycle. Its file-like body has therefore already been read, and needs
        to be reset before it's returned.
        """
        value = super().__getitem__(key)
        if getattr(value, 'raw', None):
            value.raw.reset()
        # Attach the cache key, for values that accept the attribute
        try:
            value.cache_key = key
        except AttributeError:
            pass
        return value
|