File: no_vary_search_cache.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (346 lines) | stat: -rw-r--r-- 14,193 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef NET_HTTP_NO_VARY_SEARCH_CACHE_H_
#define NET_HTTP_NO_VARY_SEARCH_CACHE_H_

#include <stddef.h>

#include <compare>
#include <map>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>

#include "base/containers/linked_list.h"
#include "base/functional/function_ref.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/stack_allocated.h"
#include "base/memory/weak_ptr.h"
#include "base/types/strong_alias.h"
#include "net/base/does_url_match_filter.h"
#include "net/base/net_export.h"
#include "net/base/pickle_traits.h"
#include "net/http/http_no_vary_search_data.h"
#include "net/http/http_request_info.h"
#include "url/gurl.h"

namespace net {

class HttpResponseHeaders;

// An in-memory cache that permits looking up a URL and seeing if it matches a
// previous response according to the rules of the No-Vary-Search header (see
// https://httpwg.org/http-extensions/draft-ietf-httpbis-no-vary-search.html).
// See also the design doc at
// https://docs.google.com/document/d/1RS3q6qZ7-k9CvZsDYseGOXzcdQ9fGZ6YYnaW7fTPu7A/edit
//
// Owned by net::HttpCache.
//
// Ignoring eviction, the data structure is approximately equivalent to
// std::map<std::pair<BaseURLCacheKey, HttpNoVarySearchData>,
//          std::list<QueryString>>.
//
// BaseURLCacheKey is the output of the HttpCache key algorithm run on the base
// URL (everything before the "?"). So it incorporates the NetworkIsolationKey
// when split cache is enabled.
class NET_EXPORT_PRIVATE NoVarySearchCache {
 private:
  // Declared here so that it can be mentioned in the definition of EraseHandle.
  class QueryString;

 public:
  // Opaque object that permits erasure of an item from the cache.
  // See comments on the Lookup() and Erase() methods for usage.
  class NET_EXPORT_PRIVATE EraseHandle {
   public:
    ~EraseHandle();

    // Not copyable.
    EraseHandle(const EraseHandle&) = delete;
    EraseHandle& operator=(const EraseHandle&) = delete;

    // Movable.
    EraseHandle(EraseHandle&& rhs);
    EraseHandle& operator=(EraseHandle&& rhs);

    // For unit tests it is useful to be able to inspect this.
    bool EqualsForTesting(const EraseHandle& rhs) const;
    bool IsGoneForTesting() const;

   private:
    friend class NoVarySearchCache;
    friend class QueryString;

    explicit EraseHandle(base::WeakPtr<QueryString> query_string);

    base::WeakPtr<QueryString> query_string_;
  };

  // An interface for receiving notifications about changes to the
  // NoVarySearchCache. Only insertions and refreshes via MaybeInsert() and
  // erasures via Erase() are reported to this interface. Evictions are
  // implicit, and modifications via ClearData() are expected to be followed by
  // persisting a fresh copy of the database.
  class NET_EXPORT_PRIVATE Journal {
   public:
    Journal() = default;

    Journal(const Journal&) = delete;
    Journal& operator=(const Journal&) = delete;

    // Called when an entry is inserted or refreshed by the MaybeInsert()
    // method. Not called when MaybeInsert() results in no changes to the
    // database. Also called by MergeFrom() for each merged entry.
    virtual void OnInsert(const std::string& base_url_cache_key,
                          const HttpNoVarySearchData& nvs_data,
                          const std::optional<std::string>& query,
                          base::Time update_time) = 0;

    // Called when an entry is erased by the Erase() method.
    virtual void OnErase(const std::string& base_url_cache_key,
                         const HttpNoVarySearchData& nvs_data,
                         const std::optional<std::string>& query) = 0;

   protected:
    // Journal objects are never deleted via a base class pointer.
    virtual ~Journal();
  };

  struct LookupResult {
    GURL original_url;
    EraseHandle erase_handle;
  };

  // The cache will hold at most `max_size` entries. Each entry stores the query
  // parameter from a previous response, which will typically be 100 to 200
  // bytes.
  explicit NoVarySearchCache(size_t max_size);

  // Move-constructible to permit deserialization and passing between threads.
  NoVarySearchCache(NoVarySearchCache&&);

  // Not copyable or assignable.
  NoVarySearchCache(const NoVarySearchCache&) = delete;
  NoVarySearchCache& operator=(const NoVarySearchCache&) = delete;
  NoVarySearchCache& operator=(NoVarySearchCache&&) = delete;

  ~NoVarySearchCache();

  // Finds an entry in the cache equivalent to `request.url` and in the same
  // cache partition. If a result is returned, then `original_url` can be used
  // to find a disk cache entry. `erase_handle` can be used to remove the entry
  // from this cache if it was not in the disk cache. Not const because it
  // updates the LRU linked list to mark the entry as recently used.
  std::optional<LookupResult> Lookup(const HttpRequestInfo& request);

  // Inserts `url` into the cache if a non-default "No-Vary-Search" header was
  // found in `headers`. On insertion, will remove any existing matching entry
  // with the same No-Vary-Search header, as the older entry would never be
  // returned by Lookup() anyway. May evict the oldest entry in the cache to
  // avoid the size exceeding `max_size_`.
  void MaybeInsert(const HttpRequestInfo& request,
                   const HttpResponseHeaders& headers);

  // Synchronously deletes entries that match `origins` or `domains` with update
  // times equal or greater than `delete_begin` and less than `delete_end`.
  // Setting `filter_type` to UrlFilterType::kFalseIfMatching inverts the
  // meaning of `origins` and `domains` as with DoesURlMatchFilter(), but
  // doesn't affect the interpretation of `delete_begin` and `delete_end`.
  // In particular, ClearData(URLFilterType::kFalseIfMatching, {}, {},
  // base::Time(), base::Time::Max()) will delete everything. Returns `true` if
  // anything was removed.
  bool ClearData(UrlFilterType filter_type,
                 const base::flat_set<url::Origin>& origins,
                 const base::flat_set<std::string>& domains,
                 base::Time delete_begin,
                 base::Time delete_end);

  // Erases the entry referenced by `erase_handle` from the cache. Does
  // nothing if the entry no longer exists.
  void Erase(EraseHandle handle);

  // Set a Journal to be notified about subsequent changes to the cache. This
  // object does not take ownership of the Journal. Calling the method again
  // will replace the journal. The method can be called with nullptr to stop
  // being notified.
  void SetJournal(Journal* journal);

  // Adds the specified entry to the cache as if by MaybeInsert(), evicting an
  // older entry if the cache is full. The entry is treated as if newly used for
  // the purposes of eviction. For use when replaying journalled entries. The
  // arguments are expected to match a previous call to Journal::OnInsert()
  // from a different instance of NoVarySearchCache, but with the same settings
  // for cache partitioning. It can also be called with other valid arguments
  // for testing. If a valid base URL cannot be extracted from
  // `base_url_cache_key`, or `query` contains an invalid character, the call is
  // ignored. This will never happen if the arguments are unchanged from a call
  // to Journal::OnInsert() with the same partitioning. A valid base URL does
  // not contain a query or a fragment. Journal methods are not called.
  void ReplayInsert(std::string base_url_cache_key,
                    HttpNoVarySearchData nvs_data,
                    std::optional<std::string> query,
                    base::Time update_time);

  // Removes the specified entry from the cache as if by Erase(). For use when
  // replaying journalled entries. The arguments are expected to match a
  // previous call to Journal::OnErase from a different instance of
  // NoVarySearchCache, with the same settings for cache partitioning
  // base::Features. If `query` is not found the call silently
  // does nothing. Journal methods are not called.
  void ReplayErase(const std::string& base_url_cache_key,
                   const HttpNoVarySearchData& nvs_data,
                   const std::optional<std::string>& query);

  // Merge entries from `newer` in order from the least-recently-used to the
  // most-recently-used, treating them as newly used. Less recently-used entries
  // will be evicted if necessary to avoid exceeding the maximum size.
  // Journal::OnInsert() is called as if the entries were newly inserted (but
  // with the original update_time).
  void MergeFrom(const NoVarySearchCache& newer);

  // Returns the size (number of stored original query strings) of the cache.
  size_t size() const { return size_; }

  // Return the maximum size for the cache. Attempting to add more than this
  // many entries will result in older entries being evicted.
  size_t max_size() const { return max_size_; }

  // Returns true if the top-level map is empty. This should be equivalent to
  // size() == 0 in the absence of bugs.
  bool IsTopLevelMapEmptyForTesting() const;

 private:
  friend struct PickleTraits<NoVarySearchCache>;

  struct QueryStringList;
  friend struct PickleTraits<NoVarySearchCache::QueryStringList>;

  using BaseURLCacheKey =
      base::StrongAlias<struct BaseURLCacheKeyTagType, std::string>;
  friend struct PickleTraits<NoVarySearchCache::BaseURLCacheKey>;

  class LruNode;
  class QueryStringListNode;

  struct QueryStringList {
    base::LinkedList<QueryStringListNode> list;
    // nvs_data_ref can't be raw_ref because it needs to be lazily initialized
    // after the QueryStringList has been added to the map.
    raw_ptr<const HttpNoVarySearchData> nvs_data_ref = nullptr;

    // key_ref can't be raw_ref because it needs to be added in a second pass
    // during deserialization.
    raw_ptr<const BaseURLCacheKey> key_ref = nullptr;

    // The referent of this reference has to be the actual key in the map. It is
    // not sufficient for the value to match, because the lifetime has to be the
    // same.
    explicit QueryStringList(const BaseURLCacheKey& key);

    // Needed during deserialization.
    QueryStringList();

    // Only used during deserialization. This is O(N) in the size of `list`.
    QueryStringList(QueryStringList&&);

    // base::LinkedList<> does not do memory management, so make sure the
    // contents of `list` are deleted on destruction.
    ~QueryStringList();
  };

  struct FindQueryStringResult {
    STACK_ALLOCATED();  // `match` doesn't need to be raw_ptr.

   public:
    QueryString* match;
    GURL original_url;
  };

  // TODO(crbug.com/382394774): Investigate performance of different map types.
  using DataMapType = std::map<HttpNoVarySearchData, QueryStringList>;
  using OuterMapType = std::map<BaseURLCacheKey, DataMapType, std::less<>>;

  // Erases an entry from the cache if `size_ > max_size_`.
  void EvictIfOverfull();

  // Erases `query_string` from the cache.
  void EraseQuery(QueryString* query_string);

  // Inserts `query` or marks it as used in the cache. evicting an older entry
  // if necessary to make space. `journal` is notified if set.
  void DoInsert(const GURL& url,
                const GURL& base_url,
                std::string base_url_cache_key,
                HttpNoVarySearchData nvs_data,
                std::optional<std::string_view> query,
                base::Time update_time,
                Journal* journal);

  // A convenience method for callers that do not have the original URL handy.
  // Reconstructs the original URL and then calls DoInsert().
  void ReconstructURLAndDoInsert(const GURL& base_url,
                                 std::string base_url_cache_key,
                                 HttpNoVarySearchData nvs_data,
                                 std::optional<std::string> query,
                                 base::Time update_time,
                                 Journal* journal);

  // Scans all the QueryStrings in `data_map` to find ones in the range
  // [delete_begin, delete_end) and appends them to `matches`. `data_map` is
  // mutable to reflect that it is returning mutable pointers to QueryString
  // objects that it owns. The returned QueryString objects are mutable so the
  // caller can erase them.
  static void FindQueryStringsInTimeRange(DataMapType& data_map,
                                          base::Time delete_begin,
                                          base::Time delete_end,
                                          std::vector<QueryString*>& matches);

  static std::optional<FindQueryStringResult> FindQueryStringInList(
      QueryStringList& query_strings,
      const GURL& base,
      const GURL& url,
      const HttpNoVarySearchData& nvs_data);

  // Calls f(query_string_ptr) for every QueryString in `list`.
  static void ForEachQueryString(base::LinkedList<QueryStringListNode>& list,
                                 base::FunctionRef<void(QueryString*)> f);

  // Calls f(const_query_string_ptr) for every QueryString in `list`.
  static void ForEachQueryString(
      const base::LinkedList<QueryStringListNode>& list,
      base::FunctionRef<void(const QueryString*)> f);

  // The main cache data structure.
  OuterMapType map_;

  // lru_.tail() is the least-recently-used QueryString.
  base::LinkedList<LruNode> lru_;

  // The number of QueryString objects in the cache.
  size_t size_ = 0u;

  // QueryString objects will be evicted to avoid exceeding `max_size_`.
  const size_t max_size_;

  // An object to be notified about changes to this cache.
  raw_ptr<Journal> journal_ = nullptr;
};

template <>
struct NET_EXPORT_PRIVATE PickleTraits<NoVarySearchCache> {
  static void Serialize(base::Pickle& pickle, const NoVarySearchCache& cache);

  static std::optional<NoVarySearchCache> Deserialize(
      base::PickleIterator& iter);

  static size_t PickleSize(const NoVarySearchCache& cache);
};

}  // namespace net

#endif  // NET_HTTP_NO_VARY_SEARCH_CACHE_H_