File: visitedlink_common.h

package info (click to toggle)
chromium 139.0.7258.138-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 6,120,676 kB
  • sloc: cpp: 35,100,869; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (198 lines) | stat: -rw-r--r-- 7,499 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
// Copyright 2006-2008 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
#define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_

#include <stddef.h>
#include <stdint.h>

#include <string_view>
#include <vector>

#include "base/memory/raw_ptr.h"
#include "components/visitedlink/core/visited_link.h"

class GURL;

namespace net {
class SchemefulSite;
}

namespace url {
class Origin;
}

namespace visitedlink {

// number of bytes in the salt
#define LINK_SALT_LENGTH 8

// A multiprocess-safe database of the visited links for the browser. There
// should be exactly one process that has write access (implemented by
// VisitedLinkWriter), while all other processes should be read-only
// (implemented by VisitedLinkReader). These other processes add links by
// calling the writer process to add them for it. The writer may also notify the
// readers to replace their table when the table is resized.
//
// IPC is not implemented in these classes. This is done through callback
// functions supplied by the creator of these objects to allow more flexibility,
// especially for testing.
//
// This class defines the common base for these others. We implement accessors
// for looking things up in the hash table, and for computing hash values and
// fingerprints. Both the writer and the reader inherit from this, and add their
// own code to set up and change these values as their design requires. The
// reader pretty much just sets up the shared memory and saves the pointer. The
// writer does a lot of work to manage the table, reading and writing it to and
// from disk, and resizing it when it gets too full.
//
// To ask whether a page is in history, we compute a 64-bit fingerprint of the
// URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
// we consider it visited. Otherwise, it is unvisited. Note that it is possible
// to get collisions, which is the penalty for not storing all URL strings in
// memory (which could get to be more than we want to have in memory). We use
// a salt value for the links on one computer so that an attacker can not
// manually create a link that causes a collision.
class VisitedLinkCommon {
 public:
  // A number that identifies the URL.
  typedef uint64_t Fingerprint;
  typedef std::vector<Fingerprint> Fingerprints;

  // A hash value of a fingerprint
  typedef int32_t Hash;

  // A fingerprint or hash value that does not exist
  static const Fingerprint null_fingerprint_;
  static const Hash null_hash_;

  VisitedLinkCommon();

  VisitedLinkCommon(const VisitedLinkCommon&) = delete;
  VisitedLinkCommon& operator=(const VisitedLinkCommon&) = delete;

  virtual ~VisitedLinkCommon();

  // Returns the fingerprint for the given URL.
  Fingerprint ComputeURLFingerprint(std::string_view canonical_url) const {
    return ComputeURLFingerprint(canonical_url, salt_);
  }

  // Looks up the given key in the table. Returns true if found. Does not
  // modify the hashtable.
  bool IsVisited(std::string_view canonical_url) const;
  bool IsVisited(const GURL& url) const;
  // To check if a link is visited in a partitioned table, callers MUST
  // provide <link url, top-level site, frame origin> AND the origin salt.
  bool IsVisited(const VisitedLink& link, uint64_t salt);
  bool IsVisited(const GURL& link_url,
                 const net::SchemefulSite& top_level_site,
                 const url::Origin& frame_origin,
                 uint64_t salt);
  bool IsVisited(Fingerprint fingerprint) const;

#ifdef UNIT_TEST
  // Returns statistics about DB usage
  void GetUsageStatistics(int32_t* table_size,
                          VisitedLinkCommon::Fingerprint** fingerprints) {
    *table_size = table_length_;
    *fingerprints = hash_table_;
  }
#endif

 protected:
  // This structure is at the beginning of the unpartitioned shared memory so
  // that the readers can get stats on the table.
  struct SharedHeader {
    // see goes into table_length_
    uint32_t length;

    // goes into salt_
    uint8_t salt[LINK_SALT_LENGTH];

    // Padding to ensure the Fingerprint table is aligned. Without this, reading
    // from the table causes unaligned reads.
    uint8_t padding[4];
  };

  static_assert(sizeof(SharedHeader) % alignof(Fingerprint) == 0,
                "Fingerprint must be aligned when placed after SharedHeader");

  // This structure is at the beginning of the partitioned shared memory so that
  // the readers can get stats on the table. We do not include a salt in this
  // shared header, as the salts are generated per-origin for the partitioned
  // table. Readers will receive their salts via the
  // VisitedLinkNavigationThrottle or via the VisitedLinkNotificationSink IPC.
  struct PartitionedSharedHeader {
    // see goes into table_length_
    uint32_t length;

    // Padding to ensure the Fingerprint table is aligned. Without this, reading
    // from the table causes unaligned reads.
    uint8_t padding[4];
  };

  static_assert(
      sizeof(PartitionedSharedHeader) % alignof(Fingerprint) == 0,
      "Fingerprint must be aligned when placed after PartitionedSharedHeader");

  // Returns the fingerprint at the given index into the URL table. This
  // function should be called instead of accessing the table directly to
  // contain endian issues.
  Fingerprint FingerprintAt(int32_t table_offset) const {
    if (!hash_table_)
      return null_fingerprint_;
    return hash_table_[table_offset];
  }

  // Computes the fingerprint of the given canonical URL. It is static so the
  // same algorithm can be re-used by the table rebuilder, so you will have to
  // pass the salt as a parameter. See the non-static version above if you
  // want to use the current class' salt.
  static Fingerprint ComputeURLFingerprint(
      std::string_view canonical_url,
      const uint8_t salt[LINK_SALT_LENGTH]);

  // Computes the fingerprint of the given VisitedLink using the provided
  // per-origin `salt`.
  static Fingerprint ComputePartitionedFingerprint(const VisitedLink& link,
                                                   uint64_t salt);

  // Computes the fingerprint of the given partition key. Used when constructing
  // the partitioned :visited link hashtable.
  static Fingerprint ComputePartitionedFingerprint(
      std::string_view canonical_link_url,
      const net::SchemefulSite& top_level_site,
      const url::Origin& frame_origin,
      uint64_t salt);

  // Computes the hash value of the given fingerprint, this is used as a lookup
  // into the hashtable.
  static Hash HashFingerprint(Fingerprint fingerprint, int32_t table_length) {
    if (table_length == 0)
      return null_hash_;
    return static_cast<Hash>(fingerprint % table_length);
  }
  // Uses the current hashtable.
  Hash HashFingerprint(Fingerprint fingerprint) const {
    return HashFingerprint(fingerprint, table_length_);
  }

  // pointer to the first item
  // May temporarily point to an old unmapped region during update.
  raw_ptr<VisitedLinkCommon::Fingerprint,
          DisableDanglingPtrDetection | AllowPtrArithmetic>
      hash_table_ = nullptr;

  // the number of items in the hash table
  int32_t table_length_ = 0;

  // salt used for each URL when computing the fingerprint
  uint8_t salt_[LINK_SALT_LENGTH];
};

}  // namespace visitedlink

#endif  // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_