File: visitedlink_common.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (184 lines) | stat: -rw-r--r-- 6,497 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/390223051): Remove C-library calls to fix the errors.
#pragma allow_unsafe_libc_calls
#endif

#include "components/visitedlink/common/visitedlink_common.h"

#include <string.h>  // for memset()

#include <ostream>
#include <string_view>

#include "base/bit_cast.h"
#include "base/check.h"
#include "base/hash/md5.h"
#include "base/notreached.h"
#include "components/visitedlink/core/visited_link.h"
#include "net/base/schemeful_site.h"
#include "url/gurl.h"
#include "url/origin.h"

namespace {
visitedlink::VisitedLinkCommon::Fingerprint ConvertDigestToFingerprint(
    base::MD5Digest digest) {
  // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that
  // direct cast the alignment could be wrong, and we can't access a 64-bit int
  // on arbitrary alignment on some processors. This reinterpret_casts it
  // down to a char array of the same size as fingerprint, and then does the
  // bit cast, which amounts to a memcpy. This does not handle endian issues.
  return base::bit_cast<visitedlink::VisitedLinkCommon::Fingerprint,
                        uint8_t[8]>(
      *reinterpret_cast<uint8_t(*)[8]>(&digest.a));
}
}  // namespace

namespace visitedlink {

const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0;
const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1;

VisitedLinkCommon::VisitedLinkCommon() {
  memset(salt_, 0, sizeof(salt_));
}

VisitedLinkCommon::~VisitedLinkCommon() = default;

// FIXME: this uses linear probing, it should be replaced with quadratic
// probing or something better. See VisitedLinkWriter::AddFingerprint
bool VisitedLinkCommon::IsVisited(std::string_view canonical_url) const {
  if (canonical_url.size() == 0) {
    return false;
  }
  if (!hash_table_ || table_length_ == 0) {
    return false;
  }
  return IsVisited(ComputeURLFingerprint(canonical_url));
}

bool VisitedLinkCommon::IsVisited(const GURL& url) const {
  return IsVisited(url.spec());
}

bool VisitedLinkCommon::IsVisited(const VisitedLink& link, uint64_t salt) {
  if (!hash_table_ || table_length_ == 0) {
    return false;
  }
  if (!link.IsValid()) {
    return false;
  }
  return IsVisited(ComputePartitionedFingerprint(link, salt));
}

bool VisitedLinkCommon::IsVisited(const GURL& link_url,
                                  const net::SchemefulSite& top_level_site,
                                  const url::Origin& frame_origin,
                                  uint64_t salt) {
  const VisitedLink link = {link_url, top_level_site, frame_origin};
  return IsVisited(link, salt);
}

bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const {
  // Go through the table until we find the item or an empty spot (meaning it
  // wasn't found). This loop will terminate as long as the table isn't full,
  // which should be enforced by AddFingerprint.
  Hash first_hash = HashFingerprint(fingerprint);
  Hash cur_hash = first_hash;
  while (true) {
    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    if (cur_fingerprint == null_fingerprint_)
      return false;  // End of probe sequence found.
    if (cur_fingerprint == fingerprint)
      return true;  // Found a match.

    // This spot was taken, but not by the item we're looking for, search in
    // the next position.
    cur_hash++;
    if (cur_hash == table_length_)
      cur_hash = 0;
    if (cur_hash == first_hash) {
      // Wrapped around and didn't find an empty space, this means we're in an
      // infinite loop because AddFingerprint didn't do its job resizing.
      NOTREACHED();
    }
  }
}

// Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint,
// this is as random as any other subset of the MD5SUM.
//
// FIXME: this uses the MD5SUM of the 16-bit character version. For systems
// where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be
// compatable. We should define explicitly what should happen here across
// platforms, and convert if necessary (probably to UTF-16).

// static
VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint(
    std::string_view canonical_url,
    const uint8_t salt[LINK_SALT_LENGTH]) {
  DCHECK(canonical_url.size() > 0) << "Canonical URLs should not be empty";

  base::MD5Context ctx;
  base::MD5Init(&ctx);
  base::MD5Update(&ctx, std::string_view(reinterpret_cast<const char*>(salt),
                                         LINK_SALT_LENGTH));
  base::MD5Update(&ctx, canonical_url);

  base::MD5Digest digest;
  base::MD5Final(&digest, &ctx);

  return ConvertDigestToFingerprint(digest);
}

// static
VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputePartitionedFingerprint(
    const VisitedLink& link,
    uint64_t salt) {
  return ComputePartitionedFingerprint(
      link.link_url.spec(), link.top_level_site, link.frame_origin, salt);
}

// static
VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputePartitionedFingerprint(
    std::string_view canonical_link_url,
    const net::SchemefulSite& top_level_site,
    const url::Origin& frame_origin,
    uint64_t salt) {
  DCHECK(canonical_link_url.size() > 0)
      << "Canonical link URLs should not be empty";
  DCHECK(!top_level_site.opaque())
      << "Do not call ComputePartitionedFingerprint with an opaque top-level "
         "site.";
  DCHECK(!frame_origin.opaque()) << "Do not call ComputePartitionedFingerprint "
                                    "with an opaque frame origin.";

  base::MD5Context ctx;
  base::MD5Init(&ctx);

  // Salt the hash.
  base::MD5Update(&ctx, std::string_view(reinterpret_cast<const char*>(&salt),
                                         sizeof(salt)));

  // Add the link url.
  base::MD5Update(&ctx, canonical_link_url);

  // Add the serialized schemeful top-level site.
  const std::string serialized_site = top_level_site.Serialize();
  base::MD5Update(
      &ctx, std::string_view(serialized_site.data(), serialized_site.size()));

  // Add the serialized frame origin.
  const std::string serialized_origin = frame_origin.Serialize();
  base::MD5Update(&ctx, std::string_view(serialized_origin.data(),
                                         serialized_origin.size()));
  base::MD5Digest digest;
  base::MD5Final(&digest, &ctx);

  return ConvertDigestToFingerprint(digest);
}

}  // namespace visitedlink