File: visitedlink_common.cc

package info (click to toggle)
chromium 141.0.7390.107-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,246,132 kB
  • sloc: cpp: 35,264,965; ansic: 7,169,920; javascript: 4,250,185; python: 1,460,635; asm: 950,788; xml: 751,751; pascal: 187,972; sh: 89,459; perl: 88,691; objc: 79,953; sql: 53,924; cs: 44,622; fortran: 24,137; makefile: 22,313; tcl: 15,277; php: 14,018; yacc: 8,995; ruby: 7,553; awk: 3,720; lisp: 3,096; lex: 1,330; ada: 727; jsp: 228; sed: 36
file content (161 lines) | stat: -rw-r--r-- 5,357 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/visitedlink/common/visitedlink_common.h"

#include <string.h>  // for memset()

#include <ostream>
#include <string_view>

#include "base/bit_cast.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include "base/notreached.h"
#include "base/numerics/byte_conversions.h"
#include "components/visitedlink/core/visited_link.h"
#include "crypto/obsolete/md5.h"
#include "net/base/schemeful_site.h"
#include "url/gurl.h"
#include "url/origin.h"

namespace {
visitedlink::VisitedLinkCommon::Fingerprint ConvertDigestToFingerprint(
    std::array<uint8_t, crypto::obsolete::Md5::kSize> digest) {
  auto first = base::span(digest).first<sizeof(uint64_t)>();
  return base::U64FromNativeEndian(first);
}
}  // namespace

namespace visitedlink {

crypto::obsolete::Md5 MakeMd5HasherForVisitedLink() {
  return {};
}

const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0;
const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1;

VisitedLinkCommon::VisitedLinkCommon() = default;

VisitedLinkCommon::~VisitedLinkCommon() = default;

// FIXME: this uses linear probing, it should be replaced with quadratic
// probing or something better. See VisitedLinkWriter::AddFingerprint
bool VisitedLinkCommon::IsVisited(std::string_view canonical_url) const {
  if (canonical_url.size() == 0) {
    return false;
  }
  if (!hash_table_ || table_length_ == 0) {
    return false;
  }
  return IsVisited(ComputeURLFingerprint(canonical_url));
}

bool VisitedLinkCommon::IsVisited(const GURL& url) const {
  return IsVisited(url.spec());
}

bool VisitedLinkCommon::IsVisited(const VisitedLink& link, uint64_t salt) {
  if (!hash_table_ || table_length_ == 0) {
    return false;
  }
  if (!link.IsValid()) {
    return false;
  }
  return IsVisited(ComputePartitionedFingerprint(link, salt));
}

bool VisitedLinkCommon::IsVisited(const GURL& link_url,
                                  const net::SchemefulSite& top_level_site,
                                  const url::Origin& frame_origin,
                                  uint64_t salt) {
  const VisitedLink link = {link_url, top_level_site, frame_origin};
  return IsVisited(link, salt);
}

bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const {
  // Go through the table until we find the item or an empty spot (meaning it
  // wasn't found). This loop will terminate as long as the table isn't full,
  // which should be enforced by AddFingerprint.
  Hash first_hash = HashFingerprint(fingerprint);
  Hash cur_hash = first_hash;
  while (true) {
    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    if (cur_fingerprint == null_fingerprint_)
      return false;  // End of probe sequence found.
    if (cur_fingerprint == fingerprint)
      return true;  // Found a match.

    // This spot was taken, but not by the item we're looking for, search in
    // the next position.
    cur_hash++;
    if (cur_hash == table_length_)
      cur_hash = 0;
    if (cur_hash == first_hash) {
      // Wrapped around and didn't find an empty space, this means we're in an
      // infinite loop because AddFingerprint didn't do its job resizing.
      NOTREACHED();
    }
  }
}

// Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint,
// this is as random as any other subset of the MD5SUM.

// static
VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint(
    std::string_view canonical_url,
    const uint8_t salt[LINK_SALT_LENGTH]) {
  DCHECK(canonical_url.size() > 0) << "Canonical URLs should not be empty";

  auto md5 = MakeMd5HasherForVisitedLink();
  UNSAFE_BUFFERS(
      // SAFETY: salt is a reference to a local array which we know is the right
      // size.
      md5.Update(base::span<const uint8_t>(
          salt, base::checked_cast<size_t>(LINK_SALT_LENGTH)));)
  md5.Update(canonical_url);
  return ConvertDigestToFingerprint(md5.Finish());
}

// static
VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputePartitionedFingerprint(
    const VisitedLink& link,
    uint64_t salt) {
  return ComputePartitionedFingerprint(
      link.link_url.spec(), link.top_level_site, link.frame_origin, salt);
}

// static
VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputePartitionedFingerprint(
    std::string_view canonical_link_url,
    const net::SchemefulSite& top_level_site,
    const url::Origin& frame_origin,
    uint64_t salt) {
  DCHECK(canonical_link_url.size() > 0)
      << "Canonical link URLs should not be empty";
  DCHECK(!top_level_site.opaque())
      << "Do not call ComputePartitionedFingerprint with an opaque top-level "
         "site.";
  DCHECK(!frame_origin.opaque()) << "Do not call ComputePartitionedFingerprint "
                                    "with an opaque frame origin.";

  auto md5 = MakeMd5HasherForVisitedLink();

  // Salt the hash.
  md5.Update(base::byte_span_from_ref(salt));

  // Add the link url.
  md5.Update(canonical_link_url);

  // Add the serialized schemeful top-level site.
  md5.Update(top_level_site.Serialize());

  // Add the serialized frame origin.
  md5.Update(frame_origin.Serialize());
  return ConvertDigestToFingerprint(md5.Finish());
}

}  // namespace visitedlink