File: lens_url_matcher.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (222 lines) | stat: -rw-r--r-- 8,450 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ui/lens/lens_url_matcher.h"

#include <utility>

#include "base/json/json_reader.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "third_party/farmhash/src/src/farmhash.h"

namespace lens {

namespace {

// Converts a JSON string array to a vector.
std::vector<std::string> JSONArrayToVector(const std::string& json_array) {
  std::optional<base::Value> json_value = base::JSONReader::Read(json_array);

  if (!json_value) {
    return {};
  }

  base::Value::List* entries = json_value->GetIfList();
  if (!entries) {
    return {};
  }

  std::vector<std::string> result;
  result.reserve(entries->size());
  for (const base::Value& entry : *entries) {
    const std::string* filter = entry.GetIfString();
    if (filter) {
      result.emplace_back(*filter);
    }
  }
  return result;
}

}  // namespace

// Builds all matchers from their serialized filter lists.
//
// The first five arguments are handed to initializers that parse them as JSON
// arrays of strings; `hashed_domain_block_filters_list` is handed to an
// initializer that parses it as a comma-separated list of unsigned integers.
//
// A single pattern-ID counter is threaded through every pattern-based
// initializer, so IDs are assigned in the order the initializers run below.
LensUrlMatcher::LensUrlMatcher(std::string url_allow_filters,
                               std::string url_block_filters,
                               std::string path_match_allow_filters,
                               std::string path_match_block_filters,
                               std::string url_forced_allowed_match_patterns,
                               std::string hashed_domain_block_filters_list) {
  base::MatcherStringPattern::ID id(0);
  // Each argument is used exactly once and the initializers take their
  // strings by value, so move instead of copying the serialized lists.
  InitializeUrlMatcher(std::move(url_allow_filters),
                       std::move(url_block_filters), &id);
  InitializeForceAllowUrlPatterns(std::move(url_forced_allowed_match_patterns),
                                  &id);
  InitializePathAllowMatcher(std::move(path_match_allow_filters), &id);
  InitializePathBlockMatcher(std::move(path_match_block_filters), &id);
  InitializeHashedDomainBlockFilters(
      std::move(hashed_domain_block_filters_list));
}

// Defaulted out of line; no explicit teardown logic is defined in this file.
LensUrlMatcher::~LensUrlMatcher() = default;

void LensUrlMatcher::InitializeUrlMatcher(std::string url_allow_filters,
                                          std::string url_block_filters,
                                          base::MatcherStringPattern::ID* id) {
  url_matcher_ = std::make_unique<url_matcher::URLMatcher>();
  url_matcher::util::AddFiltersWithLimit(url_matcher_.get(), true, id,
                                         JSONArrayToVector(url_allow_filters),
                                         &url_filters_);
  url_matcher::util::AddFiltersWithLimit(url_matcher_.get(), false, id,
                                         JSONArrayToVector(url_block_filters),
                                         &url_filters_);
}

void LensUrlMatcher::InitializeForceAllowUrlPatterns(
    std::string url_path_forced_allowed_match_patterns,
    base::MatcherStringPattern::ID* id) {
  auto force_allow_url_strings =
      JSONArrayToVector(url_path_forced_allowed_match_patterns);
  std::vector<base::MatcherStringPattern> force_allow_url_patterns;
  std::vector<const base::MatcherStringPattern*> force_allow_url_pointers;
  force_allow_url_patterns.reserve(force_allow_url_strings.size());
  force_allow_url_pointers.reserve(force_allow_url_strings.size());
  for (const std::string& entry : force_allow_url_strings) {
    (*id)++;
    force_allow_url_patterns.emplace_back(entry, *id);
    force_allow_url_pointers.push_back(&force_allow_url_patterns.back());
  }
  url_forced_allow_matcher = std::make_unique<url_matcher::RegexSetMatcher>();
  // Pointers will not be referenced after AddPatterns() completes.
  url_forced_allow_matcher->AddPatterns(force_allow_url_pointers);
}

void LensUrlMatcher::InitializePathAllowMatcher(
    std::string path_match_allow_filters,
    base::MatcherStringPattern::ID* id) {
  const auto allow_strings = JSONArrayToVector(path_match_allow_filters);
  std::vector<base::MatcherStringPattern> allow_patterns;
  std::vector<const base::MatcherStringPattern*> allow_pointers;
  allow_patterns.reserve(allow_strings.size());
  allow_pointers.reserve(allow_strings.size());
  for (const std::string& entry : allow_strings) {
    (*id)++;
    allow_patterns.emplace_back(entry, *id);
    allow_pointers.push_back(&allow_patterns.back());
  }
  path_allow_matcher_ = std::make_unique<url_matcher::RegexSetMatcher>();
  // Pointers will not be referenced after AddPatterns() completes.
  path_allow_matcher_->AddPatterns(allow_pointers);
}

void LensUrlMatcher::InitializePathBlockMatcher(
    std::string path_match_block_filters,
    base::MatcherStringPattern::ID* id) {
  const auto block_strings = JSONArrayToVector(path_match_block_filters);
  std::vector<base::MatcherStringPattern> block_patterns;
  std::vector<const base::MatcherStringPattern*> block_pointers;
  block_patterns.reserve(block_strings.size());
  block_pointers.reserve(block_strings.size());
  for (const std::string& entry : block_strings) {
    (*id)++;
    block_patterns.emplace_back(entry, *id);
    block_pointers.push_back(&block_patterns.back());
  }
  path_block_matcher_ = std::make_unique<url_matcher::RegexSetMatcher>();
  // Pointers will not be referenced after AddPatterns() completes.
  path_block_matcher_->AddPatterns(block_pointers);
}

// Fills `hashed_domain_block_filters_` from a comma-separated list of
// unsigned integer hashes. Surrounding whitespace is trimmed from each entry,
// empty entries are dropped, and entries that do not parse as an unsigned
// integer are ignored.
void LensUrlMatcher::InitializeHashedDomainBlockFilters(
    std::string hashed_domain_block_filters_list) {
  // The pieces are views into `hashed_domain_block_filters_list`, which
  // outlives this loop.
  const auto tokens = base::SplitStringPiece(
      hashed_domain_block_filters_list, ",",
      base::WhitespaceHandling::TRIM_WHITESPACE,
      base::SplitResult::SPLIT_WANT_NONEMPTY);

  for (const std::string_view token : tokens) {
    uint32_t parsed_hash = 0;
    if (!base::StringToUint(token, &parsed_hash)) {
      continue;
    }
    hashed_domain_block_filters_.insert(parsed_hash);
  }
}

// Returns true when `url` passes every filter stage, i.e.:
//   1. It matches at least one URL filter, and none of the matched URL
//      filters is a block filter.
//   2. Neither its host nor any parent domain of its host is in the hashed
//      domain block set.
//   3. Its path does not match the path block patterns.
//   4. Either its full spec matches a forced-allow pattern, or the path allow
//      matcher is absent/empty, or its path matches a path allow pattern.
// The stages run in that order and the first decisive stage wins.
bool LensUrlMatcher::IsMatch(const GURL& url) {
  // Stage 1: URL allow/block filters. No match at all means the URL was never
  // allowed in the first place.
  auto matched_ids = url_matcher_->MatchURL(url);
  if (matched_ids.empty()) {
    return false;
  }
  // Blocks take precedence over allows: a single matched block filter
  // rejects the URL.
  for (const auto matched_id : matched_ids) {
    if (!url_filters_[matched_id].allow) {
      return false;
    }
  }

  // Stage 2: hashed domain block list.
  if (SubdomainsMatchHash(url.host())) {
    return false;
  }

  // Stage 3: path block patterns. `matched_ids` is reused as the out-set for
  // the pattern matchers below.
  const bool path_is_blocked =
      path_block_matcher_ && !path_block_matcher_->IsEmpty() &&
      path_block_matcher_->Match(url.path(), &matched_ids);
  if (path_is_blocked) {
    return false;
  }

  // Stage 4a: forced-allow patterns are tested against the whole spec and
  // short-circuit the path allowlist check.
  const bool is_force_allowed =
      url_forced_allow_matcher && !url_forced_allow_matcher->IsEmpty() &&
      url_forced_allow_matcher->Match(url.spec(), &matched_ids);
  if (is_force_allowed) {
    return true;
  }

  // Stage 4b: when a non-empty path allowlist exists, the path must match it;
  // without one, the URL is accepted as is.
  const bool has_path_allowlist =
      path_allow_matcher_ && !path_allow_matcher_->IsEmpty();
  return !has_path_allowlist ||
         path_allow_matcher_->Match(url.path(), &matched_ids);
}

bool LensUrlMatcher::SubdomainsMatchHash(std::string str) {
  // Remove any periods from the start and end of the hostname.
  size_t start = str.find_first_not_of('.');
  if (start == std::string::npos) {
    return false;
  }
  size_t end = str.find_last_not_of('.');
  return SubdomainsMatchHash(
      std::string_view(str).substr(start, 1 + end - start));
}

// Returns true if `str`, or any suffix obtained by repeatedly dropping
// everything up to and including the first period, matches the hashed block
// set. For "a.b.c" the candidates tested are "a.b.c", "b.c" and "c", in that
// order.
bool LensUrlMatcher::SubdomainsMatchHash(std::string_view str) {
  std::string_view candidate = str;
  while (true) {
    if (MatchesHash(candidate)) {
      return true;
    }

    const size_t dot = candidate.find('.');
    if (dot == std::string_view::npos) {
      // No more labels to strip: the last candidate was the top-level domain.
      return false;
    }
    candidate.remove_prefix(dot + 1);
  }
}

// Returns true if the 32-bit fingerprint of `str` (util::Fingerprint32) is
// present in `hashed_domain_block_filters_`.
bool LensUrlMatcher::MatchesHash(std::string_view str) {
  return hashed_domain_block_filters_.contains(util::Fingerprint32(str));
}

}  // namespace lens