File: regex_set_matcher.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (112 lines) | stat: -rw-r--r-- 3,429 bytes parent folder | download | duplicates (11)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_matcher/regex_set_matcher.h"

#include <stddef.h>

#include <memory>
#include <utility>

#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/substring_set_matcher/substring_set_matcher.h"
#include "third_party/re2/src/re2/filtered_re2.h"
#include "third_party/re2/src/re2/re2.h"

using base::MatcherStringPattern;

namespace url_matcher {

RegexSetMatcher::RegexSetMatcher() = default;
RegexSetMatcher::~RegexSetMatcher() = default;

void RegexSetMatcher::AddPatterns(
    const std::vector<const MatcherStringPattern*>& regex_list) {
  if (regex_list.empty())
    return;
  for (size_t i = 0; i < regex_list.size(); ++i) {
    regexes_[regex_list[i]->id()] = regex_list[i];
  }

  RebuildMatcher();
}

void RegexSetMatcher::ClearPatterns() {
  regexes_.clear();
  RebuildMatcher();
}

bool RegexSetMatcher::Match(const std::string& text,
                            std::set<MatcherStringPattern::ID>* matches) const {
  size_t old_number_of_matches = matches->size();
  if (regexes_.empty())
    return false;
  if (!filtered_re2_) {
    LOG(ERROR) << "RegexSetMatcher was not initialized";
    return false;
  }

  // FilteredRE2 expects lowercase for prefiltering, but we still
  // match case-sensitively.
  std::vector<RE2ID> atoms(FindSubstringMatches(base::ToLowerASCII(text)));

  std::vector<RE2ID> re2_ids;
  filtered_re2_->AllMatches(text, atoms, &re2_ids);

  for (size_t i = 0; i < re2_ids.size(); ++i) {
    MatcherStringPattern::ID id = re2_id_map_[re2_ids[i]];
    matches->insert(id);
  }
  return old_number_of_matches != matches->size();
}

bool RegexSetMatcher::IsEmpty() const {
  return regexes_.empty();
}

std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
    const std::string& text) const {
  std::set<base::MatcherStringPattern::ID> atoms_set;
  substring_matcher_->Match(text, &atoms_set);
  return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end());
}

void RegexSetMatcher::RebuildMatcher() {
  re2_id_map_.clear();
  filtered_re2_ = std::make_unique<re2::FilteredRE2>();
  if (regexes_.empty())
    return;

  for (auto it = regexes_.begin(); it != regexes_.end(); ++it) {
    RE2ID re2_id;
    RE2::ErrorCode error =
        filtered_re2_->Add(it->second->pattern(), RE2::DefaultOptions, &re2_id);
    if (error == RE2::NoError) {
      DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
      re2_id_map_.push_back(it->first);
    } else {
      // Unparseable regexes should have been rejected already in
      // URLMatcherFactory::CreateURLMatchesCondition.
      LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
                 << it->second->pattern() << ")";
    }
  }

  std::vector<std::string> strings_to_match;
  filtered_re2_->Compile(&strings_to_match);

  std::vector<MatcherStringPattern> substring_patterns;
  substring_patterns.reserve(strings_to_match.size());

  // Build SubstringSetMatcher from |strings_to_match|.
  for (size_t i = 0; i < strings_to_match.size(); ++i)
    substring_patterns.emplace_back(std::move(strings_to_match[i]), i);

  substring_matcher_ = std::make_unique<base::SubstringSetMatcher>();
  bool success = substring_matcher_->Build(substring_patterns);
  CHECK(success);
}

}  // namespace url_matcher