File: regex_rules_matcher.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (170 lines) | stat: -rw-r--r-- 7,426 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
#define EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_

#include <memory>

#include "base/memory/raw_ptr.h"
#include "base/substring_set_matcher/substring_set_matcher.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/ruleset_matcher_base.h"
#include "third_party/re2/src/re2/filtered_re2.h"

namespace extensions::declarative_net_request {

// Structure to hold a RegexRule together with its corresponding compiled
// re2::Re2 object.
struct RegexRuleInfo {
  RegexRuleInfo(const flat::RegexRule* regex_rule, const re2::RE2* regex);
  RegexRuleInfo(const RegexRuleInfo& info);
  RegexRuleInfo& operator=(const RegexRuleInfo& info);
  raw_ptr<const flat::RegexRule> regex_rule;
  raw_ptr<const re2::RE2, DanglingUntriaged> regex;
};

// RegexRulesMatcher deals with matching of regular expression rules. It is an
// implementation detail of RulesetMatcher. This uses the FilteredRE2 class from
// the re2 library to achieve fast matching of a set of declarative regex rules
// against a request. How this works:
//
// Initialization:
// 1. During initialization, we add each regex to the FilteredRE2 class.
// 2. We compile the FilteredRE2 object which returns us a set of substrings.
//    These are added to `substring_matcher_` for use in #3 below.
//
// Matching
// 3. Given a request url, we find the set of strings from #2. that are
//    substrings of the request url. This uses the
//    url_matcher::SubstringSetMatcher class which internally uses the
//    Aho-Corasick algorithm.
// 4. Given the list of matched strings from #3, FilteredRE2 returns the list
//    of regexes (rules) that might potentially match. To reduce the number of
//    regexes that need to be matched (since it's expensive), we prune the list
//    even further by checking if the rule metadata matches the request.
// 5. Given the list of potentially matching rules, we finally match the actual
//    regexes against the request url, as required.
class RegexRulesMatcher final : public RulesetMatcherBase {
 public:
  using RegexRulesList =
      ::flatbuffers::Vector<flatbuffers::Offset<flat::RegexRule>>;
  using RegexMatchKey =
      std::pair<const RegexRulesMatcher*, RulesetMatchingStage>;
  RegexRulesMatcher(const ExtensionId& extension_id,
                    RulesetID ruleset_id,
                    const RegexRulesList* before_request_regex_list,
                    const RegexRulesList* headers_received_regex_list,
                    const ExtensionMetadataList* metadata_list);

  RegexRulesMatcher(const RegexRulesMatcher&) = delete;
  RegexRulesMatcher& operator=(const RegexRulesMatcher&) = delete;

  // RulesetMatcherBase override:
  ~RegexRulesMatcher() override;
  std::vector<RequestAction> GetModifyHeadersActions(
      const RequestParams& params,
      RulesetMatchingStage stage,
      std::optional<uint64_t> min_priority) const override;
  bool IsExtraHeadersMatcher() const override;
  size_t GetRulesCount() const override;
  size_t GetBeforeRequestRulesCount() const override;
  size_t GetHeadersReceivedRulesCount() const override;

 private:
  // A helper class used to match regex rules from a single ruleset for a single
  // request stage.
  class MatchHelper {
   public:
    MatchHelper(const raw_ptr<const RegexRulesList> regex_list,
                const RegexRulesMatcher* parent_matcher,
                RulesetMatchingStage stage);
    ~MatchHelper();
    MatchHelper(const MatchHelper& other) = delete;
    MatchHelper& operator=(const MatchHelper& other) = delete;

    // Returns the rule count for regex rules to be matched in the request stage
    // corresponding to this MatchHelper.
    size_t GetRulesCount() const;

    // Returns the potentially matching rules for the given request. A
    // potentially matching rule is one whose metadata matches the given request
    // `params` and which is not ruled out as a potential match by the
    // `filtered_re2_` object. Note: The returned vector is sorted in descending
    // order of rule priority.
    const std::vector<RegexRuleInfo>& GetPotentialMatches(
        const RequestParams& params) const;

   private:
    // Helper to build the necessary data structures for matching.
    void InitializeMatcher();

    // Returns true if this matcher doesn't correspond to any rules.
    bool IsEmpty() const;

    // The backing regex rules list for this MatchHelper. Contains all rules
    // that are meant to be matched for a single request stage.
    const raw_ptr<const RegexRulesList> regex_list_;

    // The key used to cache potential regex matches from this helper in a
    // RequestParams. Consists of a pointer to the RegexRulesMatcher which owns
    // this helper and the request stage in which this helper will match rules.
    RegexMatchKey regex_match_key_;

    // Data structures used for matching. Initialized during construction in
    // InitializeMatcher() and immutable for the rest of the object lifetime.

    // This provides a pre-filtering mechanism, to reduce the number of regular
    // expressions that are actually matched against a request.
    re2::FilteredRE2 filtered_re2_;

    // Map from re2 ID (as used by `filtered_re2_`) to the flat::RegexRule in
    // `regex_list_`.
    std::map<int, raw_ptr<const flat::RegexRule, CtnExperimental>>
        re2_id_to_rules_map_;

    // Structure for fast substring matching. Given a string S and a set of
    // candidate strings, returns the sub-set of candidate strings that are a
    // substring of S. Uses the Aho-Corasick algorithm internally. Will be null
    // iff IsEmpty() returns false.
    std::unique_ptr<base::SubstringSetMatcher> substring_matcher_;
  };

  // RulesetMatcherBase override:
  std::optional<RequestAction> GetAllowAllRequestsAction(
      const RequestParams& params,
      RulesetMatchingStage stage) const override;
  std::optional<RequestAction> GetActionIgnoringAncestors(
      const RequestParams& params,
      RulesetMatchingStage stage) const override;

  // Returns a RequestAction for the given matched regex rule `info`.
  std::optional<RequestAction> CreateActionFromInfo(
      const RequestParams& params,
      const RegexRuleInfo& info) const;

  // Returns a RequestAction for the the given regex substitution rule.
  std::optional<RequestAction> CreateRegexSubstitutionRedirectAction(
      const RequestParams& params,
      const RegexRuleInfo& info) const;

  // Returns the corresponding rule matching helper for the given rule matching
  // `stage`.
  const MatchHelper& GetMatcherForStage(RulesetMatchingStage stage) const;

  // A helper for matching regex rules in the onBeforeRequest stage.
  MatchHelper before_request_matcher_;

  // A helper for matching regex rules in the onHeadersReceived stage.
  MatchHelper headers_received_matcher_;

  const raw_ptr<const ExtensionMetadataList> metadata_list_;

  // Whether this matcher contains rules that will match on, or modify headers.
  const bool is_extra_headers_matcher_;
};

}  // namespace extensions::declarative_net_request

#endif  // EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_