File: filtered_re2.h

package info (click to toggle)
qtwebengine-opensource-src 5.7.1%2Bdfsg-6.1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,028,096 kB
  • ctags: 1,436,736
  • sloc: cpp: 5,960,176; ansic: 3,477,727; asm: 395,492; python: 320,633; sh: 96,504; perl: 69,449; xml: 42,977; makefile: 28,408; objc: 9,962; yacc: 9,847; tcl: 5,430; lex: 2,259; ruby: 1,053; lisp: 522; awk: 497; pascal: 310; cs: 249; sed: 53
file content (110 lines) | stat: -rw-r--r-- 4,067 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Copyright 2009 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef RE2_FILTERED_RE2_H_
#define RE2_FILTERED_RE2_H_

// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
// It provides a prefilter mechanism that helps in cutting down the
// number of regexps that need to be actually searched.
//
// By design, it does not include a string matching engine. This is to
// allow the user of the class to use their favorite string match
// engine. The overall flow is: Add all the regexps using Add, then
// Compile the FilteredRE2. The compile returns strings that need to
// be matched. Note that all returned strings are lowercase. For
// applying regexps to a search text, the caller does the string
// matching using the strings returned. When doing the string match,
// note that the caller has to do that on lower cased version of the
// search text. Then call FirstMatch or AllMatches with a vector of
// indices of strings that were found in the text to get the actual
// regexp matches.

#include <string>
#include <vector>

#include "re2/re2.h"

namespace re2 {

class PrefilterTree;

class FilteredRE2 {
 public:
  FilteredRE2();
  explicit FilteredRE2(int min_atom_len);
  ~FilteredRE2();

  // Uses RE2 constructor to create a RE2 object (re). Returns
  // re->error_code(). If error_code is other than NoError, then re is
  // deleted and not added to re2_vec_.
  RE2::ErrorCode Add(const StringPiece& pattern,
                     const RE2::Options& options,
                     int *id);

  // Prepares the regexps added by Add for filtering.  Returns a set
  // of strings that the caller should check for in candidate texts.
  // The returned strings are lowercased. When doing string matching,
  // the search text should be lowercased first to find matching
  // strings from the set of strings returned by Compile.  Call after
  // all Add calls are done.
  void Compile(std::vector<string>* strings_to_match);

  // Returns the index of the first matching regexp.
  // Returns -1 on no match. Can be called prior to Compile.
  // Does not do any filtering: simply tries to Match the
  // regexps in a loop.
  int SlowFirstMatch(const StringPiece& text) const;

  // Returns the index of the first matching regexp.
  // Returns -1 on no match. Compile has to be called before
  // calling this.
  int FirstMatch(const StringPiece& text,
                 const std::vector<int>& atoms) const;

  // Returns the indices of all matching regexps, after first clearing
  // matched_regexps.
  bool AllMatches(const StringPiece& text,
                  const std::vector<int>& atoms,
                  std::vector<int>* matching_regexps) const;

  // Returns the indices of all potentially matching regexps after first
  // clearing potential_regexps.
  // A regexp is potentially matching if it passes the filter.
  // If a regexp passes the filter it may still not match.
  // A regexp that does not pass the filter is guaranteed to not match.
  void AllPotentials(const std::vector<int>& atoms,
                     std::vector<int>* potential_regexps) const;

  // The number of regexps added.
  int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }

 private:

  // Get the individual RE2 objects. Useful for testing.
  RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; }

  // Print prefilter.
  void PrintPrefilter(int regexpid);

  // Useful for testing and debugging.
  void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
                           std::vector<int>* passed_regexps);

  // All the regexps in the FilteredRE2.
  std::vector<RE2*> re2_vec_;

  // Has the FilteredRE2 been compiled using Compile()
  bool compiled_;

  // An AND-OR tree of string atoms used for filtering regexps.
  PrefilterTree* prefilter_tree_;

  FilteredRE2(const FilteredRE2&) = delete;
  FilteredRE2& operator=(const FilteredRE2&) = delete;
};

}  // namespace re2

#endif  // RE2_FILTERED_RE2_H_