File: fuzzy_pattern_matching.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (70 lines) | stat: -rw-r--r-- 2,337 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// The separator placeholder '^' symbol is used in subpatterns to match any
// separator character, which is any ASCII symbol except letters, digits, and
// the following: '_', '-', '.', '%'. Note that the separator placeholder
// character '^' is itself a separator, as well as '\0'.
//
// In addition, a separator placeholder at the end of the pattern can be matched
// by the end of |text|. This should be handled by the clients using the
// following utility functions.
//
// We define a fuzzy occurrence as an occurrence of a |subpattern| in |text|
// such that all its non-placeholder characters are equal to the corresponding
// characters of the |text|, whereas each '^' placeholder can correspond to any
// type of separator in |text|.

#ifndef COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_
#define COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_

#include <stddef.h>

#include <string_view>

namespace url_pattern_index {

constexpr char kSeparatorPlaceholder = '^';

inline bool IsAscii(char c) {
  return !(c & ~0x7F);
}

inline bool IsAlphaNumericAscii(char c) {
  if (c <= '9')
    return c >= '0';
  c |= 0x20;  // Puts all alphabetics (and only them) into the 'a'-'z' range.
  return c >= 'a' && c <= 'z';
}

// Returns whether |c| is a separator.
inline bool IsSeparator(char c) {
  switch (c) {
    case '_':
    case '-':
    case '.':
    case '%':
      return false;
    case kSeparatorPlaceholder:
      return true;
    default:
      return !IsAlphaNumericAscii(c) && IsAscii(c);
  }
}

// Returns whether |text| starts with a fuzzy occurrence of |subpattern|.
bool StartsWithFuzzy(std::string_view text, std::string_view subpattern);

// Returns whether |text| ends with a fuzzy occurrence of |subpattern|.
bool EndsWithFuzzy(std::string_view text, std::string_view subpattern);

// Returns the position of the leftmost fuzzy occurrence of a |subpattern| in
// the |text| starting no earlier than |from| the specified position.
size_t FindFuzzy(std::string_view text,
                 std::string_view subpattern,
                 size_t from = 0);

}  // namespace url_pattern_index

#endif  // COMPONENTS_URL_PATTERN_INDEX_FUZZY_PATTERN_MATCHING_H_