File: prefix_matcher.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (121 lines) | stat: -rw-r--r-- 4,302 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROMEOS_ASH_COMPONENTS_STRING_MATCHING_PREFIX_MATCHER_H_
#define CHROMEOS_ASH_COMPONENTS_STRING_MATCHING_PREFIX_MATCHER_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "base/memory/raw_ref.h"
#include "chromeos/ash/components/string_matching/tokenized_string.h"
#include "ui/gfx/range/range.h"

namespace ash::string_matching {

// Per-character score constants used by the prefix matcher. These are
// described in more detail in the .cc file.
//
// Declared `inline constexpr` so that every translation unit including this
// header refers to one shared definition instead of getting its own
// internal-linkage copy.
namespace prefix_matcher_constants {

inline constexpr double kIsPrefixCharScore = 1.0;
inline constexpr double kIsFrontOfTokenCharScore = 0.8;
inline constexpr double kIsWeakHitCharScore = 0.6;
inline constexpr double kNoMatchScore = 0.0;

}  // namespace prefix_matcher_constants

namespace {
struct MatchInfo {
 public:
  typedef std::vector<gfx::Range> Hits;

  MatchInfo();
  ~MatchInfo();

  MatchInfo(const MatchInfo&) = delete;
  MatchInfo& operator=(const MatchInfo&) = delete;

  double relevance = prefix_matcher_constants::kNoMatchScore;
  Hits hits;

  gfx::Range current_match = gfx::Range::InvalidRange();
  // The last query/text position that the relevance was updated.
  size_t last_query_pos = SIZE_MAX;
  size_t last_text_pos = SIZE_MAX;
  // Flag to track if we are still matching the prefixes of both the query and
  // text.
  bool is_front = true;
};
}  // namespace

// PrefixMatcher matches the chars of a given query as prefix of tokens in
// a given text. We give some specific scoring examples in the .cc file.
class PrefixMatcher {
 public:
  typedef std::vector<gfx::Range> Hits;

  PrefixMatcher(const TokenizedString& query, const TokenizedString& text);
  ~PrefixMatcher();

  PrefixMatcher(const PrefixMatcher&) = delete;
  PrefixMatcher& operator=(const PrefixMatcher&) = delete;

  // Return true if we found either sentence prefix matching or token prefix
  // matching. If no full match is found, return false.
  bool Match();

  double relevance() const { return relevance_; }
  const Hits& hits() const { return hits_; }

 private:
  // Stops on the first full sentence prefix match and updates the relevance
  // score. If no match found, set relevance as kNoMatchScore.
  //
  // We treat the following as sentence prefix match:
  // query        |       text
  // chromeos     | [chrome os] flex        (prefix)
  // chrome os    | google [chromeos]       (non-prefix)
  // google pixel | buy [google pixel]book  (unfinished)
  //
  // But not the following:
  // query        |       text
  // cof          | chrome os flex
  // go chrome    | google chromeos
  void SentencePrefixMatch(MatchInfo& sentence_match_info);

  // Stops on the first full token prefix match and updates the relevance
  // score. If no match found, set relevance as kNoMatchScore.
  //
  // We treat the following as token prefix match:
  // query        |       text
  // chrome store | my [chrome store]       (continuous)
  // chrome store | [chrome] web [store]    (discrete)
  // chrome google| [google chrome]         (unordered)
  // google pixel | buy [google pixel]book  (unfinished)
  //
  // But not the following:
  // query        |       text
  // cof          | chrome os flex
  // chrome flex  | chromeos flex
  //
  // The time complexity of the token prefix match algorithm is `O(m+n)`, where
  // m is the `num_query_token` and n is the `num_text_token`. O(m) to construct
  // the `query_map` and O(n) to traverse the text tokens to find matches. Each
  // text token will be compared at most twice (one for `query_map` and one for
  // last query token).
  void TokenPrefixMatch(MatchInfo& token_match_info);

  // Update the relevance score of token prefix based on the matched token. This
  // method can cope with full and partial token matches as it always update the
  // `relevance` and `hits` according the query size.
  void UpdateInfoForTokenPrefixMatch(size_t query_pos,
                                     size_t text_pos,
                                     MatchInfo& token_match_info);

  const raw_ref<const TokenizedString> query_;
  const raw_ref<const TokenizedString> text_;

  double relevance_ = prefix_matcher_constants::kNoMatchScore;
  Hits hits_;
};

}  // namespace ash::string_matching

#endif  // CHROMEOS_ASH_COMPONENTS_STRING_MATCHING_PREFIX_MATCHER_H_