File: autocomplete_match_classification.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (119 lines) | stat: -rw-r--r-- 4,305 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "autocomplete_match_classification.h"

#include "base/i18n/case_conversion.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/omnibox/browser/scored_history_match.h"
#include "in_memory_url_index_types.h"

namespace {

// Normalizes `text` for matching: keeps at most the first 2000 UTF-16 code
// units and lower-cases them via base::i18n::ToLower().
std::u16string clean(std::u16string_view text) {
  constexpr size_t kMaxTextLength = 2000;
  // substr() clamps the count to the remaining length, so shorter inputs pass
  // through untruncated.
  const std::u16string_view bounded = text.substr(0, kMaxTextLength);
  return base::i18n::ToLower(bounded);
}

}  // namespace

// Classifies every occurrence of `find_text` inside `text` and merges the
// result into `original_class`.
//
// `find_text` must be non-empty (DCHECKed). When `text` is empty,
// `original_class` is returned unchanged. When `text_is_search_query` is true
// the styles are swapped: matched ranges get NONE and the remaining ranges get
// MATCH; otherwise matched ranges get MATCH and the rest NONE.
ACMatchClassifications ClassifyAllMatchesInString(
    const std::u16string& find_text,
    const std::u16string& text,
    const bool text_is_search_query,
    const ACMatchClassifications& original_class) {
  DCHECK(!find_text.empty());

  if (text.empty())
    return original_class;

  // Pick the style pair once instead of duplicating the classify call.
  const int style_for_matches = text_is_search_query
                                    ? ACMatchClassification::NONE
                                    : ACMatchClassification::MATCH;
  const int style_for_rest = text_is_search_query
                                 ? ACMatchClassification::MATCH
                                 : ACMatchClassification::NONE;

  const TermMatches found = FindTermMatches(find_text, text);
  const ACMatchClassifications fresh = ClassifyTermMatches(
      found, text.size(), style_for_matches, style_for_rest);

  return AutocompleteMatch::MergeClassifications(original_class, fresh);
}

// Finds the matches of `find_text` within `text`, after both have been passed
// through clean().
//
// Returns an empty result when the cleaned `find_text` is empty. When
// `allow_prefix_matching` is set and the cleaned `text` starts with the whole
// cleaned `find_text`, a single match covering that prefix is returned.
// Otherwise `find_text` is split into terms and the work is delegated to
// FindTermMatchesForTerms().
TermMatches FindTermMatches(std::u16string_view find_text,
                            std::u16string_view text,
                            bool allow_prefix_matching,
                            bool allow_mid_word_matching) {
  const std::u16string needle = clean(find_text);
  const std::u16string haystack = clean(text);

  if (needle.empty())
    return {};

  const bool haystack_begins_with_needle =
      allow_prefix_matching &&
      base::StartsWith(haystack, needle, base::CompareCase::SENSITIVE);
  if (haystack_begins_with_needle) {
    // One match: term 0, offset 0, spanning the whole needle.
    return {{0, 0, needle.length()}};
  }

  const String16Vector needle_terms =
      String16VectorFromString16(needle, nullptr);

  // The text's word starts are only consulted when mid-word matches are
  // disallowed, so they are left empty otherwise.
  WordStarts haystack_word_starts;
  if (!allow_mid_word_matching)
    String16VectorFromString16(haystack, &haystack_word_starts);

  // One zero entry per term.
  const WordStarts needle_word_starts(needle_terms.size(), 0);

  return FindTermMatchesForTerms(needle_terms, needle_word_starts, haystack,
                                 haystack_word_starts,
                                 allow_mid_word_matching);
}

// Matches `find_terms` against `cleaned_text`, then sorts and de-overlaps the
// matches.
//
// When `allow_mid_word_matching` is true, those matches are returned as-is and
// both word-start arguments are ignored. Otherwise the matches are passed
// through ScoredHistoryMatch::FilterTermMatchesByWordStarts() together with
// `find_terms_word_starts` and `text_word_starts`.
TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                    const WordStarts& find_terms_word_starts,
                                    const std::u16string& cleaned_text,
                                    const WordStarts& text_word_starts,
                                    bool allow_mid_word_matching) {
  const TermMatches unsorted = MatchTermsInString(find_terms, cleaned_text);
  TermMatches deoverlapped = DeoverlapMatches(SortMatches(unsorted));

  if (!allow_mid_word_matching) {
    return ScoredHistoryMatch::FilterTermMatchesByWordStarts(
        deoverlapped, find_terms_word_starts, text_word_starts, 0,
        std::string::npos);
  }

  return deoverlapped;
}

// Converts `matches` into a list of classifications over a text of
// `text_length` characters.
//
// Each run of back-to-back matches (one ending exactly where the next begins)
// yields a single `match_style` entry at the run's start. A `non_match_style`
// entry is emitted at offset 0 when the first match does not start there, and
// after every run that ends before `text_length`. With no matches, the result
// is a single `non_match_style` entry at offset 0, or empty when `text_length`
// is 0.
ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
                                           size_t text_length,
                                           int match_style,
                                           int non_match_style) {
  ACMatchClassifications result;

  if (matches.empty()) {
    if (text_length != 0)
      result.emplace_back(0, non_match_style);
    return result;
  }

  // Leading unmatched region, if any.
  if (matches.front().offset != 0)
    result.emplace_back(0, non_match_style);

  const size_t total = matches.size();
  size_t index = 0;
  while (index < total) {
    // Open a matched run at the current match.
    size_t run_end = matches[index].offset;
    result.emplace_back(run_end, match_style);

    // Consume the current match, then any matches that start exactly where
    // the run currently ends.
    run_end += matches[index].length;
    ++index;
    while (index < total && matches[index].offset == run_end) {
      run_end += matches[index].length;
      ++index;
    }

    // Close the run with an unmatched region unless it reaches the end.
    if (run_end < text_length)
      result.emplace_back(run_end, non_match_style);
  }

  return result;
}