File: autocomplete_match_classification.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (137 lines) | stat: -rw-r--r-- 5,188 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "autocomplete_match_classification.h"

#include <string>
#include <string_view>

#include "base/i18n/case_conversion.h"
#include "base/strings/string_util.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"
#include "components/omnibox/browser/scored_history_match.h"
#include "in_memory_url_index_types.h"

namespace {

// Prepares `text` for matching: keeps at most the first `kMaxTextLength` code
// units and returns them lowercased via `base::i18n::ToLower()`.
std::u16string clean(std::u16string_view text) {
  constexpr size_t kMaxTextLength = 2000;
  const std::u16string_view truncated = text.substr(0, kMaxTextLength);
  return base::i18n::ToLower(truncated);
}

}  // namespace

// Classifies every occurrence of `find_text` within `text` and merges the
// result into `original_class`.
//
// `find_text` must not be empty (DCHECKed). If `text` is empty,
// `original_class` is returned as-is. When `text_is_search_query` is true the
// two styles are swapped: matched ranges get `NONE` and the remainder gets
// `MATCH`; otherwise matched ranges get `MATCH` and the remainder `NONE`.
ACMatchClassifications ClassifyAllMatchesInString(
    const std::u16string& find_text,
    const std::u16string& text,
    const bool text_is_search_query,
    const ACMatchClassifications& original_class) {
  DCHECK(!find_text.empty());

  // Nothing to classify; hand back the caller's classifications unmodified.
  if (text.empty()) {
    return original_class;
  }

  const TermMatches hits = FindTermMatches(find_text, text);

  // Pick the style applied to matched and unmatched ranges respectively.
  const int style_for_hits = text_is_search_query
                                 ? ACMatchClassification::NONE
                                 : ACMatchClassification::MATCH;
  const int style_for_rest = text_is_search_query
                                 ? ACMatchClassification::MATCH
                                 : ACMatchClassification::NONE;

  const ACMatchClassifications hit_classes =
      ClassifyTermMatches(hits, text.size(), style_for_hits, style_for_rest);

  return AutocompleteMatch::MergeClassifications(original_class, hit_classes);
}

// Finds where the terms of `find_text` occur in `text`, comparing lowercased
// copies of both strings.
//
// Only the leading `kMaxTextLength` code units of each input are considered.
// Returns an empty result when lowercasing changes the length of either
// (truncated) input, or when `find_text` is empty.
//
// `allow_prefix_matching`: if true and the lowercased `text` starts with the
//     lowercased `find_text`, a single match spanning that prefix is returned
//     without splitting `find_text` into terms.
// `allow_mid_word_matching`: forwarded to `FindTermMatchesForTerms()`; when
//     false, the word starts of `text` are computed and passed along so that
//     matches can be filtered against them.
TermMatches FindTermMatches(std::u16string_view find_text,
                            std::u16string_view text,
                            bool allow_prefix_matching,
                            bool allow_mid_word_matching) {
  // `clean()` caps its input at 2000 code units. Apply the same cap to the
  // views first (keep this limit in sync with `clean()`), so that the length
  // comparison below measures only the effect of lowercasing. Without this, any
  // input longer than the cap would always look as if its length had changed
  // and would lose all of its matches.
  constexpr size_t kMaxTextLength = 2000;
  find_text = find_text.substr(0, kMaxTextLength);
  text = text.substr(0, kMaxTextLength);

  std::u16string find_text_str = clean(find_text);
  std::u16string text_str = clean(text);

  // Some international characters become multiple characters when converting
  // case. E.g. Armenian և is 1 character lowercase, but 2 characters Եվ
  // uppercased. Turkish has examples of the opposite, where the lowercasing can
  // increase length. If the string length changes when lowercased, term match
  // indexes will be off when being used to style the original-cased `text`.
  // This will cause either `DCHECK` crashes if the incorrect index is out of
  // bounds; or incorrect styling if the incorrect index remains in bounds or
  // `DCHECK`s are disabled. E.g. input 'ou' would bold 'Yo[uT]ube' if lower
  // case 'Y' is 2 characters.
  if (find_text_str.size() != find_text.size() ||
      text_str.size() != text.size()) {
    return {};
  }

  if (find_text_str.empty()) {
    return {};
  }

  // Fast path: the whole of `find_text` is a prefix of `text`. Both strings
  // are already lowercased, so a case-sensitive comparison suffices.
  if (allow_prefix_matching &&
      base::StartsWith(text_str, find_text_str, base::CompareCase::SENSITIVE)) {
    return {{0, 0, find_text_str.length()}};
  }

  String16Vector find_terms =
      String16VectorFromString16(find_text_str, nullptr);
  WordStarts word_starts;
  // `word_starts` is unused if `allow_mid_word_matching` is true.
  if (!allow_mid_word_matching) {
    String16VectorFromString16(text_str, &word_starts);
  }
  return FindTermMatchesForTerms(find_terms, WordStarts(find_terms.size(), 0),
                                 text_str, word_starts,
                                 allow_mid_word_matching);
}

// Matches each of `find_terms` against `cleaned_text` and returns the matches
// after they have been passed through `SortMatches()` and
// `DeoverlapMatches()`.
//
// When `allow_mid_word_matching` is false, the result is additionally filtered
// by `ScoredHistoryMatch::FilterTermMatchesByWordStarts()` using
// `find_terms_word_starts` and `text_word_starts`; otherwise both word-start
// arguments are ignored.
TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                    const WordStarts& find_terms_word_starts,
                                    const std::u16string& cleaned_text,
                                    const WordStarts& text_word_starts,
                                    bool allow_mid_word_matching) {
  TermMatches raw_hits = MatchTermsInString(find_terms, cleaned_text);
  TermMatches sorted_hits = SortMatches(raw_hits);
  TermMatches hits = DeoverlapMatches(sorted_hits);

  if (!allow_mid_word_matching) {
    // Apply the word-start filter over the full range [0, npos).
    return ScoredHistoryMatch::FilterTermMatchesByWordStarts(
        hits, find_terms_word_starts, text_word_starts, 0, std::string::npos);
  }

  return hits;
}

// Converts `matches` into a list of classifications covering a string of
// `text_length` code units.
//
// Each run of matches starts a classification with `match_style` at the run's
// offset. Every gap before, between, or after runs (as long as it begins
// before `text_length`) starts a classification with `non_match_style`. With
// no matches, a non-empty text gets a single `non_match_style` classification
// at offset 0 and an empty text gets none.
ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
                                           size_t text_length,
                                           int match_style,
                                           int non_match_style) {
  ACMatchClassifications result;

  if (matches.empty()) {
    if (text_length != 0) {
      result.push_back(ACMatchClassification(0, non_match_style));
    }
    return result;
  }

  // Text preceding the first match is unmatched.
  if (matches[0].offset != 0) {
    result.push_back(ACMatchClassification(0, non_match_style));
  }

  const size_t num_matches = matches.size();
  size_t index = 0;
  while (index < num_matches) {
    size_t run_end = matches[index].offset;
    result.push_back(ACMatchClassification(run_end, match_style));

    // Extend the run over every following match that begins exactly where the
    // previous one ended, so adjacent matches share one classification.
    for (;;) {
      run_end += matches[index].length;
      ++index;
      if (index >= num_matches || matches[index].offset != run_end) {
        break;
      }
    }

    // Anything after the run (if still inside the text) is unmatched.
    if (run_end < text_length) {
      result.push_back(ACMatchClassification(run_end, non_match_style));
    }
  }

  return result;
}