1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "autocomplete_match_classification.h"
#include <string>
#include <string_view>
#include "base/i18n/case_conversion.h"
#include "base/strings/string_util.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"
#include "components/omnibox/browser/scored_history_match.h"
#include "in_memory_url_index_types.h"
namespace {

// Normalizes `text` for matching: keeps at most the first `kMaxTextLength`
// UTF-16 code units and lowercases the result. Because of the truncation (and
// because case conversion can change string length), the returned string is
// not guaranteed to have the same size as `text`.
std::u16string clean(std::u16string_view text) {
  constexpr size_t kMaxTextLength = 2000;
  const std::u16string_view truncated = text.substr(0, kMaxTextLength);
  return base::i18n::ToLower(truncated);
}

}  // namespace
// Finds every occurrence of the terms of `find_text` inside `text`, converts
// them into classifications, and merges those into `original_class`.
//
// `find_text` must not be empty (debug-checked). When `text` is empty,
// `original_class` is returned unchanged.
//
// When `text_is_search_query` is true the styling is inverted: the matched
// portions are classified NONE and everything else MATCH. Otherwise matched
// portions are MATCH and the rest is NONE.
ACMatchClassifications ClassifyAllMatchesInString(
    const std::u16string& find_text,
    const std::u16string& text,
    const bool text_is_search_query,
    const ACMatchClassifications& original_class) {
  DCHECK(!find_text.empty());

  if (text.empty()) {
    return original_class;
  }

  // Pick the pair of styles once instead of duplicating the classify call.
  const int match_style = text_is_search_query ? ACMatchClassification::NONE
                                               : ACMatchClassification::MATCH;
  const int non_match_style = text_is_search_query
                                  ? ACMatchClassification::MATCH
                                  : ACMatchClassification::NONE;

  TermMatches term_matches = FindTermMatches(find_text, text);
  ACMatchClassifications classifications = ClassifyTermMatches(
      term_matches, text.size(), match_style, non_match_style);

  return AutocompleteMatch::MergeClassifications(original_class,
                                                 classifications);
}
// Returns the locations in `text` where the terms of `find_text` occur.
// Matching is done on cleaned (truncated + lowercased) copies of both inputs.
//
// - If `allow_prefix_matching` is true and the cleaned `text` starts with the
//   whole cleaned `find_text`, a single match covering that prefix is
//   returned without splitting `find_text` into terms.
// - If `allow_mid_word_matching` is false, matches are filtered by the word
//   starts of `text`; otherwise word starts are not computed at all.
//
// Returns an empty result when either cleaned string differs in length from
// its input, or when the cleaned `find_text` is empty.
TermMatches FindTermMatches(std::u16string_view find_text,
                            std::u16string_view text,
                            bool allow_prefix_matching,
                            bool allow_mid_word_matching) {
  const std::u16string lowered_find_text = clean(find_text);
  const std::u16string lowered_text = clean(text);

  // Case conversion is not always length-preserving: Armenian և is 1 character
  // in lowercase but 2 characters (Եվ) in uppercase, and Turkish has cases
  // where lowercasing makes the string longer. Match offsets are computed on
  // the lowercased strings but later applied to the original-cased `text`, so
  // a length change would shift them. That leads to `DCHECK` failures when a
  // shifted index lands out of bounds, or to wrong styling otherwise (e.g.
  // input 'ou' bolding 'Yo[uT]ube' if lowercase 'Y' were 2 characters). Bail
  // out instead of returning misaligned matches.
  //
  // Note that `clean()` also truncates its input, so an input longer than its
  // cap fails this same size comparison and yields no matches.
  const bool find_text_resized = lowered_find_text.size() != find_text.size();
  const bool text_resized = lowered_text.size() != text.size();
  if (find_text_resized || text_resized || lowered_find_text.empty()) {
    return {};
  }

  // Fast path: the entire input is a prefix of the text.
  const bool is_prefix_match =
      allow_prefix_matching &&
      base::StartsWith(lowered_text, lowered_find_text,
                       base::CompareCase::SENSITIVE);
  if (is_prefix_match) {
    return {{0, 0, lowered_find_text.length()}};
  }

  String16Vector find_terms =
      String16VectorFromString16(lowered_find_text, nullptr);

  // Word starts are only consulted when mid-word matches must be filtered
  // out, so skip computing them otherwise.
  WordStarts text_word_starts;
  if (!allow_mid_word_matching) {
    String16VectorFromString16(lowered_text, &text_word_starts);
  }

  return FindTermMatchesForTerms(find_terms,
                                 WordStarts(find_terms.size(), 0),
                                 lowered_text, text_word_starts,
                                 allow_mid_word_matching);
}
// Locates `find_terms` in `cleaned_text`, then sorts and de-overlaps the
// resulting matches. When `allow_mid_word_matching` is false, the matches are
// additionally filtered through
// ScoredHistoryMatch::FilterTermMatchesByWordStarts() using
// `find_terms_word_starts` and `text_word_starts`; when it is true, both
// word-start arguments are unused.
TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                    const WordStarts& find_terms_word_starts,
                                    const std::u16string& cleaned_text,
                                    const WordStarts& text_word_starts,
                                    bool allow_mid_word_matching) {
  // Each stage consumes the previous stage's output.
  TermMatches raw_matches = MatchTermsInString(find_terms, cleaned_text);
  TermMatches sorted_matches = SortMatches(raw_matches);
  TermMatches deoverlapped_matches = DeoverlapMatches(sorted_matches);

  if (!allow_mid_word_matching) {
    return ScoredHistoryMatch::FilterTermMatchesByWordStarts(
        deoverlapped_matches, find_terms_word_starts, text_word_starts, 0,
        std::string::npos);
  }
  return deoverlapped_matches;
}
// Converts `matches` into a list of classifications for a text of
// `text_length` characters. Each run of matches is given `match_style`, and
// each gap before, between, or after runs is given `non_match_style`.
//
// Matches whose offset equals the end of the preceding match are treated as
// one contiguous run and produce a single `match_style` entry. With no matches
// at all, the result is a single `non_match_style` entry at offset 0, or empty
// if `text_length` is 0.
//
// NOTE(review): presumably `matches` is expected to be sorted by offset and
// non-overlapping (as produced by FindTermMatchesForTerms()); confirm for
// other callers.
ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
                                           size_t text_length,
                                           int match_style,
                                           int non_match_style) {
  ACMatchClassifications result;

  if (matches.empty()) {
    if (text_length != 0)
      result.push_back(ACMatchClassification(0, non_match_style));
    return result;
  }

  // Text before the first match, if any, is unmatched.
  if (matches[0].offset != 0)
    result.push_back(ACMatchClassification(0, non_match_style));

  const size_t total_matches = matches.size();
  size_t index = 0;
  while (index < total_matches) {
    const size_t run_start = matches[index].offset;
    result.push_back(ACMatchClassification(run_start, match_style));

    // Extend the run across every match that begins exactly where the
    // previous one ended.
    size_t run_end = run_start + matches[index].length;
    ++index;
    while (index < total_matches && matches[index].offset == run_end) {
      run_end += matches[index].length;
      ++index;
    }

    // Only open an unmatched span if the run stops short of the text's end.
    if (run_end < text_length)
      result.push_back(ACMatchClassification(run_end, non_match_style));
  }

  return result;
}
|