File: autocomplete_match_classification.cc

package info (click to toggle)

chromium 139.0.7258.127-1

links: PTS, VCS
area: main
in suites:
size: 6,122,068 kB
sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36

file content (137 lines) | stat: -rw-r--r-- 5,188 bytes

parent folder | download | duplicates (3)

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "autocomplete_match_classification.h"

#include <string>
#include <string_view>

#include "base/i18n/case_conversion.h"
#include "base/strings/string_util.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"
#include "components/omnibox/browser/scored_history_match.h"
#include "in_memory_url_index_types.h"

namespace {

// Normalizes `text` for matching: keeps at most the first `kMaxTextLength`
// UTF-16 code units and lowercases that prefix. The cap bounds how much text
// the matching code downstream has to process.
std::u16string clean(std::u16string_view text) {
  constexpr size_t kMaxTextLength = 2000;
  const std::u16string_view capped_text = text.substr(0, kMaxTextLength);
  return base::i18n::ToLower(capped_text);
}

}  // namespace

// Classifies every occurrence of `find_text` within `text` and merges the
// result into `original_class`.
//
// `find_text` must not be empty (DCHECKed). If `text` is empty,
// `original_class` is returned unchanged. When `text_is_search_query` is true
// the styles are inverted: the matched regions receive
// `ACMatchClassification::NONE` and the remaining regions receive
// `ACMatchClassification::MATCH`; otherwise matched regions receive `MATCH`
// and the rest `NONE`.
ACMatchClassifications ClassifyAllMatchesInString(
    const std::u16string& find_text,
    const std::u16string& text,
    const bool text_is_search_query,
    const ACMatchClassifications& original_class) {
  DCHECK(!find_text.empty());

  if (text.empty()) {
    return original_class;
  }

  // Pick the style pair once rather than duplicating the classification call.
  const int style_for_matches = text_is_search_query
                                    ? ACMatchClassification::NONE
                                    : ACMatchClassification::MATCH;
  const int style_for_rest = text_is_search_query
                                 ? ACMatchClassification::MATCH
                                 : ACMatchClassification::NONE;

  const TermMatches found_terms = FindTermMatches(find_text, text);
  const ACMatchClassifications term_classes = ClassifyTermMatches(
      found_terms, text.size(), style_for_matches, style_for_rest);

  return AutocompleteMatch::MergeClassifications(original_class, term_classes);
}

// Finds where the terms of `find_text` occur in `text`, comparing
// case-insensitively, and returns the matches. Offsets in the result are
// meant to be applied to the original-cased `text`.
//
// Only the first `kMaxTextLength` code units of each input are examined.
//
// - If `allow_prefix_matching` is true and the lowercased `text` starts with
//   the entire lowercased `find_text`, a single match covering that prefix is
//   returned.
// - Otherwise `find_text` is split into terms via `String16VectorFromString16`
//   and each term is searched for in `text`. When `allow_mid_word_matching` is
//   false, the word starts of `text` are computed and passed on so matches can
//   be filtered against them.
//
// Returns an empty result if `find_text` is empty or if lowercasing changes
// the length of either (length-capped) input.
TermMatches FindTermMatches(std::u16string_view find_text,
                            std::u16string_view text,
                            bool allow_prefix_matching,
                            bool allow_mid_word_matching) {
  // Apply the length cap here, before lowercasing, so the size comparison
  // below measures only what case conversion did. `clean()` truncates to the
  // same limit; comparing its output against the *untruncated* inputs would
  // treat every input longer than the cap as a case-conversion length change
  // and discard all of its matches, even though offsets within the truncated
  // prefix are valid indexes into the original text.
  constexpr size_t kMaxTextLength = 2000;
  find_text = find_text.substr(0, kMaxTextLength);
  text = text.substr(0, kMaxTextLength);

  const std::u16string find_text_str = clean(find_text);
  const std::u16string text_str = clean(text);

  // Some international characters become multiple characters when converting
  // case. E.g. Armenian և is 1 character lowercase, but 2 characters Եվ
  // uppercased. Turkish has examples of the opposite, where the lowercasing can
  // increase length. If the string length changes when lowercased, term match
  // indexes will be off when being used to style the original-cased `text`.
  // This will cause either `DCHECK` crashes if the incorrect index is out of
  // bounds; or incorrect styling if the incorrect index remains in bounds or
  // `DCHECK`s are disabled. E.g. input 'ou' would bold 'Yo[uT]ube' if lower
  // case 'Y' is 2 characters.
  if (find_text_str.size() != find_text.size() ||
      text_str.size() != text.size()) {
    return {};
  }

  if (find_text_str.empty()) {
    return {};
  }

  // Fast path: the whole input is a prefix of the text, so report it as one
  // match without splitting into terms.
  if (allow_prefix_matching &&
      base::StartsWith(text_str, find_text_str, base::CompareCase::SENSITIVE)) {
    return {{0, 0, find_text_str.length()}};
  }

  String16Vector find_terms =
      String16VectorFromString16(find_text_str, nullptr);
  WordStarts word_starts;
  // `word_starts` is unused if `allow_mid_word_matching` is true.
  if (!allow_mid_word_matching) {
    String16VectorFromString16(text_str, &word_starts);
  }
  return FindTermMatchesForTerms(find_terms, WordStarts(find_terms.size(), 0),
                                 text_str, word_starts,
                                 allow_mid_word_matching);
}

// Locates `find_terms` inside `cleaned_text` and returns the matches after
// they have been sorted and de-overlapped.
//
// When `allow_mid_word_matching` is true those matches are returned as-is.
// Otherwise they are additionally passed through
// `ScoredHistoryMatch::FilterTermMatchesByWordStarts` together with
// `find_terms_word_starts` and `text_word_starts`.
TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                    const WordStarts& find_terms_word_starts,
                                    const std::u16string& cleaned_text,
                                    const WordStarts& text_word_starts,
                                    bool allow_mid_word_matching) {
  // Raw matches -> sorted -> overlapping matches removed.
  TermMatches term_matches = DeoverlapMatches(
      SortMatches(MatchTermsInString(find_terms, cleaned_text)));

  if (!allow_mid_word_matching) {
    term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
        term_matches, find_terms_word_starts, text_word_starts, 0,
        std::string::npos);
  }
  return term_matches;
}

// Converts `matches` into a list of classifications over a text of
// `text_length` characters. Matched spans are tagged with `match_style` and
// the gaps between them with `non_match_style`.
//
// With no matches, the result is a single `non_match_style` entry at offset 0
// (or nothing at all when `text_length` is 0). Matches that directly abut one
// another are folded into a single `match_style` span.
ACMatchClassifications ClassifyTermMatches(const TermMatches& matches,
                                           size_t text_length,
                                           int match_style,
                                           int non_match_style) {
  ACMatchClassifications classes;

  if (matches.empty()) {
    if (text_length != 0) {
      classes.emplace_back(0, non_match_style);
    }
    return classes;
  }

  // Text preceding the first match is unmatched.
  if (matches[0].offset != 0) {
    classes.emplace_back(0, non_match_style);
  }

  const size_t total_matches = matches.size();
  size_t index = 0;
  while (index < total_matches) {
    const size_t run_start = matches[index].offset;
    classes.emplace_back(run_start, match_style);

    // Extend the run across every match that begins exactly where the
    // previous one ended.
    size_t run_end = run_start + matches[index].length;
    ++index;
    while (index < total_matches && matches[index].offset == run_end) {
      run_end += matches[index].length;
      ++index;
    }

    // Anything after the run (and before the end of the text) is unmatched.
    if (run_end < text_length) {
      classes.emplace_back(run_end, non_match_style);
    }
  }
  return classes;
}