File: history_fuzzy_provider.cc

package info (click to toggle)
chromium 139.0.7258.127-1
links: PTS, VCS
area: main
in suites:
size: 6,122,068 kB
sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (711 lines) | stat: -rw-r--r-- 27,182 bytes
parent folder | download | duplicates (5)
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/omnibox/browser/history_fuzzy_provider.h"

#include <functional>
#include <memory>
#include <ostream>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "base/check.h"
#include "base/containers/contains.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/scoped_refptr.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/utf_string_conversions.h"
#include "base/system/sys_info.h"
#include "base/time/time.h"
#include "base/trace_event/memory_usage_estimator.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
#include "components/history/core/browser/history_database.h"
#include "components/history/core/browser/history_db_task.h"
#include "components/history/core/browser/history_service.h"
#include "components/history/core/browser/url_database.h"
#include "components/omnibox/browser/autocomplete_match_classification.h"
#include "components/omnibox/browser/autocomplete_match_type.h"
#include "components/omnibox/browser/autocomplete_provider_client.h"
#include "components/omnibox/browser/bookmark_provider.h"
#include "components/omnibox/browser/history_quick_provider.h"
#include "components/omnibox/browser/omnibox_field_trial.h"
#include "components/omnibox/browser/omnibox_triggered_feature_service.h"
#include "components/url_formatter/elide_url.h"
#include "third_party/metrics_proto/omnibox_event.pb.h"
#include "third_party/metrics_proto/omnibox_focus_type.pb.h"
#include "url/gurl.h"

namespace {

// Histogram names for measuring sub-provider match conversion efficacy.
// Reminder in case other sub-providers or metrics are added: update
// the `Omnibox.HistoryFuzzy.MatchConversion` entry in histograms.xml.
const char kMetricMatchConversionHistoryQuick[] =
    "Omnibox.HistoryFuzzy.MatchConversion.HistoryQuick";
const char kMetricMatchConversionBookmark[] =
    "Omnibox.HistoryFuzzy.MatchConversion.Bookmark";

// Histogram name for time spent on the fuzzy search portion of provider time.
const char kMetricSearchDuration[] = "Omnibox.HistoryFuzzy.SearchDuration";

// Histogram name for whether a presented fuzzy match was the one taken by the
// user at the moment a match was opened.
const char kMetricPrecision[] = "Omnibox.HistoryFuzzy.Precision";

// This cap ensures the search trie will not grow without bound. Up to half
// the total capacity may be filled at startup from loaded significant URLs.
// The enforced limit may be further constrained by
// `MaxNumHQPUrlsIndexedAtStartup`.
constexpr int kMaxTerminalCount = 256;

// This utility function reduces a URL to the most meaningful and likely part
// of the hostname to be matched against, i.e. the domain, the URL's TLD+1.
// May return an empty string if the given URL is not a good candidate for
// meaningful domain name matching.
std::u16string UrlDomainReduction(const GURL& url) {
  std::u16string url_host;
  std::u16string url_domain;
  url_formatter::SplitHost(url, &url_host, &url_domain, nullptr);
  return url_domain;
}

// This utility function prepares input text for fuzzy matching, or returns
// an empty string in cases unlikely to be worth a fuzzy matching search.
// Note, this is intended to be a fast way to improve matching and eliminate
// likely-unfruitful searches. It could make use of `SplitHost` as above, or
// `url_formatter::FormatUrlForDisplayOmitSchemePathAndTrivialSubdomains`,
// which uses `FormatUrlWithAdjustments` under the hood, but all that URL
// processing for input text that may not even be a URL seems like overkill,
// so this simple direct method is used instead.
std::u16string ReduceInputTextForMatching(const std::u16string& input) {
  constexpr size_t kMaximumFuzzyMatchInputLength = 24;
  constexpr size_t kPathCharacterCountToStopSearch = 6;
  constexpr size_t kPostDotCharacterCountHintingSubdomain = 4;

  // Long inputs are not fuzzy matched; doing so could be costly, and the
  // length of input itself is a signal that it may not have been typed but
  // simply pasted or edited in place.
  if (input.length() > kMaximumFuzzyMatchInputLength) {
    return std::u16string();
  }

  // Spaces hint that the input may be a search, not a URL.
  if (input.find(u' ') != std::u16string::npos) {
    return std::u16string();
  }

  // Inputs containing anything that looks like a scheme are a hint that this
  // is an existing URL or an edit that's likely to be handled deliberately,
  // not a messy human input that may need fuzzy matching.
  if (input.find(u"://") != std::u16string::npos) {
    return std::u16string();
  }

  std::u16string remaining;
  // While typing a URL, the user may typo the domain but then continue on to
  // the path; keeping input up to the path separator keeps the window open
  // for fuzzy matching the domain as they continue to type, but we don't want
  // to keep it open forever (doing so could result in potentially sticky false
  // positives).
  size_t index = input.find(u'/');
  if (index != std::u16string::npos) {
    if (index + kPathCharacterCountToStopSearch < input.length()) {
      // User has moved well beyond typing domain and hasn't taken any fuzzy
      // suggestions provided so far, and they won't get better, so we can
      // save compute and suggestion results space by stopping the search.
      return std::u16string();
    }
    remaining = input.substr(0, index);
  } else {
    remaining = input;
  }

  index = remaining.find(u'.');
  if (index != std::u16string::npos &&
      index + kPostDotCharacterCountHintingSubdomain < remaining.length()) {
    // Keep input with dot if near the end (within range of .com, .org, .edu).
    // With a dot earlier in the string, the user might be typing a subdomain
    // and we only have the TLD+1 stored in the trie, so skip the dot and match
    // against the remaining text. This may be helpful in common cases like
    // typing an unnecessary "www." before the domain name.
    remaining = remaining.substr(index + 1);
  }

  return remaining;
}

}  // namespace

namespace fuzzy {

Edit::Edit(Kind kind, size_t at, char16_t new_char)
    : kind(kind), new_char(new_char), at(at) {}

void Edit::ApplyTo(std::u16string& text) const {
  switch (kind) {
    case Kind::DELETE: {
      text.erase(at, 1);
      break;
    }
    case Kind::INSERT: {
      text.insert(at, 1, new_char);
      break;
    }
    case Kind::REPLACE: {
      text[at] = new_char;
      break;
    }
    case Kind::TRANSPOSE: {
      text[at] = text[at + 1];
      text[at + 1] = new_char;
      break;
    }
    case Kind::KEEP:
    default: {
      NOTREACHED();
    }
  }
}

Correction Correction::WithEdit(Edit edit) const {
  DCHECK(edit_count < Correction::kMaxEdits);
  Correction correction = *this;
  correction.edits[edit_count] = edit;
  correction.edit_count++;
  return correction;
}

void Correction::ApplyTo(std::u16string& text) const {
  size_t i = edit_count;
  while (i > 0) {
    i--;
    edits[i].ApplyTo(text);
  }
}

Node::Node() = default;

Node::Node(Node&&) = default;

Node::~Node() = default;

void Node::Insert(const std::u16string& text, size_t text_index) {
  if (text_index >= text.length()) {
    relevance_total += 1 - relevance;
    relevance = 1;
    return;
  }
  std::unique_ptr<Node>& node = next[text[text_index]];
  if (!node) {
    node = std::make_unique<Node>();
  }
  relevance_total -= node->relevance_total;
  node->Insert(text, text_index + 1);
  relevance_total += node->relevance_total;
}

void Node::Delete(const std::u16string& text, size_t text_index) {
  if (text_index < text.length()) {
    auto it = next.find(text[text_index]);
    if (it != next.end()) {
      Node* const node = it->second.get();
      relevance_total -= node->relevance_total;
      node->Delete(text, text_index + 1);
      if (node->relevance_total == 0) {
        next.erase(it);
      } else {
        relevance_total += node->relevance_total;
      }
    }
  } else {
    relevance_total -= relevance;
    relevance = 0;
  }
}

void Node::Clear() {
  next.clear();
}

bool Node::FindCorrections(const std::u16string& text,
                           ToleranceSchedule tolerance_schedule,
                           std::vector<Correction>& corrections) const {
  DCHECK(corrections.empty());
  DCHECK(tolerance_schedule.limit <= Correction::kMaxEdits);

  if (text.length() == 0) {
    return true;
  }

  // A utility class to track search progression.
  struct Step {
    // Walks through trie.
    raw_ptr<const Node> node;

    // Edit distance.
    int distance;

    // Advances through input text. This effectively tells how much of the
    // input has been consumed so far, regardless of output text length.
    size_t index;

    // Length of corrected text. This tells how long the output string will
    // be, regardless of input text length. It is independent of `index`
    // because corrections are not only 1:1 replacements but may involve
    // insertions or deletions as well.
    int length;

    // Backtracking data to enable text correction (from end of string back
    // to beginning, i.e. correction chains are applied in reverse).
    Correction correction;

    // std::priority_queue keeps the greatest element on top, so we want this
    // operator implementation to make bad steps less than good steps.
    // Prioritize minimum distance, with index and length to break ties.
    // The first found solutions are best, and fastest in common cases
    // near input on trie.
    bool operator<(const Step& rhs) const {
      if (distance > rhs.distance) {
        return true;
      } else if (distance == rhs.distance) {
        if (index < rhs.index) {
          return true;
        } else if (index == rhs.index) {
          return length < rhs.length;
        }
      }
      return false;
    }
  };

  std::priority_queue<Step> pq;
  pq.push({this, 0, 0, 0, Correction()});

  Step best{nullptr, INT_MAX, SIZE_MAX, INT_MAX, Correction()};

  // Find and return all equally-distant results as soon as distance increases
  // beyond that of first found results. Length is also considered to
  // avoid producing shorter substring texts.
  while (!pq.empty() && pq.top().distance <= best.distance) {
    Step step = pq.top();
    pq.pop();
    // Strictly greater should not be possible for this comparison.
    if (step.index >= text.length()) {
      if (step.distance == 0) {
        // Ideal common case, full input on trie with no correction required.
        // Because search is directed by priority_queue, we get here before
        // generating any corrections (straight line to goal is shortest path).
        DCHECK(corrections.empty());
        return true;
      }
      // Check `length` to keep longer results. Without this, we could end up
      // with shorter substring corrections (e.g. both "was" and "wash").
      // It may not be necessary to do this if priority_queue keeps results
      // optimal or returns a first best result immediately.
      DCHECK(best.distance == INT_MAX || step.distance == best.distance);
      if (step.distance < best.distance || step.length > best.length) {
        best = std::move(step);
        corrections.clear();
        // Dereference is safe because nonzero distance implies presence of
        // nontrivial correction.
        corrections.emplace_back(best.correction);
      } else {
        // Equal distance.
        // Strictly greater should not be possible for this comparison.
        if (step.length >= best.length) {
          // Dereference is safe because this is another equally
          // distant correction, necessarily discovered after the first.
          corrections.emplace_back(step.correction);
        }
#if DCHECK_ALWAYS_ON
        std::u16string corrected = text;
        step.correction.ApplyTo(corrected);
        DCHECK_EQ(corrected.length(), static_cast<size_t>(step.length))
            << corrected;
#endif
      }
      continue;
    }
    int tolerance = tolerance_schedule.ToleranceAt(step.index);
    if (step.distance < tolerance) {
      // Delete
      pq.push(
          {step.node, step.distance + 1, step.index + 1, step.length,
           step.correction.WithEdit({Edit::Kind::DELETE, step.index, '_'})});
    }
    for (const auto& entry : step.node->next) {
      const char16_t step_text_char = text[step.index];
      if (entry.first == step_text_char) {
        // Keep
        pq.push({entry.second.get(), step.distance, step.index + 1,
                 step.length + 1, step.correction});
      } else if (step.distance < tolerance) {
        // Insert
        pq.push({entry.second.get(), step.distance + 1, step.index,
                 step.length + 1,
                 step.correction.WithEdit(
                     {Edit::Kind::INSERT, step.index, entry.first})});

        // Replace. Note, we do not replace at the same position as a previous
        // insertion because doing so could produce unnecessary duplicates.
        const Edit& step_edit =
            step.correction.edit_count > 0
                ? step.correction.edits[step.correction.edit_count - 1]
                : Edit(Edit::Kind::KEEP, 0, '_');

        if (step_edit.kind != Edit::Kind::INSERT ||
            step_edit.at != step.index) {
          pq.push({entry.second.get(), step.distance + 1, step.index + 1,
                   step.length + 1,
                   step.correction.WithEdit(
                       {Edit::Kind::REPLACE, step.index, entry.first})});
        }

        // Transpose. Look ahead cost can be balanced by faster
        // advancement through input text resulting in shorter search.
        if (text.size() > step.index + 1 &&
            text[step.index + 1] == entry.first) {
          const auto it = entry.second->next.find(step_text_char);
          if (it != entry.second->next.end()) {
            pq.push({it->second.get(), step.distance + 1, step.index + 2,
                     step.length + 2,
                     step.correction.WithEdit(
                         {Edit::Kind::TRANSPOSE, step.index, step_text_char})});
          }
        }
      }
    }
  }

  return false;
}

size_t Node::EstimateMemoryUsage() const {
  size_t res = 0;
  res += base::trace_event::EstimateMemoryUsage(next);
  return res;
}

int Node::TerminalCount() const {
  // This works as long as `relevance` values mark terminals with 1 and
  // non-terminals with 0; see `Insert()`.
  return relevance_total;
}

// This task class loads URLs considered significant according to
// `HistoryDatabase::InitURLEnumeratorForSignificant` but there's nothing
// special about that implementation; we may do something different for
// fuzzy matching. The goal in general is to load and keep a reasonably sized
// set of likely relevant host names for fast fuzzy correction.
class LoadSignificantUrls : public history::HistoryDBTask {
 public:
  using Callback = base::OnceCallback<void(Node)>;

  explicit LoadSignificantUrls(Callback callback)
      : callback_(std::move(callback)) {}
  ~LoadSignificantUrls() override = default;

  bool RunOnDBThread(history::HistoryBackend* backend,
                     history::HistoryDatabase* db) override {
    history::URLDatabase::URLEnumerator enumerator;
    if (db && db->InitURLEnumeratorForSignificant(&enumerator)) {
      history::URLRow row;
      // The `MaxNumHQPUrlsIndexedAtStartup` dependency here is to ensure
      // that we keep a lower cap for mobile; it's much higher on desktop.
      // Note the divide, which ensures at least half the capacity will be kept
      // for later visited domains. `GetNextUrl` takes the most significant
      // URLs from the database (enumerator order) and duplicates won't count.
      const int max_terminal_count =
          std::min(OmniboxFieldTrial::MaxNumHQPUrlsIndexedAtStartup(),
                   kMaxTerminalCount) /
          2;
      while (enumerator.GetNextURL(&row) &&
             node_.TerminalCount() < max_terminal_count) {
        node_.Insert(UrlDomainReduction(row.url()), 0);
      }
    }
    return true;
  }

  void DoneRunOnMainThread() override {
    std::move(callback_).Run(std::move(node_));
  }

 private:
  Node node_;
  Callback callback_;
};

}  // namespace fuzzy

// static
void HistoryFuzzyProvider::RecordOpenMatchMetrics(
    const AutocompleteResult& result,
    const AutocompleteMatch& match_opened) {
  if (std::ranges::any_of(result, [](const AutocompleteMatch& match) {
        return match.provider && match.provider->type() ==
                                     AutocompleteProvider::TYPE_HISTORY_FUZZY;
      })) {
    const bool opened_fuzzy_match = match_opened.provider->type() ==
                                    AutocompleteProvider::TYPE_HISTORY_FUZZY;
    UMA_HISTOGRAM_BOOLEAN(kMetricPrecision, opened_fuzzy_match);
  }
}

HistoryFuzzyProvider::HistoryFuzzyProvider(AutocompleteProviderClient* client)
    : HistoryProvider(AutocompleteProvider::TYPE_HISTORY_FUZZY, client) {
  // Set up tunable parameters. These can be used to affect fuzzy matching
  // behavior and performance. Note, we use different `min_input_length_` values
  // depending on desktop versus mobile platforms, determined by experiment.
#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
  min_input_length_ = 5;
#else
  min_input_length_ = 3;
#endif
  // These initial penalty values produce good results for most inputs:
  // Using 10% reasonably took a 1334 relevance match down to 1200,
  // but was harmful to HQP suggestions: as soon as a '.' was
  // appended, a bunch of ~800 navsuggest results overtook a better
  // HQP result that was bumped down to ~770. Using 5% lets this
  // result compete in the navsuggest range.
  penalty_low_ = 5;
  penalty_high_ = 5;
  // The default value of zero means "no taper", and only the lowest penalty
  // will be applied.
  penalty_taper_length_ = 0;

  // In tests, history service is null and doesn't need to be observed.
  if (client->GetHistoryService()) {
    history_service_observation_.Observe(client->GetHistoryService());
    client->GetHistoryService()->ScheduleDBTask(
        FROM_HERE,
        std::make_unique<fuzzy::LoadSignificantUrls>(
            base::BindOnce(&HistoryFuzzyProvider::OnUrlsLoaded,
                           weak_ptr_factory_.GetWeakPtr())),
        &task_tracker_);
  }
}

void HistoryFuzzyProvider::Start(const AutocompleteInput& input,
                                 bool minimal_changes) {
  TRACE_EVENT0("omnibox", "HistoryFuzzyProvider::Start");
  matches_.clear();
  if (input.IsZeroSuggest() ||
      input.type() == metrics::OmniboxInputType::EMPTY) {
    return;
  }

  // Note this will always return early when bypassing for low-end devices;
  // see comment in constructor.
  if (!urls_loaded_event_.IsSignaled()) {
    return;
  }

  autocomplete_input_ = input;

  // Fuzzy matching intends to correct quick typos, and because it may involve
  // a compute intensive search, some conditions are checked to bypass this
  // provider early. When the cursor is moved from the end of input string,
  // user may have slowed down to edit manually.
  if (autocomplete_input_.cursor_position() ==
      autocomplete_input_.text().length()) {
    DoAutocomplete();
  }
}

size_t HistoryFuzzyProvider::EstimateMemoryUsage() const {
  size_t res = HistoryProvider::EstimateMemoryUsage();
  res += base::trace_event::EstimateMemoryUsage(autocomplete_input_);
  res += base::trace_event::EstimateMemoryUsage(root_);
  return res;
}

HistoryFuzzyProvider::~HistoryFuzzyProvider() = default;

void HistoryFuzzyProvider::DoAutocomplete() {
  constexpr fuzzy::ToleranceSchedule kToleranceSchedule = {
      .start_index = 2,
      .step_length = 4,
      .limit = 3,
  };

  const std::u16string& text =
      ReduceInputTextForMatching(autocomplete_input_.text());
  const size_t input_length = text.length();
  // Note: We can always return if `input_length` is zero, but
  // `min_input_length_` is an experimental parameter for more control.
  // So the second condition can be cleaned up if not needed, but
  // the first condition should be kept regardless.
  if (input_length == 0) {
    return;
  }
  if (input_length < min_input_length_) {
    return;
  }
  std::vector<fuzzy::Correction> corrections;
  const base::TimeTicks time_start = base::TimeTicks::Now();
  root_.FindCorrections(text, kToleranceSchedule, corrections);
  const base::TimeTicks time_end = base::TimeTicks::Now();
  UMA_HISTOGRAM_TIMES(kMetricSearchDuration, time_end - time_start);
  if (!corrections.empty()) {
    // Relevance ranges are nuanced enough that this should be kept reasonably
    // simple, but the experience of the feature is sensitive to the penalty so
    // we support a range from highest penalty on short inputs to lowest penalty
    // on longer inputs, with a linear taper in between.
    int penalty = penalty_low_;

    // Compute additional penalty for very short inputs.
    if (penalty_taper_length_ > 0) {
      DCHECK_GE(input_length, min_input_length_);
      const size_t extra_length = input_length - min_input_length_;
      if (extra_length <= penalty_taper_length_) {
        DCHECK_GE(penalty_high_, penalty_low_);
        penalty += ((penalty_taper_length_ - extra_length) *
                    (penalty_high_ - penalty_low_)) /
                   penalty_taper_length_;
      }
    }

    // Use of `scoped_refptr` is required here because destructor is private.
    scoped_refptr<HistoryQuickProvider> history_quick_provider =
        new HistoryQuickProvider(client());
    scoped_refptr<BookmarkProvider> bookmark_provider =
        new BookmarkProvider(client());
    int count_history_quick = 0;
    int count_bookmark = 0;
    for (const auto& correction : corrections) {
      std::u16string fixed = text;
      correction.ApplyTo(fixed);

      // Note the `cursor_position` could be changed by insert or delete
      // corrections, but this is easy to adapt since we only fuzzy
      // match when cursor is at end of input; just move to new end.
      DCHECK_EQ(autocomplete_input_.cursor_position(),
                autocomplete_input_.text().length());
      AutocompleteInput corrected_input(
          fixed, fixed.length(),
          autocomplete_input_.current_page_classification(),
          client()->GetSchemeClassifier());

      history_quick_provider->Start(corrected_input, false);
      DCHECK(history_quick_provider->done());
      bookmark_provider->Start(corrected_input, false);
      DCHECK(bookmark_provider->done());

      count_history_quick +=
          AddConvertedMatches(history_quick_provider->matches(), penalty);
      count_bookmark +=
          AddConvertedMatches(bookmark_provider->matches(), penalty);
    }
    if (matches_.size() > provider_max_matches_) {
      // When too many matches are generated, take only the most relevant
      // matches and correct the counts for accurate metrics.
      std::partial_sort(matches_.begin(),
                        matches_.begin() + provider_max_matches_,
                        matches_.end(), AutocompleteMatch::MoreRelevant);
      for (size_t i = provider_max_matches_; i < matches_.size(); i++) {
        DCHECK(matches_[i].provider.get() == history_quick_provider ||
               matches_[i].provider.get() == bookmark_provider)
            << matches_[i].provider->GetName();
        if (matches_[i].provider.get() == history_quick_provider) {
          count_history_quick--;
        } else {
          count_bookmark--;
        }
      }
      matches_.resize(provider_max_matches_);
    }

    for (AutocompleteMatch& match : matches_) {
      match.provider = this;
    }

    RecordMatchConversion(kMetricMatchConversionHistoryQuick,
                          count_history_quick);
    RecordMatchConversion(kMetricMatchConversionBookmark, count_bookmark);
  }
}

int HistoryFuzzyProvider::AddConvertedMatches(const ACMatches& matches,
                                              int penalty) {
  if (matches.empty()) {
    return 0;
  }

  // Take only the most relevant match, to give the best chance of keeping
  // the penalized fuzzy match while reducing risk of possible noise.
  // Note that min_element is used instead of max_element because
  // `AutocompleteMatch::MoreRelevant` reverses standard sort order such that
  // matches with greater relevance are considered less than matches with lesser
  // relevance. For performance reasons, `CompareWithDemoteByType` is not used,
  // so ranking of the final result set will be more nuanced than ranking here.
  ACMatches::const_iterator it = std::min_element(
      matches.begin(), matches.end(), AutocompleteMatch::MoreRelevant);
  CHECK(it != matches.end());
  matches_.push_back(*it);

  // Update match in place. Note, `match.provider` will be reassigned after
  // `DoAutocomplete` because source sub-provider must be kept for metrics.
  AutocompleteMatch& match = matches_.back();

  // It's important that fuzzy matches do not try to become default and inline
  // autocomplete because the input/match-data mismatch can cause problems
  // with user interaction and omnibox text editing; see crbug/1347440.
  match.allowed_to_be_default_match = false;
  match.inline_autocompletion.clear();

  // Apply relevance penalty; all corrections are equal and we only apply this
  // to the most relevant result, so edit distance isn't needed.
  DCHECK_GE(penalty, 0);
  DCHECK_LE(penalty, 100);
  match.relevance -= match.relevance * penalty / 100;

  // Scoring signals are calculated in the history and bookmark providers using
  // the corrected input. These scoring signals are inaccurate for the true
  // input, so clear them to prevent the ml model assigning an
  // artificially high confidence to this suggestion.
  match.scoring_signals.reset();

  return 1;
}

void HistoryFuzzyProvider::OnUrlsLoaded(fuzzy::Node node) {
  root_ = std::move(node);
  urls_loaded_event_.Signal();
}

void HistoryFuzzyProvider::OnURLVisited(
    history::HistoryService* history_service,
    const history::URLRow& url_row,
    const history::VisitRow& new_visit) {
  if (root_.TerminalCount() <
      std::min(OmniboxFieldTrial::MaxNumHQPUrlsIndexedAtStartup(),
               kMaxTerminalCount)) {
    root_.Insert(UrlDomainReduction(url_row.url()), 0);
  }
}

void HistoryFuzzyProvider::OnHistoryDeletions(
    history::HistoryService* history_service,
    const history::DeletionInfo& deletion_info) {
  // Note, this implementation is conservative in terms of user privacy; it
  // deletes hosts from the trie if any URL with the given host is deleted.
  if (deletion_info.IsAllHistory()) {
    root_.Clear();
  } else {
    for (const history::URLRow& row : deletion_info.deleted_rows()) {
      root_.Delete(UrlDomainReduction(row.url()), 0);
    }
  }
}

void HistoryFuzzyProvider::RecordMatchConversion(const char* name, int count) {
  base::UmaHistogramExactLinear(
      name, count, AutocompleteResult::kMaxAutocompletePositionValue);
}