1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
|
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/bookmark_provider.h"
#include <stddef.h>
#include <algorithm>
#include <string>
#include <vector>
#include "base/feature_list.h"
#include "base/memory/raw_ptr.h"
#include "base/trace_event/trace_event.h"
#include "components/bookmarks/browser/bookmark_model.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/autocomplete_provider_client.h"
#include "components/omnibox/browser/autocomplete_result.h"
#include "components/omnibox/browser/keyword_provider.h"
#include "components/omnibox/browser/match_compare.h"
#include "components/omnibox/browser/omnibox_field_trial.h"
#include "components/omnibox/browser/omnibox_triggered_feature_service.h"
#include "components/omnibox/browser/scoring_functor.h"
#include "components/omnibox/browser/titled_url_match_utils.h"
#include "components/prefs/pref_service.h"
#include "components/query_parser/query_parser.h"
#include "third_party/metrics_proto/omnibox_event.pb.h"
#include "third_party/metrics_proto/omnibox_focus_type.pb.h"
#include "third_party/metrics_proto/omnibox_input_type.pb.h"
#include "third_party/omnibox_proto/groups.pb.h"
#include "ui/base/page_transition_types.h"
#include "url/url_constants.h"
using bookmarks::BookmarkNode;
using bookmarks::TitledUrlMatch;
BookmarkProvider::BookmarkProvider(AutocompleteProviderClient* client)
: AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK),
client_(client),
bookmark_model_(client ? client_->GetBookmarkModel() : nullptr) {}
void BookmarkProvider::Start(const AutocompleteInput& input,
bool minimal_changes) {
TRACE_EVENT0("omnibox", "BookmarkProvider::Start");
matches_.clear();
if (input.IsZeroSuggest() || input.text().empty()) {
return;
}
DoAutocomplete(input);
}
BookmarkProvider::~BookmarkProvider() = default;
void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) {
// We may not have a bookmark model for some unit tests.
if (!bookmark_model_) {
return;
}
// Retrieve enough bookmarks so that we have a reasonable probability of
// suggesting the one that the user desires.
const size_t kMaxBookmarkMatches = 50;
// Remove the keyword from input if we're in keyword mode for a starter pack
// engine.
const auto [adjusted_input, starter_pack_engine] =
AdjustInputForStarterPackKeyword(input, client_->GetTemplateURLService());
const query_parser::MatchingAlgorithm matching_algorithm =
GetMatchingAlgorithm(adjusted_input);
// GetBookmarksMatching returns bookmarks matching the user's
// search terms using the following rules:
// - The search text is broken up into search terms. Each term is searched
// for separately.
// - Term matches are always performed against the start of a word. 'def'
// will match against 'define' but not against 'indefinite'.
// - Terms perform partial word matches only if the the total search text
// length is at least 3 characters.
// - A search containing multiple terms will return results with those words
// occurring in any order.
// - Terms enclosed in quotes comprises a phrase that must match exactly.
// - Multiple terms enclosed in quotes will require those exact words in that
// exact order to match.
//
// Please refer to the code for TitledUrlIndex::GetResultsMatching for
// complete details of how searches are performed against the user's
// bookmarks.
std::vector<TitledUrlMatch> matches = bookmark_model_->GetBookmarksMatching(
adjusted_input.text(), kMaxBookmarkMatches, matching_algorithm);
if (matches.empty())
return; // There were no matches.
const std::u16string fixed_up_input(FixupUserInput(adjusted_input).second);
for (auto& bookmark_match : matches) {
// Score the TitledUrlMatch. If its score is greater than 0 then the
// AutocompleteMatch is created and added to matches_.
auto [relevance, bookmark_count] =
CalculateBookmarkMatchRelevance(bookmark_match);
if (relevance > 0) {
AutocompleteMatch match = TitledUrlMatchToAutocompleteMatch(
bookmark_match, AutocompleteMatchType::BOOKMARK_TITLE, relevance,
bookmark_count, this, client_->GetSchemeClassifier(), adjusted_input,
fixed_up_input);
// If the input was in a starter pack keyword scope, set the `keyword` and
// `transition` appropriately to avoid popping the user out of keyword
// mode.
if (starter_pack_engine) {
match.keyword = starter_pack_engine->keyword();
match.transition = ui::PAGE_TRANSITION_KEYWORD;
}
if (input.current_page_classification() ==
metrics::OmniboxEventProto_PageClassification_ANDROID_HUB) {
match.suggestion_group_id = omnibox::GROUP_MOBILE_BOOKMARKS;
}
matches_.push_back(match);
}
}
// In keyword mode, it's possible we only provide results from one or two
// autocomplete provider(s), so it's sometimes necessary to show more results
// than provider_max_matches_.
size_t max_matches = adjusted_input.InKeywordMode()
? provider_max_matches_in_keyword_mode_
: provider_max_matches_;
// Sort and clip the resulting matches.
size_t num_matches = std::min(matches_.size(), max_matches);
std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
matches_.end(), AutocompleteMatch::MoreRelevant);
if (input.current_page_classification() !=
PageClassification::OmniboxEventProto_PageClassification_ANDROID_HUB) {
ResizeMatches(
num_matches,
OmniboxFieldTrial::IsMlUrlScoringUnlimitedNumCandidatesEnabled());
}
}
query_parser::MatchingAlgorithm BookmarkProvider::GetMatchingAlgorithm(
AutocompleteInput input) {
if (input.current_page_classification() ==
PageClassification::OmniboxEventProto_PageClassification_ANDROID_HUB) {
return query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH;
}
// TODO(yoangela): This might have to check whether we're in @bookmarks mode
// specifically, since we might still get bookmarks suggestions in
// non-bookmarks keyword mode. This is enough of an edge case it makes sense
// to just stick with simplicity for now.
if (input.InKeywordMode()) {
return query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH;
}
if (input.text().length() >= 3)
return query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH;
return query_parser::MatchingAlgorithm::DEFAULT;
}
std::pair<int, int> BookmarkProvider::CalculateBookmarkMatchRelevance(
const TitledUrlMatch& bookmark_match) const {
// Summary on how a relevance score is determined for the match:
//
// For each match within the bookmark's title or URL (or both), calculate a
// 'factor', sum up those factors, then use the sum to figure out a value
// between the base score and the maximum score.
//
// The factor for each match is the product of:
//
// 1) how many characters in the bookmark's title/URL are part of this match.
// This is capped at the length of the bookmark's title to prevent terms
// that match in both the title and the URL from scoring too strongly.
//
// 2) where the match occurs within the bookmark's title or URL, giving more
// points for matches that appear earlier in the string:
// ((string_length - position of match start) / string_length).
//
// Example: Given a bookmark title of 'abcde fghijklm', with a title length
// of 14, and two different search terms, 'abcde' and 'fghij', with start
// positions of 0 and 6, respectively, 'abcde' will score higher (with a
// partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with a partial factor
// of (14-6)/14 = 0.571 ). (In this example neither term matches in the URL.)
//
// Once all match factors have been calculated they are summed. If there are
// no URL matches, the resulting sum will never be greater than the length of
// the bookmark title because of the way the bookmark model matches and
// removes overlaps. (In particular, the bookmark model only matches terms to
// the beginning of words, and it removes all overlapping matches, keeping
// only the longest. Together these mean that each character is included in at
// most one match.) If there are matches in the URL, the sum can be greater.
//
// This sum is then normalized by the length of the bookmark title + 10 and
// capped at 1.0. The +10 is to expand the scoring range so fewer bookmarks
// will hit the 1.0 cap and hence lose all ability to distinguish between
// these high-quality bookmarks.
//
// The normalized value is multiplied against the scoring range available,
// which is the difference between the minimum possible score and the maximum
// possible score. This product is added to the minimum possible score to give
// the preliminary score.
//
// If the preliminary score is less than the maximum possible score, 1199, it
// can be boosted up to that maximum possible score if the URL referenced by
// the bookmark is also referenced by any of the user's other bookmarks. A
// count of how many times the bookmark's URL is referenced is determined and,
// for each additional reference beyond the one for the bookmark being scored
// up to a maximum of three, the score is boosted by a fixed amount given by
// `kURLCountBoost`, below.
size_t title_length = bookmark_match.node->GetTitledUrlNodeTitle().length();
size_t url_length =
bookmark_match.node->GetTitledUrlNodeUrl().spec().length();
size_t length = title_length > 0 ? title_length : url_length;
ScoringFunctor title_position_functor = for_each(
bookmark_match.title_match_positions.begin(),
bookmark_match.title_match_positions.end(), ScoringFunctor(title_length));
ScoringFunctor url_position_functor = for_each(
bookmark_match.url_match_positions.begin(),
bookmark_match.url_match_positions.end(), ScoringFunctor(url_length));
const double title_match_strength = title_position_functor.ScoringFactor();
const double summed_factors =
title_match_strength + url_position_functor.ScoringFactor();
const double normalized_sum = std::min(summed_factors / (length + 10), 1.0);
// Bookmarks with javascript scheme ("bookmarklets") that do not have title
// matches get a lower base and lower maximum score because returning them
// for matches in their (often very long) URL looks bad and is often not
// intended by the user.
const GURL& url(bookmark_match.node->GetTitledUrlNodeUrl());
const bool bookmarklet_without_title_match =
url.SchemeIs(url::kJavaScriptScheme) && title_match_strength == 0.0;
const int kBaseBookmarkScore = bookmarklet_without_title_match ? 400 : 900;
const int kMaxBookmarkScore = bookmarklet_without_title_match ? 799 : 1199;
const double kBookmarkScoreRange =
static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
int relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) +
kBaseBookmarkScore;
// If scoring signal logging and ML scoring is disabled, skip counting
// bookmarks if relevance is above max score. Don't waste any time searching
// for additional referenced URLs if we already have a perfect title match.
// Returns a pair of the relevance score and -1 as a dummy bookmark count.
if (!OmniboxFieldTrial::IsPopulatingUrlScoringSignalsEnabled() &&
relevance >= kMaxBookmarkScore) {
return {relevance, /*bookmark_count=*/-1};
}
// Boost the score if the bookmark's URL is referenced by other bookmarks.
constexpr std::array<int, 4> kURLCountBoost = {0, 75, 125, 150};
const size_t url_node_count = bookmark_model_->GetNodesByURL(url).size();
DCHECK_GE(std::min(std::size(kURLCountBoost), url_node_count), 1U);
relevance +=
kURLCountBoost[std::min(kURLCountBoost.size(), url_node_count) - 1];
relevance = std::min(kMaxBookmarkScore, relevance);
return {relevance, url_node_count};
}
|