File: titled_url_index.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (167 lines) | stat: -rw-r--r-- 6,634 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_
#define COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_

#include <stddef.h>

#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <vector>

#include "base/containers/flat_set.h"
#include "base/feature_list.h"
#include "base/memory/raw_ptr.h"
#include "components/bookmarks/browser/titled_url_node_sorter.h"
#include "components/query_parser/query_parser.h"

namespace bookmarks {

class TitledUrlNode;

struct TitledUrlMatch;

// TitledUrlIndex maintains an index of paired titles and URLs for quick lookup.
//
// TitledUrlIndex maintains the index (index_) as a map of sets. The map (type
// Index) maps from a lower case string to the set (type TitledUrlNodeSet) of
// TitledUrlNodes that contain that string in their title or URL.
class TitledUrlIndex {
 public:
  using TitledUrlNodeSet =
      base::flat_set<raw_ptr<const TitledUrlNode, CtnExperimental>>;

  // Constructs a TitledUrlIndex. `sorter` is used to construct a sorted list
  // of matches when matches are returned from the index. If null, matches are
  // returned unsorted.
  explicit TitledUrlIndex(
      std::unique_ptr<TitledUrlNodeSorter> sorter = nullptr);

  TitledUrlIndex(const TitledUrlIndex&) = delete;
  TitledUrlIndex& operator=(const TitledUrlIndex&) = delete;

  ~TitledUrlIndex();

  void SetNodeSorter(std::unique_ptr<TitledUrlNodeSorter> sorter);

  // Invoked when a title/URL pair has been added to the model.
  void Add(const TitledUrlNode* node);

  // Invoked when a title/URL pair has been removed from the model.
  void Remove(const TitledUrlNode* node);

  // Invoked when a folder has been added to the model.
  void AddPath(const TitledUrlNode* node);

  // Invoked when a folder has been removed from the model.
  void RemovePath(const TitledUrlNode* node);

  // Returns up to `max_count` of matches containing each term from the text
  // `query` in either the title, URL, or the titles of ancestor nodes.
  // `matching_algorithm` determines the algorithm used by QueryParser
  // internally to parse `query`.
  std::vector<TitledUrlMatch> GetResultsMatching(
      const std::u16string& query,
      size_t max_count,
      query_parser::MatchingAlgorithm matching_algorithm);

  // Returns a normalized version of the UTF16 string `text`.  If it fails to
  // normalize the string, returns `text` itself as a best-effort.
  static std::u16string Normalize(std::u16string_view text);

 private:
  friend class TitledUrlIndexFake;

  using TitledUrlNodes =
      std::vector<raw_ptr<const TitledUrlNode, CtnExperimental>>;
  using Index = std::map<std::u16string, TitledUrlNodeSet>;

  // Constructs `sorted_nodes` by copying the matches in `matches` and sorting
  // them.
  void SortMatches(const TitledUrlNodeSet& matches,
                   TitledUrlNodes* sorted_nodes) const;

  // For each node, calls `MatchTitledUrlNodeWithQuery()` and returns the
  // aggregated `TitledUrlMatch`s.
  std::vector<TitledUrlMatch> MatchTitledUrlNodesWithQuery(
      const TitledUrlNodes& nodes,
      const query_parser::QueryNodeVector& query_nodes,
      const std::vector<std::u16string>& query_terms,
      size_t max_count);

  // Finds `query_nodes` matches in `node` and returns a TitledUrlMatch
  // containing `node` and the matches.
  std::optional<TitledUrlMatch> MatchTitledUrlNodeWithQuery(
      const TitledUrlNode* node,
      const query_parser::QueryNodeVector& query_nodes,
      const std::vector<std::u16string>& query_terms);

  // Return matches for the specified `terms`. This is an intersection of each
  // term's matches.
  TitledUrlNodeSet RetrieveNodesMatchingAllTerms(
      const std::vector<std::u16string>& terms,
      query_parser::MatchingAlgorithm matching_algorithm) const;

  // Return matches for the specified `terms`. This is approximately a union of
  // each term's match, with some limitations to avoid too many nodes being
  // returned: terms shorter than `term_min_length` or matching more than
  // `max_nodes_per_term` nodes won't have their nodes accumulated by union; and
  // accumulation is capped to `max_nodes`. Guaranteed to include any node
  // `RetrieveNodesMatchingAllTerms()` includes.
  TitledUrlNodeSet RetrieveNodesMatchingAnyTerms(
      const std::vector<std::u16string>& terms,
      query_parser::MatchingAlgorithm matching_algorithm,
      size_t max_nodes) const;

  // Return matches for the specified `term`. May return duplicates.
  TitledUrlNodes RetrieveNodesMatchingTerm(
      const std::u16string& term,
      query_parser::MatchingAlgorithm matching_algorithm) const;

  // Return true if `term` matches any path. in `path_index_`.
  bool DoesTermMatchPath(
      const std::u16string& term,
      query_parser::MatchingAlgorithm matching_algorithm) const;

  // Returns the set of query words from `query`.
  static std::vector<std::u16string> ExtractQueryWords(
      const std::u16string& query);

  // Return the index terms for `node`.
  static std::vector<std::u16string> ExtractIndexTerms(
      const TitledUrlNode* node);

  // Adds `node` to `index_`.
  void RegisterNode(const std::u16string& term, const TitledUrlNode* node);

  // Removes `node` from `index_`.
  void UnregisterNode(const std::u16string& term, const TitledUrlNode* node);

  // A map of terms and the nodes containing those terms in their titles or
  // URLs. E.g., given 2 bookmarks titled 'x y x' and 'x z', `index` would
  // contain: `{ x: set[node1, node2], y: set[node1], z: set[node2] }`.
  Index index_;
  // A map of terms and the number of times it occurs in paths. E.g., given
  // 2 paths 'bookmarks bar/x y x/x' and 'bookmarks bar/x z/x', `path_index_`
  // would contain `{ bookmarks: 2, bar: 2, x: 4, y: 1, z: 1 }`. Note, 'x' has
  // count 4, since it occurred twice in each path. Doesn't track actual
  // bookmark nodes, as the latter would need large updates when moving,
  // folders. Tracks counts so terms can be unindexed when the last containing
  // folder is renamed or deleted. Updated on folder rename, creation, and
  // deletion; not updated on bookmark or folder move. Used to short circuit
  // unioning per-term matches when matching paths, as intersecting results in
  // much fewer nodes.
  std::map<std::u16string, size_t> path_index_;

  std::unique_ptr<TitledUrlNodeSorter> sorter_;
};

}  // namespace bookmarks

#endif  // COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_