File: shared_structs.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (171 lines) | stat: -rw-r--r-- 5,895 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROMEOS_ASH_COMPONENTS_LOCAL_SEARCH_SERVICE_SHARED_STRUCTS_H_
#define CHROMEOS_ASH_COMPONENTS_LOCAL_SEARCH_SERVICE_SHARED_STRUCTS_H_

#include <stdint.h>

#include <string>
#include <vector>

namespace ash::local_search_service {

// Enumerates the index ids. Every enumerator is given an explicit numeric
// value, and the list must be kept in sync with
// //tools/metrics/histograms/metadata/local/histograms.xml.
// |kMaxValue| is an alias of the highest-valued entry (currently
// kShortcutsApp).
enum class IndexId : int {
  kCrosSettings = 0,
  kHelpApp = 1,
  kHelpAppLauncher = 2,
  kPersonalization = 3,
  kShortcutsApp = 4,

  kMaxValue = kShortcutsApp,
};

// Enumerates the search backends.
// The numeric values below are persisted to logs: never renumber an existing
// entry and never reuse a retired value.
// |kMaxValue| is an alias of the highest-valued entry (currently
// kInvertedIndex).
enum class Backend : int {
  kLinearMap = 0,
  kInvertedIndex = 1,

  kMaxValue = kInvertedIndex,
};

// A single content entry of a Data item: an id, a 16-bit-character text
// payload and a weight.
struct Content {
  Content();
  Content(const Content& content);
  // |weight| may be omitted, in which case 1.0 is used.
  Content(const std::string& id,
          const std::u16string& content,
          double weight = 1.0);
  ~Content();

  // An identifier for the content in Data.
  std::string id;

  // Text payload of this content.
  std::u16string content;

  // How important this Content is. It feeds into the overall matching score
  // of the enclosing Data item: a query that matches a Data item is really
  // matching one of its Contents, and the larger the weight of that Content,
  // the higher the overall score. Expected range is [0,1].
  // TODO(jiameng): it will be used by kInvertedIndex only. We may consider
  // extending to kLinearMap.
  double weight{1.0};
};

// A data item: an id, the list of Contents it is made of and an optional
// locale.
struct Data {
  Data();
  Data(const Data& data);
  // |locale| may be omitted, in which case an empty string is used.
  Data(const std::string& id,
       const std::vector<Content>& contents,
       const std::string& locale = "");
  ~Data();

  // Identifier of the data item; it should be unique across the registry.
  // Clients pick the ids themselves: paths, urls or any opaque string
  // identifier will do.
  // Ideally ids persist across sessions, but that is not strictly required
  // for now because data is not persisted across sessions.
  std::string id;

  // Data item will be matched between its search tags and query term.
  std::vector<Content> contents;

  // Locale of the data, currently consulted by the inverted index only.
  // When left unset, the system configured locale is used.
  // TODO(jiameng): apply locale-dependent tokenization to linear map.
  std::string locale;
};

// Tunable thresholds for a search. Which fields apply depends on the backend
// in use. All thresholds should be in [0,1].
struct SearchParams {
  // Applies to the kLinearMap backend. The relevance score is a combination
  // of prefix and fuzzy matching, and a Data item counts as relevant when its
  // overall relevance score is above this value.
  double relevance_threshold{0.64};

  // The two thresholds below apply to the kInvertedIndex backend. A query term
  // matched against a Data item is considered relevant if either its prefix
  // score is above |prefix_threshold| or its fuzzy score is above
  // |fuzzy_threshold|.
  double prefix_threshold{0.6};
  double fuzzy_threshold{0.7};
};

// Describes where a match sits: the id of a content plus a start offset and a
// length.
struct Position {
  Position();
  Position(const Position& position);
  Position(const std::string& content_id, uint32_t start, uint32_t length);
  ~Position();

  // NOTE(review): presumably the |id| of the Content that matched; confirm
  // against callers.
  std::string content_id;

  // TODO(jiameng): |start| and |length| will be implemented for inverted index
  // later. (The earlier wording of this note said |end|, but the member
  // declared here is |length|.)
  //
  // Both fields carry a default member initializer so that a constructor which
  // does not set them explicitly (e.g. a defaulted default constructor) leaves
  // them at 0 instead of holding an indeterminate value.
  uint32_t start = 0;
  uint32_t length = 0;
};

// Result is one item that matches a given query. It contains the id of the item
// and its matching score.
struct Result {
  // Id of the data.
  std::string id;

  // Relevance score.
  // Currently only linear map is implemented with fuzzy matching and score will
  // always be in [0,1]. In the future, when an inverted index is implemented,
  // the score will not be in this range any more. Client will be able to select
  // a search backend to use (linear map vs inverted index) and hence client
  // will be able to expect the range of the scores.
  // NOTE(review): Backend::kInvertedIndex already exists in this file, so the
  // "in the future" wording above may be out of date; confirm the actual score
  // range per backend.
  //
  // Defaults to 0.0 so that a constructor which does not set it explicitly
  // (e.g. a defaulted default constructor) does not leave it indeterminate.
  double score = 0.0;

  // Position of the matching text.
  // We currently use linear map, which will return one matching content, hence
  // the vector has only one element. When we have inverted index, we will have
  // multiple matching contents.
  std::vector<Position> positions;

  Result();
  Result(const Result& result);
  Result(const std::string& id,
         double score,
         const std::vector<Position>& positions);
  ~Result();
};

// Status of the search attempt.
// These values are persisted to logs: entries must not be renumbered and
// numeric values must never be reused. More entries will be added later.
// |kMaxValue| is an alias of the highest-valued entry (currently kEmptyIndex).
enum class ResponseStatus : int {
  kUnknownError = 0,

  // The search operation succeeded. The result list may still be empty when
  // nothing matched.
  kSuccess = 1,

  // Query is empty.
  kEmptyQuery = 2,

  // Index is empty (i.e. no data).
  kEmptyIndex = 3,

  kMaxValue = kEmptyIndex,
};

// Similar to Position but also contains weight from Content.
// This is used in ranking and is not meant to be returned as part of the search
// results.
struct WeightedPosition {
  // Weight taken from the Content this position refers to. Defaults to 1.0,
  // mirroring the default of Content::weight, so that a constructor which does
  // not set it explicitly (e.g. a defaulted default constructor) does not leave
  // it indeterminate.
  double weight = 1.0;

  // The position the weight is attached to.
  Position position;

  WeightedPosition();
  WeightedPosition(const WeightedPosition& weighted_position);
  WeightedPosition(double weight, const Position& position);
  ~WeightedPosition();
};

// A token in its processed form, together with the places where it occurs.
// |positions| lists the token's positions within one document.
struct Token {
  // The processed token text.
  std::u16string content;

  // Weighted positions of this token in one document.
  std::vector<WeightedPosition> positions;

  Token();
  Token(const Token& token);
  // NOTE(review): the parameter names |text| and |pos| differ from the member
  // names |content| and |positions|; presumably they initialize those members
  // respectively. Confirm in the out-of-line definition.
  Token(const std::u16string& text, const std::vector<WeightedPosition>& pos);
  ~Token();
};

}  // namespace ash::local_search_service

#endif  // CHROMEOS_ASH_COMPONENTS_LOCAL_SEARCH_SERVICE_SHARED_STRUCTS_H_