1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
|
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_ASH_COMPONENTS_LOCAL_SEARCH_SERVICE_SHARED_STRUCTS_H_
#define CHROMEOS_ASH_COMPONENTS_LOCAL_SEARCH_SERVICE_SHARED_STRUCTS_H_
#include <stdint.h>
#include <string>
#include <vector>
namespace ash::local_search_service {
// Ids of the indexes handled by the local search service.
// Keep this list in sync with
// //tools/metrics/histograms/metadata/local/histograms.xml.
enum class IndexId : int {
  kCrosSettings = 0,
  kHelpApp = 1,
  kHelpAppLauncher = 2,
  kPersonalization = 3,
  kShortcutsApp = 4,
  // Alias of the last entry above; must be updated whenever an id is appended.
  kMaxValue = kShortcutsApp,
};
// Search backend selector.
// The numeric values are persisted to logs: never renumber an entry and never
// reuse a retired value.
enum class Backend : int {
  kLinearMap = 0,
  kInvertedIndex = 1,
  // Alias of the last entry above; must be updated when a backend is appended.
  kMaxValue = kInvertedIndex,
};
// One piece of searchable content belonging to a Data item.
struct Content {
  Content();
  Content(const Content& content);
  // |weight| may be omitted, in which case it is 1.0.
  Content(const std::string& id,
          const std::u16string& content,
          double weight = 1.0);
  ~Content();

  // An identifier for the content in Data.
  std::string id;

  // The content itself, stored as a UTF-16 string.
  std::u16string content;

  // How important this Content is, in the range [0,1]. It feeds into the
  // overall matching score of the enclosing Data item: a query that matches a
  // Data item does so through one of its Contents, and a larger weight on the
  // matching Content yields a higher overall score.
  // TODO(jiameng): it will be used by kInvertedIndex only. We may consider
  // extending to kLinearMap.
  double weight = 1.0;
};
// A searchable item: an id plus the Contents that queries are matched against.
struct Data {
  Data();
  Data(const Data& data);
  // |locale| may be omitted, in which case it is the empty string.
  Data(const std::string& id,
       const std::vector<Content>& contents,
       const std::string& locale = "");
  ~Data();

  // Identifier of the data item; it should be unique across the registry.
  // Clients choose the ids: paths, urls or any other opaque string work.
  // Ideally ids persist across sessions, but that is not strictly required
  // now because data is not persisted across sessions.
  std::string id;

  // Data item will be matched between its search tags and query term.
  std::vector<Content> contents;

  // Locale of the data. Currently only the inverted index uses it. When left
  // unset, the system configured locale is used.
  // TODO(jiameng): apply locale-dependent tokenization to linear map.
  std::string locale;
};
// Tunable thresholds for a search. Which fields apply depends on the backend.
struct SearchParams {
  // Used when the backend is kLinearMap. The relevance score combines prefix
  // and fuzzy matching; a Data item counts as relevant when its overall score
  // is above this threshold. Expected range: [0,1].
  double relevance_threshold{0.64};

  // Used when the backend is kInvertedIndex. A query term matched against a
  // Data item is considered relevant if either its prefix score is above
  // |prefix_threshold| or its fuzzy score is above |fuzzy_threshold|.
  // Expected range for both: [0,1].
  double prefix_threshold{0.6};
  double fuzzy_threshold{0.7};
};
// Describes where a match is located: which content, and a start/length pair.
struct Position {
  Position();
  Position(const Position& position);
  Position(const std::string& content_id, uint32_t start, uint32_t length);
  ~Position();

  // Presumably the id of the Content containing the match (cf. Content::id) -
  // TODO confirm against callers.
  std::string content_id;

  // TODO(jiameng): |start| and |length| will be implemented for inverted index
  // later.
  // Both default to 0 so that a default-constructed Position never exposes
  // indeterminate values.
  uint32_t start = 0;
  uint32_t length = 0;
};
// Result is one item that matches a given query. It contains the id of the item
// and its matching score.
struct Result {
  Result();
  Result(const Result& result);
  Result(const std::string& id,
         double score,
         const std::vector<Position>& positions);
  ~Result();

  // Id of the data.
  std::string id;

  // Relevance score.
  // With the linear map backend (fuzzy matching) the score is always in [0,1].
  // Scores produced by an inverted index are not expected to stay in this
  // range. Clients select the backend (linear map vs inverted index) and can
  // therefore know which range to expect.
  // NOTE(review): the original wording described the inverted index as future
  // work, yet Backend::kInvertedIndex exists in this header - confirm the
  // actual score range of that backend.
  // Defaults to 0.0 so a default-constructed Result never exposes an
  // indeterminate value.
  double score = 0.0;

  // Position of the matching text.
  // The linear map returns one matching content, hence a single element here.
  // An inverted index can yield multiple matching contents.
  std::vector<Position> positions;
};
// Status of the search attempt. More values will be added later.
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class ResponseStatus : int {
  kUnknownError = 0,

  // The search operation succeeded. The result list may still be empty when
  // nothing matched.
  kSuccess = 1,

  // Query is empty.
  kEmptyQuery = 2,

  // Index is empty (i.e. no data).
  kEmptyIndex = 3,

  // Alias of the last entry above; must be updated when a status is appended.
  kMaxValue = kEmptyIndex,
};
// Similar to Position but also contains weight from Content.
// This is used in ranking and is not meant to be returned as part of the search
// results.
struct WeightedPosition {
  // Defaults to 1.0, the same default as Content::weight, so that a
  // default-constructed WeightedPosition never holds an indeterminate value.
  double weight = 1.0;
  Position position;

  WeightedPosition();
  WeightedPosition(const WeightedPosition& weighted_position);
  WeightedPosition(double weight, const Position& position);
  ~WeightedPosition();
};
// A token in its processed form, together with the positions at which it
// occurs in one document.
struct Token {
  // The processed token text.
  std::u16string content;

  // The token's positions in one document.
  std::vector<WeightedPosition> positions;

  Token();
  Token(const std::u16string& text, const std::vector<WeightedPosition>& pos);
  Token(const Token& token);
  ~Token();
};
} // namespace ash::local_search_service
#endif // CHROMEOS_ASH_COMPONENTS_LOCAL_SEARCH_SERVICE_SHARED_STRUCTS_H_
|