// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package segmentation_platform.proto;
import "components/segmentation_platform/public/proto/aggregation.proto";
import "components/segmentation_platform/public/proto/output_config.proto";
import "components/segmentation_platform/public/proto/types.proto";
// Metadata version implemented by the current code. It is used to verify
// whether the metadata provided by the server is supported by this version of
// the code. Bump the version number for every new feature added to the
// metadata proto, and record the change in the log below.
//
// Change log:
//   Version 0: UMA features and aggregation in the |features| field.
//   Version 1: UMA features, custom inputs and SQL features in the
//              |input_features| field.
//   Version 2: Training data output collection in the |training_outputs|
//              field.
//   Version 3: Trigger configurations for training data collection.
enum CurrentVersion {
  METADATA_VERSION = 3;
}
// Version information for segmentation models.
message VersionInfo {
  // Minimum supported model metadata version. Some newer features/fields
  // might not be available before this version. Set on the server; the client
  // reads it to verify that the model is valid.
  optional int32 metadata_min_version = 1;

  // Current model metadata version. The client sets it when sending a model
  // download request to the optimization guide server, so that the server
  // knows the capabilities of the client.
  optional int32 metadata_cur_version = 2;
}
// Identifies where a model comes from: the client side or the server side.
enum ModelSource {
  UNKNOWN_MODEL_SOURCE = 0;

  // Represents server side model.
  SERVER_MODEL_SOURCE = 1;

  // Represents client side model.
  DEFAULT_MODEL_SOURCE = 2;
}
// Describes a feature derived from a UMA histogram or user action.
message UMAFeature {
  // Type of signal this feature refers to.
  // Note: SignalType::UKM_EVENT is only used for SignalStorageConfig and
  // should not be used as a UMA feature's signal type.
  optional SignalType type = 1;

  // Human readable name of the histogram or user action.
  optional string name = 2;

  // Hash of the histogram name or user action. Must match the result of
  // base::HashMetricName.
  optional fixed64 name_hash = 3;

  // Number of buckets to include in the result. When set to 0, no data is
  // collected; this can be used to start storing data before it should be
  // used. See documentation for Aggregation for details.
  optional uint64 bucket_count = 4;

  // Required length of the calculated result. See documentation for
  // Aggregation for details.
  optional uint64 tensor_length = 5;

  // Type of aggregation to use for this particular feature.
  optional Aggregation aggregation = 6;

  // Only set if type == HISTOGRAM_ENUM.
  // A match is valid only when the enum ID equals any one of these values,
  // i.e. the list works like an OR condition, e.g.: [url, search, ...] or
  // just [url].
  repeated int32 enum_ids = 7;

  // Only set if aggregation == LATEST_OR_DEFAULT.
  // Values given to the model when the latest value requested is not
  // available in the database. The number of entries should be equal to
  // |tensor_length|.
  repeated float default_values = 8;
}
// Describes an input whose values are produced by a custom function provided
// by the engine. The values are either used directly as input tensor fields of
// the model or as bind values for SQL features.
message CustomInput {
  // Required.
  // 1. When the input is used directly as an input tensor field of the model,
  //    this is the number of columns to fill in the tensor. In this case the
  //    value should be float.
  // 2. When the input is used as a bind value for SQL features, this is the
  //    number of SQL bindings to fill in the SQL query.
  optional int32 tensor_length = 1;

  // Distinguishes the different types of custom inputs. Each value selects a
  // custom function provided by the engine that fills in the input feature to
  // the model.
  enum FillPolicy {
    UNKNOWN_FILL_POLICY = 0;

    // Output is the time at which the model prediction is needed. Can be used
    // to bind a TIME type param to queries.
    // Output type: Time
    // Output length: 1
    FILL_PREDICTION_TIME = 1;

    // Output is two timestamps: the beginning and the end of the last x days.
    // Can be used to bind TIME type params to a query within a time interval.
    // Output type: Time
    // Output length: 2
    // Additional arg:
    //   `bucket_count`: Required. Number of buckets to include in the result.
    TIME_RANGE_BEFORE_PREDICTION = 2;

    // Determines whether a given page is a product details page and can be
    // price tracked.
    PRICE_TRACKING_HINTS = 3;

    // The input is taken from the input context and used directly to fill the
    // input tensor to the model or to another query.
    // Output type: ProcessedValue
    // Output length: 1
    // Additional arg:
    //   `name`: Optional. The name of the field to be looked up in input
    //   context. If missing then the |name| field is used.
    FILL_FROM_INPUT_CONTEXT = 4;

    // Output is a tensor of length 10 consisting of float values denoting
    // the count of devices by type, with different form factor and OS type.
    // See `SyncDeviceInfoObserver` for a description of each value.
    // Output type: float
    // Output length: 10
    // Additional arg:
    //   `wait_for_device_info_in_seconds`: Number of seconds to wait for sync
    //   device info before timeout. If 0, then does not wait for sync and
    //   times out immediately if device info is not available.
    // InputContext arg:
    //   `active_days_limit`: Number of days after which the device is
    //   considered not active after last sync. Must be INT.
    FILL_SYNC_DEVICE_INFO = 5;

    // Output is a tensor of length 1 consisting of the device RAM in MB.
    // Output type: float
    // Output length: 1
    FILL_DEVICE_RAM_MB = 6;

    // Output is a tensor of length 1 describing the device OS level.
    // Output type: float
    // Output length: 1
    FILL_DEVICE_OS_VERSION_NUMBER = 7;

    // Output is a tensor of length 1 giving the pixels per inch of the
    // current device used by the user.
    // Output type: float
    // Output length: 1
    FILL_DEVICE_PPI = 8;

    // Fills metrics about a given tab. A `tab_id` and a `session_tag` are
    // expected from input_context.
    // Output type: float
    // Output length: `TabSessionSource::kNumInputs`
    FILL_TAB_METRICS = 9;

    // Fills a random number in the range [0, 1).
    // Output type: float
    // Output length: 1
    FILL_RANDOM = 10;

    // Fills various metrics from the shopping service. Currently only
    // supports the shopping bookmark count.
    // Output type: float
    // Output length: 1
    FILL_FROM_SHOPPING_SERVICE = 11;
  }

  // The fill type of the custom input.
  optional FillPolicy fill_policy = 2;

  // Fallback used when the current Chrome version does not support this fill
  // type. If this is not specified and the function is unavailable, the model
  // will not run due to missing input. The number of entries should be equal
  // to |tensor_length|.
  repeated float default_value = 3;

  // Additional arguments, for fill types that need them.
  map<string, string> additional_args = 4;

  // The human readable name of the custom input.
  optional string name = 5;
}
// Configuration for storing signals in the SQL database.
message SignalFilterConfig {
  // A single UKM event that should be stored.
  message UkmEvent {
    // Event hash of the UKM event.
    optional uint64 event_hash = 1;

    // Metric hashes of the event to store in the database. Providing the list
    // of necessary metrics is required.
    // TODO: Support empty metric hash list, the database will store all the
    // metrics for the UKM event.
    repeated uint64 metric_hash_filter = 2;
  }

  // UKM events to store in the database.
  repeated UkmEvent ukm_events = 1;
}
// Describes an input computed by running a SQL query.
message SqlFeature {
  // The query; it should select a single float column. It may contain '?'
  // placeholders, whose values are bound using the |bind_values| list.
  // TODO(ssid): Consider expanding this to return multiple input tensor
  // features.
  optional string sql = 1;

  // Signals that the query needs to be present in the storage.
  optional SignalFilterConfig signal_filter = 2;

  // A value to bind to the SQL query.
  message BindValue {
    // The bind field numbers, in the range 0 to n-1, for n question marks in
    // the SQL query.
    repeated int32 bind_field_index = 1;

    // Selects which Bind*() method of sql::Statement is called.
    enum ParamType {
      UNKNOWN = 0;
      NULL = 1;
      BOOL = 2;
      INT = 3;
      INT64 = 4;
      DOUBLE = 5;
      STRING = 6;
      TIME = 7;
    }
    optional ParamType param_type = 2;

    // The input whose value is bound to the query. The custom function should
    // return the specified param type. The |tensor_length| should be 0 since
    // these inputs can only be used for SQL bind values.
    // NOTE(review): the comment on CustomInput.tensor_length says that for
    // bind values it is the number of SQL bindings to fill - confirm which
    // of the two statements is current.
    optional CustomInput value = 3;
  }
  repeated BindValue bind_values = 3;

  // The human readable name of the ukm event and metric.
  optional string name = 4;
}
// A single feature used as an input to the ML model. Exactly one of the
// alternatives below describes how the feature is obtained.
message InputFeature {
  oneof Feature {
    // Input feature of UMAFeature type.
    UMAFeature uma_feature = 1;

    // Input feature of custom input type.
    CustomInput custom_input = 2;

    // Input feature computed using a SQL query.
    SqlFeature sql_feature = 3;
  }
}
// Holds a list of training output generators. The ML model pipeline can
// iterate on different output candidates and select the final output
// generator.
message TrainingOutputs {
  repeated TrainingOutput outputs = 1;

  // Configures what triggers the collection of training outputs data for the
  // current model.
  message TriggerConfig {
    // How the training outputs are collected.
    enum DecisionType {
      // Treated as PERIODIC by default.
      UNKNOWN = 0;

      // The on demand scheduler triggers training data collection when the
      // client asks for a model execution with input context.
      ONDEMAND = 1;

      // The periodic scheduler triggers training data collection every day.
      // Currently this period is fixed on the client to 1 day.
      PERIODIC = 2;
    }
    optional DecisionType decision_type = 1;

    message ObservationTrigger {
      oneof trigger {
        // Delay, in seconds, between collecting the input tensors and
        // collecting the output tensors. For example, output labels can be
        // collected one week after input tensors are collected. Set to 0 if
        // output tensors need to be collected in the same time period as
        // input tensors.
        uint64 delay_sec = 1;

        // User action or histogram that triggers a training data output
        // collection. Note: Only the name and type should be used with
        // bucket_duration = 0.
        // TODO(crbug.com/40239034): Figure out how to include the trigger as
        // one of the outputs automatically.
        UMAOutput uma_trigger = 2;
      }
    }

    // Triggers; whichever is hit first is used to upload the training data.
    repeated ObservationTrigger observation_trigger = 2;

    // Only for PERIODIC trigger. The prediction time can be exact or
    // flexible.
    // - Exact: forces the prediction time to be the time at which the segment
    //   selection or classification result was changed. The input features
    //   will be collected till the prediction time.
    // - Flexible: allows the collector to pick any point in the past as the
    //   prediction time, usually the current time. The training data
    //   collection is triggered once a day with a rolling window whenever
    //   Chrome is active. This setting uploads more training data samples.
    // By default the prediction time is FLEXIBLE.
    optional bool use_exact_prediction_time = 3;

    // Only for PERIODIC trigger. The observation time can be exact or
    // flexible.
    // - Exact: used only in the exact prediction case; the observation starts
    //   exactly after the prediction time.
    // - Flexible: can be used to get the most recent user behavior by setting
    //   the observation time to the time of upload, which could be later
    //   than the end of the observation period.
    // By default the observation time is EXACT.
    optional bool use_flexible_observation_time = 4;
  }
  optional TriggerConfig trigger_config = 2;
}
// Generic description of how one training data output is generated.
// TODO(xingliu): Add more implementation details about how output training data
// is generated.
message TrainingOutput {
  oneof output {
    // The training data output is generated from UMA metrics.
    UMAOutput uma_output = 1;
  }
}
// Information needed to generate a training data output from a particular UMA
// metric.
message UMAOutput {
  // UMA metric from which the training data output is generated.
  optional UMAFeature uma_feature = 1;

  // Duration to trigger a training data collection, unit in TimeUnit. If not
  // specified or 0, the training data will be generated immediately after
  // certain UMA is recorded.
  optional uint64 duration = 2;
}
// Metadata about a segmentation model for a given segment. Contains information
// on how to use the model such as collecting signals, interpreting results etc.
// Next tag: 18
message SegmentationModelMetadata {
  // Tags of obsolete fields; they must not be reused.
  //   12: DEPRECATED: optional int64 output_collection_delay_sec = 12;
  //       The delay, in seconds, to collect output tensors after input
  //       tensors are collected. For example, output labels can be collected
  //       one week after input tensors are collected. If not specified,
  //       output tensors are collected in the same time period as input
  //       tensors.
  //   15: Obsolete field.
  reserved 12, 15;

  // The version information needed to validate segmentation models.
  optional VersionInfo version_info = 9;

  // DEPRECATED: Use |input_features.uma_feature| instead. Only one of
  // |features| or |input_features| can be used in the config, not both. An
  // ordered list of required features.
  repeated UMAFeature features = 1;

  // An ordered list of required features and custom inputs. Only one of
  // |features| or |input_features| can be used in the config, not both.
  repeated InputFeature input_features = 10;

  // A list of training data output definitions.
  optional TrainingOutputs training_outputs = 11;

  // The time unit to be used for the rest of this proto.
  optional TimeUnit time_unit = 2;

  // The size of each interval the data should be aggregated over.
  optional uint64 bucket_duration = 3;

  // For how long data should be stored for this model.
  optional int64 signal_storage_length = 4;

  // For how long data must have been captured for this model. If the relevant
  // signals have been captured for a shorter amount of time than this, this
  // model can never be selected.
  optional int64 min_signal_collection_length = 5;

  // Describes how long after a valid result has been calculated for this model
  // it is OK to cache the result without recalculating with updated data.
  optional int64 result_time_to_live = 6;

  // The model always executes with a fixed timestamp. This is used when the
  // model is trained on data from a specific time period, and needs to
  // evaluate on the same date.
  optional int64 fixed_prediction_timestamp = 17;

  // Maps the raw continuous model result to discrete, comparable values.
  message DiscreteMapping {
    // A mapping result from the raw continuous result to a discrete and
    // comparable value based on |rank|.
    message Entry {
      // The minimum result of the model to be allowed to choose this mapping.
      optional float min_result = 1;

      // A feature specific rank.
      optional int64 rank = 2;
    }

    // An ordered (based on their |min_result|) list of discrete mappings.
    // To map a model evaluation result to an Entry, choose the highest
    // |min_result| that the evaluation result is at or above.
    // E.g. for these mappings: [(0.0, 0), (0.4, 1), (0.7, 2), (0.9, 3)], a
    // result of 0.7 would yield (0.7, 2), and 0.69 would yield (0.4, 1).
    repeated Entry entries = 1;
  }

  // The available discrete mappings, looked up by key.
  map<string, DiscreteMapping> discrete_mappings = 7;

  // The default key to use during the mapping process if no key has been
  // provided.
  optional string default_discrete_mapping = 8;

  // Whether the client should upload the input and output tensors through UKM.
  optional bool upload_tensors = 13;

  // Describes the return type of the model score. Used for recording
  // histograms.
  enum OutputDescription {
    UNKNOWN_RETURN_TYPE = 0;

    // Model returns either 0 or 1.
    RETURN_TYPE_HEURISTIC = 1;

    // Model returns an int corresponding to a specific subsegment. Assume
    // between 0 and 100.
    RETURN_TYPE_MULTISEGMENT = 2;

    // Model returns a float between 0 and 1.
    RETURN_TYPE_PROBABILITY = 3;

    // Model returns any integer value.
    RETURN_TYPE_INTEGER = 4;
  }

  // TODO(ritikagup@): Deprecate the field.
  optional OutputDescription return_type = 14;

  // Contains information about the model results. Supplied by the client. It
  // describes what the results should look like and how to interpret them.
  optional OutputConfig output_config = 16;
}
|