File: stats.h

package info (click to toggle)
chromium 139.0.7258.127-1
links: PTS, VCS
area: main
in suites:
size: 6,122,068 kB
sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (386 lines) | stat: -rw-r--r-- 16,292 bytes
parent folder | download | duplicates (9)
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_SEGMENTATION_PLATFORM_INTERNAL_STATS_H_
#define COMPONENTS_SEGMENTATION_PLATFORM_INTERNAL_STATS_H_

#include <list>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/no_destructor.h"
#include "base/sequence_checker.h"
#include "base/task/sequenced_task_runner.h"
#include "base/thread_annotations.h"
#include "components/segmentation_platform/internal/execution/model_execution_status.h"
#include "components/segmentation_platform/internal/metadata/metadata_utils.h"
#include "components/segmentation_platform/internal/selection/segment_result_provider.h"
#include "components/segmentation_platform/public/config.h"
#include "components/segmentation_platform/public/model_provider.h"
#include "components/segmentation_platform/public/proto/segmentation_platform.pb.h"
#include "components/segmentation_platform/public/proto/types.pb.h"
#include "components/segmentation_platform/public/segment_selection_result.h"

namespace segmentation_platform::stats {

using proto::SegmentId;

// Keep in sync with AdaptiveToolbarSegmentSwitch in enums.xml.
// Visible for testing.
enum class AdaptiveToolbarSegmentSwitch {
  kUnknown = 0,
  kNoneToNewTab = 1,
  kNoneToShare = 2,
  kNoneToVoice = 3,
  kNewTabToNone = 4,
  kShareToNone = 5,
  kVoiceToNone = 6,
  kNewTabToShare = 7,
  kNewTabToVoice = 8,
  kShareToNewTab = 9,
  kShareToVoice = 10,
  kVoiceToNewTab = 11,
  kVoiceToShare = 12,
  kMaxValue = kVoiceToShare,
};

// Keep in sync with SegmentationBooleanSegmentSwitch in enums.xml.
// Visible for testing.
enum class BooleanSegmentSwitch {
  kUnknown = 0,
  kNoneToEnabled = 1,
  kEnabledToNone = 2,
  kMaxValue = kEnabledToNone,
};

// Records the time difference between when a new version of model from
// optimization guide is available and when the model is initialized in the
// client.
void RecordModelUpdateTimeDifference(SegmentId segment_id,
                                     int64_t model_update_time);

// Records the result of segment selection whenever segment selection is
// computed.
void RecordSegmentSelectionComputed(
    const Config& config,
    SegmentId new_selection,
    std::optional<SegmentId> previous_selection);

// Records the post processed result whenever computed. This is recorded when
// results are obtained by eithier executing the model or getting a valid score
// from database.
void RecordClassificationResultComputed(
    const Config& config,
    const proto::PredictionResult& new_result);

// Records from which old value to which new value the topmost label is changing
// to when prefs expired and is updated with new result.
void RecordClassificationResultUpdated(
    const Config& config,
    const proto::PredictionResult* old_result,
    const proto::PredictionResult& new_result);

// Database Maintenance metrics.
// Records the number of unique signal identifiers that were successfully
// cleaned up.
void RecordMaintenanceCleanupSignalSuccessCount(size_t count);
// Records the result for each compaction attempt for a particular signal type.
void RecordMaintenanceCompactionResult(proto::SignalType signal_type,
                                       bool success);
// Records the number of signal identifiers that were found that we should aim
// to clean up.
void RecordMaintenanceSignalIdentifierCount(size_t count);

// Model Delivery metrics.
// Records whether any incoming ML had metadata attached that
// we were able to parse.
void RecordModelDeliveryHasMetadata(SegmentId segment_id, bool has_metadata);
// Records the number of tensor features an updated server or embedded model
// has.
void RecordModelDeliveryMetadataFeatureCount(SegmentId segment_id,
                                             proto::ModelSource model_source,
                                             size_t count);
// Records the result of validating the metadata of an incoming server or
// embedded model. Recorded before and after it has been merged with the already
// stored metadata.
void RecordModelDeliveryMetadataValidation(
    SegmentId segment_id,
    proto::ModelSource model_source,
    bool processed,
    metadata_utils::ValidationResult validation_result);
// Record what type of server or embedded model metadata we received .
void RecordModelDeliveryReceived(SegmentId segment_id,
                                 proto::ModelSource model_source);
// Records the result of attempting to save an updated version of the server or
// embedded model metadata.
void RecordModelDeliverySaveResult(SegmentId segment_id,
                                   proto::ModelSource model_source,
                                   bool success);
// Records the result of attempting to delete the previous version of a server
// model metadata.
void RecordModelDeliveryDeleteResult(SegmentId segment_id,
                                     proto::ModelSource model_source,
                                     bool success);
// Records whether the currently stored segment_id matches the incoming
// segment_id for a particular model_source, as these are expected to match.
void RecordModelDeliverySegmentIdMatches(SegmentId segment_id,
                                         proto::ModelSource model_source,
                                         bool matches);

// Model Execution metrics.
// Records the duration of processing a single ML feature. This only takes into
// account the time it takes to process (aggregate) a feature result, not
// fetching it from the database. It also takes into account filtering any
// enum histograms.
void RecordModelExecutionDurationFeatureProcessing(SegmentId segment_id,
                                                   base::TimeDelta duration);
// Records the duration of executing an ML model. This only takes into account
// the time it takes to invoke and wait for a result from the underlying ML
// infrastructure from //components/optimization_guide, and not fetching the
// relevant data from the database.
void RecordModelExecutionDurationModel(SegmentId segment_id,
                                       bool success,
                                       base::TimeDelta duration);
// Records the duration of fetching data for, processing, and executing an ML
// model.
void RecordModelExecutionDurationTotal(SegmentId segment_id,
                                       ModelExecutionStatus status,
                                       base::TimeDelta duration);

// Records the total duration for GetClassificationResult API starting from the
// time request arrives in segmentation service until the result has been
// returned. It includes feature processing and model execution as well.
void RecordClassificationRequestTotalDuration(const Config& config,
                                              base::TimeDelta duration);

// Records the total duration of on-demand segment selection which includes
// running all the models associated with the client and computing result.
void RecordOnDemandSegmentSelectionDuration(
    const Config& config,
    const SegmentSelectionResult& result,
    base::TimeDelta duration);
// Records the result value after successfully executing an ML model.
void RecordModelExecutionResult(
    SegmentId segment_id,
    float result,
    proto::SegmentationModelMetadata::OutputDescription return_type);
// Records the raw model score. Records each element of the result tensor as a
// separate histogram identified by its index. For binary and multi-class
// models, the raw score is multiplied by 100 to transform as percent score,
// whereas for binned classifier the result is recorded as is in a percent
// range.
void RecordModelExecutionResult(SegmentId segment_id,
                                const ModelProvider::Response& result,
                                proto::OutputConfig output_config);
// Records whether the result value of of executing an ML model was successfully
// saved.
void RecordModelExecutionSaveResult(SegmentId segment_id, bool success);
// Records the final execution status for any ML model execution.
void RecordModelExecutionStatus(SegmentId segment_id,
                                bool default_provider,
                                ModelExecutionStatus status);
// Records the percent of features in a tensor that are equal to 0 when the
// segmentation model is executed.
void RecordModelExecutionZeroValuePercent(SegmentId segment_id,
                                          const std::vector<float>& tensor);

// Signal Database metrics.
// Records the number of database entries that were fetched from the database
// during a call to GetSamples. This is not the same as the sample count since
// each database entry can contain multiple samples.
void RecordSignalDatabaseGetSamplesDatabaseEntryCount(size_t count);
// Records the result of fetching data from the database during a call to
// GetSamples.
void RecordSignalDatabaseGetSamplesResult(bool success);
// Records the number of samples that were returned after reading entries from
// the database, during a call to GetSamples. This is not the same as the
// database entry count, since each entry can contain multiple samples.
void RecordSignalDatabaseGetSamplesSampleCount(size_t count);

// Records the result of persisting SegmentInfo changes to disk.
void RecordSegmentInfoDatabaseUpdateEntriesResult(SegmentId segment_id,
                                                  bool success);

// Records the number of unique user action and histogram signals that we are
// currently tracking.
void RecordSignalsListeningCount(
    const std::set<uint64_t>& user_actions,
    const std::set<std::pair<std::string, proto::SignalType>>& histograms);

// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. Please keep in sync with
// "SegmentationSelectionFailureReason" in
// //tools/metrics/histograms/enums.xml.
enum class SegmentationSelectionFailureReason {
  kDeprecatedPlatformDisabled = 0,
  kSelectionAvailableInPrefs = 1,
  kServerModelDatabaseScoreNotReady = 2,
  kServerModelSignalsNotCollected = 3,
  kSelectionTtlNotExpired = 4,
  kAtLeastOneModelFailedExecution = 5,
  kAtLeastOneModelNeedsMoreSignals = 6,
  kAtLeastOneModelWithInvalidMetadata = 7,
  kFailedToSaveModelResult = 8,
  kInvalidSelectionResultInPrefs = 9,
  kDBInitFailure = 10,
  kServerModelSegmentInfoNotAvailable = 11,
  kDefaultModelSignalsNotCollected = 12,
  kDefaultModelExecutionFailed = 13,
  kDefaultModelSegmentInfoNotAvailable = 14,
  kServerModelExecutionFailed = 15,
  kSelectionAvailableInProtoPrefs = 16,
  kInvalidSelectionResultInProtoPrefs = 17,
  kProtoPrefsUpdateNotRequired = 18,
  kProtoPrefsUpdated = 19,
  kServerModelDatabaseScoreUsed = 20,
  kDefaultModelExecutionScoreUsed = 21,
  kServerModelExecutionScoreUsed = 22,
  kMultiOutputNotSupported = 23,
  kOnDemandModelExecutionFailed = 24,
  kClassificationResultFromPrefs = 25,
  kClassificationResultNotAvailableInPrefs = 26,
  kDefaultModelDatabaseScoreUsed = 27,
  kDefaultModelDatabaseScoreNotReady = 28,
  kCachedResultUnavailableExecutingOndemand = 29,
  kOnDemandExecutionFailedReturningCachedResult = 30,
  kMaxValue = kOnDemandExecutionFailedReturningCachedResult,
};

// Records the reason for failure or success to compute a segment selection.
void RecordSegmentSelectionFailure(const Config& config,
                                   SegmentationSelectionFailureReason reason);

// Keep in sync with SegmentationPlatformFeatureProcessingError in
// //tools/metrics/histograms/enums.xml.
enum class FeatureProcessingError {
  kUkmEngineDisabled = 0,
  kUmaValidationError = 1,
  kSqlValidationError = 2,
  kCustomInputError = 3,
  kSqlBindValuesError = 4,
  kSqlQueryRunError = 5,
  kResultTensorError = 6,
  kSuccess = 7,
  kMaxValue = kSuccess,
};

// Return a string display for the given FeatureProcessingError.
std::string FeatureProcessingErrorToString(FeatureProcessingError error);

// Records the type of error encountered during feature processing.
void RecordFeatureProcessingError(SegmentId segment_id,
                                  FeatureProcessingError error);

// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. Please keep in sync with
// "SegmentationModelAvailability" in //tools/metrics/histograms/enums.xml.
enum class SegmentationModelAvailability {
  kModelHandlerCreated = 0,
  kModelAvailable = 1,
  kMetadataInvalid = 2,
  kNoModelAvailable = 3,
  kMaxValue = kNoModelAvailable
};
// Records the availability of segmentation models for each target needed.
void RecordModelAvailability(SegmentId segment_id,
                             SegmentationModelAvailability availability);

// Records the number of input tensor that's causing a failure to upload
// structured metrics.
void RecordTooManyInputTensors(int tensor_size);

// Analytics events for training data collection. Sync with
// SegmentationPlatformTrainingDataCollectionEvent in enums.xml.
enum class TrainingDataCollectionEvent {
  kImmediateCollectionStart = 0,
  kImmediateCollectionSuccess = 1,
  kModelInfoMissing = 2,
  kMetadataValidationFailed = 3,
  kGetInputTensorsFailed = 4,
  kNotEnoughCollectionTime = 5,
  kUkmReportingFailed = 6,
  kPartialDataNotAllowed = 7,
  kContinousCollectionStart = 8,
  kContinousCollectionSuccess = 9,
  kCollectAndStoreInputsSuccess = 10,
  kObservationTimeReached = 11,
  kDelayedTaskPosted = 12,
  kImmediateObservationPosted = 13,
  kWaitingForNonDelayedTrigger = 14,
  kHistogramTriggerHit = 15,
  kNoSegmentInfo = 16,
  kDisallowedForRecording = 17,
  kObservationDisallowed = 18,
  kTrainingDataMissing = 19,
  kOnDecisionTimeTypeMistmatch = 20,
  kDelayTriggerSampled = 21,
  kContinousExactPredictionTimeCollectionStart = 22,
  kContinousExactPredictionTimeCollectionSuccess = 23,
  kMaxValue = kContinousExactPredictionTimeCollectionSuccess
};

std::string TrainingDataCollectionEventToErrorMsg(
    TrainingDataCollectionEvent event);

// Records analytics for training data collection.
void RecordTrainingDataCollectionEvent(SegmentId segment_id,
                                       TrainingDataCollectionEvent event);

SegmentationSelectionFailureReason GetSuccessOrFailureReason(
    SegmentResultProvider::ResultState result_state);

// Helper to collect UMA metrics and record in a non-blocking thread.
class BackgroundUmaRecorder {
 public:
  // Delay to collect metrics and record. Set to short time so if Chrome dies we
  // don't lose too many metrics.
  constexpr static base::TimeDelta kMetricsCollectionDelay = base::Seconds(5);

  static BackgroundUmaRecorder& GetInstance();

  BackgroundUmaRecorder(const BackgroundUmaRecorder&) = delete;
  BackgroundUmaRecorder& operator=(const BackgroundUmaRecorder&) = delete;

  // If initialized then records metrics in a worker thread, otherwise records
  // metrics in current thread, blocking. Can be called multiple times safely.
  void Initialize();
  void InitializeForTesting(
      scoped_refptr<base::SequencedTaskRunner> bg_task_runner);

  // Force flush all samples in current thread.
  void FlushSamples();

  // Add metrics to UMA.
  void AddMetric(base::OnceClosure add_sample);

  scoped_refptr<base::SequencedTaskRunner> bg_task_runner_for_testing() {
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_check_);
    return bg_task_runner_;
  }

 private:
  friend class base::NoDestructor<BackgroundUmaRecorder>;

  BackgroundUmaRecorder();
  ~BackgroundUmaRecorder();

  base::Lock lock_;
  std::list<base::OnceClosure> add_samples_ GUARDED_BY(lock_);
  bool pending_task_ GUARDED_BY(lock_){false};

  scoped_refptr<base::SequencedTaskRunner> bg_task_runner_;
  // Protects `bg_task_runner_`. If we need to record metrics from non-main
  // thread, do not use this class and record directly.
  SEQUENCE_CHECKER(sequence_check_);
  base::WeakPtrFactory<BackgroundUmaRecorder> weak_factory_{this};
};

}  // namespace segmentation_platform::stats

#endif  // COMPONENTS_SEGMENTATION_PLATFORM_INTERNAL_STATS_H_