File: quality_metrics_filling.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (199 lines) | stat: -rw-r--r-- 8,252 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/core/browser/metrics/quality_metrics_filling.h"

#include <algorithm>

#include "base/containers/fixed_flat_set.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/strcat.h"
#include "components/autofill/core/browser/field_types.h"
#include "components/autofill/core/browser/metrics/autofill_metrics_utils.h"

namespace autofill::autofill_metrics {

namespace {

// Heuristic used for filtering fields that are probably not fillable. The
// assumption is that autofilled values typically should have lengths well below
// 150 and that extremely long texts are outliers and should not influence the
// metrics a lot. Fields whose submitted value has a size() above this
// threshold are excluded from the "Autofill.AutomationRate.*" computation.
constexpr size_t kAutomationRateFieldSizeThreshold = 150;

// Histogram name prefixes of the "Autofill.DataUtilization*" metrics. A variant
// suffix (see below) is appended to obtain the full histogram name. The
// "SelectedFieldTypes" histograms are only emitted for fields whose fillable
// possible types do not overlap with `kFieldTypesRepresentingSmallNumbers`.
constexpr std::string_view kUmaDataUtilizationAllTypes =
    "Autofill.DataUtilization.AllFieldTypes.";
constexpr std::string_view kUmaDataUtilizationSelectedTypes =
    "Autofill.DataUtilization.SelectedFieldTypes.";
// Variants for histograms "Autofill.DataUtilization*".
// "Aggregate" is emitted for every reported field.
constexpr std::string_view kAggregateVariant = "Aggregate";
// Emitted when the field's autocomplete state is classified as garbage.
constexpr std::string_view kGarbageVariant = "Garbage";
// Only used for the "ByPossibleType" breakdown, for fields whose autocomplete
// attribute is "off" (and whose autocomplete state is not garbage).
constexpr std::string_view kAutocompleteOffVariant = "AutocompleteOff";
// Exactly one of "HadPrediction" / "NoPrediction" is emitted per reported
// field, depending on whether the field's storable type is above EMPTY_TYPE.
constexpr std::string_view kHadPredictionVariant = "HadPrediction";
constexpr std::string_view kNoPredictionVariant = "NoPrediction";
// Emitted when both the "Garbage" and the "HadPrediction" conditions hold.
constexpr std::string_view kGarbageHadPredictionVariant =
    "GarbageHadPrediction";

// Field types whose associated values typically are small numbers (< 100). When
// determining the possible types of a submitted field, the small numbers have a
// high chance of causing false positive matches.
constexpr DenseSet<FieldType> kFieldTypesRepresentingSmallNumbers = {
    CREDIT_CARD_EXP_MONTH,     CREDIT_CARD_EXP_2_DIGIT_YEAR,
    PHONE_HOME_COUNTRY_CODE,   PHONE_HOME_NUMBER_PREFIX,
    ADDRESS_HOME_HOUSE_NUMBER, ADDRESS_HOME_APT_NUM,
    ADDRESS_HOME_FLOOR};

// Emits "Autofill.AutomationRate.<FormType>" for every form type of `form`.
// The sample is the integer percentage of characters in the considered fields
// that belong to autofilled fields. A field is considered if it is a text
// input element, its value differs from its initial value, and its value is
// not longer than `kAutomationRateFieldSizeThreshold`. Nothing is emitted if
// the considered fields contain no characters at all.
void LogAutomationRate(const FormStructure& form) {
  size_t autofilled_chars = 0;
  size_t considered_chars = 0;
  for (const auto& field : form.fields()) {
    // Only text inputs whose value changed since page load are of interest.
    const bool is_modified_text_input =
        field->IsTextInputElement() &&
        !(field->initial_value() == field->value());
    if (!is_modified_text_input) {
      continue;
    }
    const size_t num_chars = field->value().size();
    // Overly long values are likely not autofillable and would distort the
    // ratio, so they contribute to neither counter.
    if (num_chars <= kAutomationRateFieldSizeThreshold) {
      considered_chars += num_chars;
      if (field->is_autofilled()) {
        autofilled_chars += num_chars;
      }
    }
  }

  // Avoid dividing by zero; without any characters there is nothing to report.
  if (considered_chars == 0) {
    return;
  }

  const size_t automation_rate = 100 * autofilled_chars / considered_chars;
  for (const auto form_type : GetFormTypesForLogging(form)) {
    base::UmaHistogramPercentage(
        base::StrCat({"Autofill.AutomationRate.",
                      FormTypeNameForLoggingToStringView(form_type)}),
        automation_rate);
  }
}

// Packs `field_type` and `data_utilization` into a single sparse-histogram
// sample: the field type occupies the bits above the lowest 6, and the data
// utilization value occupies the lowest 6 bits.
int GetFieldTypeAutofillDataUtilization(
    FieldType field_type,
    AutofillDataUtilization data_utilization) {
  // Bit budget of the packed sample (16 bits in total).
  constexpr int kDataUtilizationBits = 6;
  constexpr int kFieldTypeBits = 10;

  static_assert(
      FieldType::MAX_VALID_FIELD_TYPE <= (UINT16_MAX >> kDataUtilizationBits),
      "Autofill::FieldType value needs more than 10 bits.");

  // Technically only 1 bit is required at this time. The additional bits are
  // reserved for potential future expansion.
  static_assert(static_cast<int>(AutofillDataUtilization::kMaxValue) <=
                    (UINT16_MAX >> kFieldTypeBits),
                "AutofillDataUtilization value needs more than 6 bits");

  return (field_type << kDataUtilizationBits) |
         static_cast<int>(data_utilization);
}

// Records, for fields that were submitted with values that were found in the
// user's stored address profiles / credit cards, whether the field value was
// autofilled or manually entered by the user. Note that fields that were
// autofilled and then edited by the user or JavaScript count as "manually
// entered". Note that fields that were submitted with a prefilled value
// don't get recorded. Emitted on form submission.
void LogDataUtilization(const FormStructure& form) {
  for (const auto& field : form.fields()) {
    // A pre-filled field value should have changed since page load. Otherwise,
    // no reporting is necessary.
    if (field->initial_value() == field->value()) {
      continue;
    }
    // Determine fillable possible types.
    DenseSet<FieldType> fillable_possible_types;
    for (FieldType possible_type : field->possible_types()) {
      if (IsFillableFieldType(possible_type)) {
        fillable_possible_types.insert(possible_type);
      }
    }
    if (fillable_possible_types.empty()) {
      continue;
    }
    // Determine if "SelectedFieldTypes" variants should be logged.
    const bool kLogSelectedTypes = !fillable_possible_types.contains_any(
        kFieldTypesRepresentingSmallNumbers);

    const AutofillDataUtilization sample =
        field->is_autofilled() ? AutofillDataUtilization::kAutofilled
                               : AutofillDataUtilization::kNotAutofilled;

    const bool kAutocompleteStateIsGarbage =
        AutofillMetrics::AutocompleteStateForSubmittedField(*field) ==
        AutofillMetrics::AutocompleteState::kGarbage;

    for (std::string_view histogram_base :
         {kUmaDataUtilizationAllTypes, kUmaDataUtilizationSelectedTypes}) {
      if (histogram_base == kUmaDataUtilizationSelectedTypes &&
          !kLogSelectedTypes) {
        continue;
      }
      // Emit "Aggregate" variants.
      base::UmaHistogramEnumeration(
          base::StrCat({histogram_base, kAggregateVariant}), sample);

      // Emit "Garbage" variants.
      if (kAutocompleteStateIsGarbage) {
        base::UmaHistogramEnumeration(
            base::StrCat({histogram_base, kGarbageVariant}), sample);
      }

      // Emit "HadPrediction" and "NoPrediction" variants.
      const bool kHadPrediction =
          field->Type().GetStorableType() > FieldType::EMPTY_TYPE;
      const std::string_view kPredictionVariant =
          kHadPrediction ? kHadPredictionVariant : kNoPredictionVariant;
      base::UmaHistogramEnumeration(
          base::StrCat({histogram_base, kPredictionVariant}), sample);

      // Emit "GarbageHadPrediction" variants.
      if (kHadPrediction && kAutocompleteStateIsGarbage) {
        base::UmaHistogramEnumeration(
            base::StrCat({histogram_base, kGarbageHadPredictionVariant}),
            sample);
      }
    }

    // Emit breakdown by possible type, also emit `kAutocompleteOffVariant`.
    for (FieldType type : fillable_possible_types) {
      base::UmaHistogramSparse(
          "Autofill.DataUtilization.ByPossibleType",
          GetFieldTypeAutofillDataUtilization(type, sample));
      // Emit "HadPrediction" and "NoPrediction" variants.
      const bool kHadPrediction =
          field->Type().GetStorableType() > FieldType::EMPTY_TYPE;
      const std::string_view kPredictionVariant =
          kHadPrediction ? kHadPredictionVariant : kNoPredictionVariant;
      base::UmaHistogramSparse(
          base::StrCat({"Autofill.DataUtilization.", kPredictionVariant,
                        ".ByPossibleType"}),
          GetFieldTypeAutofillDataUtilization(type, sample));
      // Emit variant for Garbage and Autocomplete off.
      if (kAutocompleteStateIsGarbage) {
        base::UmaHistogramSparse(
            base::StrCat({"Autofill.DataUtilization.", kGarbageVariant,
                          kPredictionVariant, ".ByPossibleType"}),
            GetFieldTypeAutofillDataUtilization(type, sample));
      } else if (field->autocomplete_attribute() == "off") {
        base::UmaHistogramSparse(
            base::StrCat({"Autofill.DataUtilization.", kAutocompleteOffVariant,
                          kPredictionVariant, ".ByPossibleType"}),
            GetFieldTypeAutofillDataUtilization(type, sample));
      }
    }
  }
}

}  // namespace

// Entry point of this file: records the filling quality metrics for `form` by
// first logging the automation rate ("Autofill.AutomationRate.*") and then the
// data utilization histograms ("Autofill.DataUtilization*").
void LogFillingQualityMetrics(const FormStructure& form) {
  LogAutomationRate(form);
  LogDataUtilization(form);
}

}  // namespace autofill::autofill_metrics