File: autofill_ablation_study.cc

package info (click to toggle)
chromium 139.0.7258.127-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,122,156 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (242 lines) | stat: -rw-r--r-- 9,914 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/core/browser/studies/autofill_ablation_study.h"

#include "base/base64.h"
#include "base/check_op.h"
#include "base/command_line.h"
#include "base/containers/span.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/field_trial.h"
#include "base/no_destructor.h"
#include "base/numerics/byte_conversions.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/time/time.h"
#include "base/types/zip.h"
#include "components/autofill/core/browser/integrators/optimization_guide/autofill_optimization_guide.h"
#include "components/autofill/core/common/autofill_clock.h"
#include "components/autofill/core/common/autofill_features.h"
#include "components/autofill/core/common/autofill_prefs.h"
#include "components/optimization_guide/proto/hints.pb.h"
#include "components/prefs/pref_service.h"
#include "crypto/hash.h"
#include "third_party/icu/source/i18n/unicode/timezone.h"
#include "url/gurl.h"
#include "url/origin.h"

namespace autofill {

using ::autofill::features::
    kAutofillAblationStudyAblationWeightPerMilleList1Param;
using ::autofill::features::
    kAutofillAblationStudyAblationWeightPerMilleList2Param;
using ::autofill::features::
    kAutofillAblationStudyAblationWeightPerMilleList3Param;
using ::autofill::features::
    kAutofillAblationStudyAblationWeightPerMilleList4Param;
using ::autofill::features::
    kAutofillAblationStudyAblationWeightPerMilleList5Param;
using ::autofill::features::
    kAutofillAblationStudyAblationWeightPerMilleList6Param;
using ::autofill::features::kAutofillAblationStudyAblationWeightPerMilleParam;
using ::autofill::features::kAutofillAblationStudyEnabledForAddressesParam;
using ::autofill::features::kAutofillAblationStudyEnabledForPaymentsParam;
using ::autofill::features::kAutofillEnableAblationStudy;
using ::autofill::features::test::kAutofillShowTypePredictions;

namespace {

// Number of random bytes generated for a new ablation seed. GetSeed() stores
// the base64 encoding of these bytes, so the persisted string is longer.
constexpr size_t kSeedLengthInBytes = 8;

// Reads the ablation seed stored under `kAutofillAblationSeedPref` in
// `pref_service`. If no value has been stored for that pref yet, a new seed is
// generated from `kSeedLengthInBytes` random bytes, base64-encoded and written
// to the pref before it is read back.
//
// Returns an empty string if `pref_service` is null.
std::string GetSeed(PrefService* pref_service) {
  if (pref_service == nullptr) {
    return {};
  }

  const bool seed_already_stored =
      pref_service->HasPrefPath(autofill::prefs::kAutofillAblationSeedPref);
  if (!seed_already_stored) {
    const std::string fresh_seed =
        base::Base64Encode(base::RandBytesAsVector(kSeedLengthInBytes));
    pref_service->SetString(autofill::prefs::kAutofillAblationSeedPref,
                            fresh_seed);
  }

  return pref_service->GetString(autofill::prefs::kAutofillAblationSeedPref);
}

}  // namespace

// Returns the number of days between the Windows epoch and `now`, after
// shifting `now` by the raw UTC offset of the default timezone. The shift makes
// day boundaries fall on local midnight instead of UTC midnight. Daylight
// saving time is intentionally ignored; the complexity is not worth it.
int DaysSinceLocalWindowsEpoch(base::Time now) {
  // The Windows epoch coincides with 1601-01-01 00:00:00 UTC.
  // Consider a user on the East Coast of North America (UTC-5): at their local
  // midnight the UTC clock is already 5 * 60 * 60 seconds past midnight, even
  // though 0 seconds have passed since local midnight. Adding the timezone's
  // raw offset (negative for EST) virtually strips off the timezone: e.g.
  // 2024-06-27 10:00:00 EST is treated like 2024-06-27 10:00:00 UTC and all
  // further calculation happens on that value. This way the day window aligns
  // with local midnights.
  const std::unique_ptr<icu::TimeZone> default_zone =
      base::WrapUnique(icu::TimeZone::createDefault());

  // The raw offset is used, so daylight saving does not enter the result.
  const base::TimeDelta raw_utc_offset =
      base::Milliseconds(default_zone->getRawOffset());

  const base::TimeDelta local_delta =
      now.ToDeltaSinceWindowsEpoch() + raw_utc_offset;
  return local_delta.InDays();
}

// Returns a 64 bit hash of |seed|, the site and the current day, which is can
// be used to decide whether a form impression should be exposed to autofill
// ablation.
uint64_t GetAblationHash(const std::string& seed,
                         const GURL& url,
                         base::Time now) {
  crypto::hash::Hasher hasher(crypto::hash::kSha256);

  // Incorporate |seed| into the MD5Sum. This ensures that on each browser
  // start the behavior is shuffled.
  hasher.Update(seed);

  // Incorporate |url|'s security origin into the MD5Sum. This ensures that
  // different sites can have different behavior but the behavior on a single
  // site remains consistent.
  // Invalid and non-standard origins are parsed as opaque origins, which
  // serialize as the string "null". This makes all of them identical. Given
  // that we expect |url| to be a mainframe URL this should be sufficiently rare
  // so that individual users don't experience an excessive amount of ablation
  // cases.
  url::Origin origin = url::Origin::Create(url);
  hasher.Update(origin.Serialize());

  // Incorporate the date into MD5Sum. This ensures that the behavior stays the
  // same during a `kAblationWindowInDays` period but changes afterwards.
  int days_since_epoch = DaysSinceLocalWindowsEpoch(now);
  int day_window = days_since_epoch / kAblationWindowInDays;
  hasher.Update(base::NumberToString(day_window));

  // Derive 64 bit hash.
  std::array<uint8_t, crypto::hash::kSha256Size> hash;
  hasher.Finish(hash);
  return base::U64FromBigEndian(base::span(hash).first<sizeof(uint64_t)>());
}

// Returns the position of `now` inside its ablation window: the remainder of
// DaysSinceLocalWindowsEpoch(now) divided by `kAblationWindowInDays`.
int GetDayInAblationWindow(base::Time now) {
  const int local_day_count = DaysSinceLocalWindowsEpoch(now);
  return local_day_count % kAblationWindowInDays;
}

AutofillAblationStudy::AutofillAblationStudy(std::string_view seed)
    : seed_(seed) {}
AutofillAblationStudy::AutofillAblationStudy(PrefService* local_state)
    : seed_(GetSeed(local_state)) {}
AutofillAblationStudy::~AutofillAblationStudy() = default;

// static
// Returns a shared study instance that was constructed with an empty seed.
const AutofillAblationStudy& AutofillAblationStudy::disabled_study() {
  // With an empty seed, GetAblationGroupImpl() always answers kDefault, so
  // this instance acts as a disabled ablation study.
  static base::NoDestructor<AutofillAblationStudy> disabled_instance("");
  return *disabled_instance;
}

// Decides which ablation group a form of type `form_type` on `url` belongs to.
//
// AblationGroup::kDefault is returned right away if the ablation study feature
// is off, if type predictions are being shown (debugging), or if the study is
// not enabled for `form_type`.
//
// Otherwise the six site lists are tried in order. The first list that has a
// plausible per-mille weight and for which `autofill_optimization_guide`
// reports `url` as eligible determines the weight. If no list applies (which
// is always the case when `autofill_optimization_guide` is null), the general
// per-mille weight is used, provided it is plausible.
AblationGroup AutofillAblationStudy::GetAblationGroup(
    const GURL& url,
    FormTypeForAblationStudy form_type,
    AutofillOptimizationGuide* autofill_optimization_guide) const {
  // The study needs its feature to be enabled and is suspended while
  // debugging with type predictions.
  if (!base::FeatureList::IsEnabled(kAutofillEnableAblationStudy) ||
      base::FeatureList::IsEnabled(kAutofillShowTypePredictions)) {
    return AblationGroup::kDefault;
  }

  // Exit early if the ablation study is not enabled for a certain form type.
  // The flag starts out true so that a value matching none of the enumerators
  // below is not filtered out here.
  bool study_covers_form_type = true;
  switch (form_type) {
    case FormTypeForAblationStudy::kOther:
      study_covers_form_type = false;
      break;
    case FormTypeForAblationStudy::kAddress:
      study_covers_form_type =
          kAutofillAblationStudyEnabledForAddressesParam.Get();
      break;
    case FormTypeForAblationStudy::kPayment:
      study_covers_form_type =
          kAutofillAblationStudyEnabledForPaymentsParam.Get();
      break;
  }
  if (!study_covers_form_type) {
    return AblationGroup::kDefault;
  }

  // Basic plausibility check for a per-mille weight. For testing purposes a
  // weight of exactly 1000 is allowed, in which case 100% of forms are in the
  // ablation case. In practice the weight should not exceed 500 (i.e.
  // weight * 2 <= 1000) to get meaningful results with an equally sized
  // ablation and control group.
  const auto is_plausible_weight = [](int weight_per_mille) {
    return weight_per_mille > 0 && weight_per_mille <= 1000;
  };

  // Per-list weights and the optimization types used to query eligibility.
  // Entries at the same index of both arrays belong together.
  const auto ablation_list_params =
      std::to_array<const base::FeatureParam<int>*>(
          {&kAutofillAblationStudyAblationWeightPerMilleList1Param,
           &kAutofillAblationStudyAblationWeightPerMilleList2Param,
           &kAutofillAblationStudyAblationWeightPerMilleList3Param,
           &kAutofillAblationStudyAblationWeightPerMilleList4Param,
           &kAutofillAblationStudyAblationWeightPerMilleList5Param,
           &kAutofillAblationStudyAblationWeightPerMilleList6Param});
  using OptimizationType = optimization_guide::proto::OptimizationType;
  static constexpr auto ablation_optimization_types =
      std::to_array<OptimizationType>(
          {OptimizationType::AUTOFILL_ABLATION_SITES_LIST1,
           OptimizationType::AUTOFILL_ABLATION_SITES_LIST2,
           OptimizationType::AUTOFILL_ABLATION_SITES_LIST3,
           OptimizationType::AUTOFILL_ABLATION_SITES_LIST4,
           OptimizationType::AUTOFILL_ABLATION_SITES_LIST5,
           OptimizationType::AUTOFILL_ABLATION_SITES_LIST6});

  const base::Time now = AutofillClock::Now();

  // The first site list with a plausible weight that `url` is eligible for
  // wins. The weight is checked before the optimization guide is consulted.
  for (auto [param, optimization_type] :
       base::zip(ablation_list_params, ablation_optimization_types)) {
    const int list_weight = param->Get();
    if (is_plausible_weight(list_weight) &&
        autofill_optimization_guide != nullptr &&
        autofill_optimization_guide->IsEligibleForAblation(
            url, optimization_type)) {
      return GetAblationGroupImpl(url, now, list_weight);
    }
  }

  // No site list applied: fall back to the general weight.
  const int general_weight =
      kAutofillAblationStudyAblationWeightPerMilleParam.Get();
  if (is_plausible_weight(general_weight)) {
    return GetAblationGroupImpl(url, now, general_weight);
  }
  return AblationGroup::kDefault;
}

// Maps `url` and `now` to an ablation group for the given per-mille weight.
//
// The ablation hash is reduced to a bucket in [0, 1000). Buckets below
// `ablation_weight_per_mille` are in the ablation group, buckets below twice
// that weight are in the control group, and all others are in the default
// group. A study with an empty seed always returns AblationGroup::kDefault.
AblationGroup AutofillAblationStudy::GetAblationGroupImpl(
    const GURL& url,
    base::Time now,
    uint32_t ablation_weight_per_mille) const {
  if (seed_.empty()) {
    return AblationGroup::kDefault;
  }

  const uint64_t bucket = GetAblationHash(seed_, url, now) % 1000;
  const uint32_t ablation_limit = ablation_weight_per_mille;
  // The control group covers as many buckets as the ablation group and
  // directly follows it.
  const uint32_t control_limit = 2 * ablation_weight_per_mille;

  if (bucket < ablation_limit) {
    return AblationGroup::kAblation;
  }
  return bucket < control_limit ? AblationGroup::kControl
                                : AblationGroup::kDefault;
}

}  // namespace autofill