File: limited_layer_entropy_cost_tracker.cc

package info (click to toggle)
chromium 140.0.7339.127-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 6,192,880 kB
  • sloc: cpp: 35,093,808; ansic: 7,161,670; javascript: 4,199,694; python: 1,441,797; asm: 949,904; xml: 747,503; pascal: 187,748; perl: 88,691; sh: 88,248; objc: 79,953; sql: 52,714; cs: 44,599; fortran: 24,137; makefile: 22,114; tcl: 15,277; php: 13,980; yacc: 9,000; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (251 lines) | stat: -rw-r--r-- 9,292 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/variations/limited_layer_entropy_cost_tracker.h"

#include <math.h>

#include <cstdint>
#include <limits>

#include "base/check_op.h"
#include "base/debug/dump_without_crashing.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/histogram_functions.h"
#include "base/numerics/checked_math.h"
#include "base/numerics/safe_conversions.h"
#include "components/variations/variations_layers.h"
#include "components/variations/variations_seed_processor.h"

namespace variations {
namespace {

// Maps a probability, given as the fraction `numerator` / `denominator`, to
// the number of bits of entropy it represents: -log2(numerator / denominator).
//
// Preconditions, enforced with CHECKs: 0 < `numerator` <= `denominator`. Under
// these preconditions the fraction lies in (0, 1], so the result is never
// below zero.
double ConvertToBitsOfEntropy(uint64_t numerator, uint64_t denominator) {
  CHECK_GT(numerator, 0u);
  CHECK_LE(numerator, denominator);
  const double probability = base::strict_cast<double>(numerator) /
                             base::strict_cast<double>(denominator);
  return -log2(probability);
}

// Computes how many bits of entropy `study` consumes.
//
// Returns 0 for session-consistent studies, and for studies in which no
// experiment with a non-zero probability weight specifies a Google web
// experiment ID (or Google web trigger experiment ID). Otherwise returns
// -log2(w / W), where w is the smallest non-zero weight among those
// experiments and W is the sum of the weights of all experiments in the study.
double GetEntropyUsedByStudy(const Study& study) {
  // Session-consistent studies are randomized for each Chrome browser process'
  // lifetime; they use neither the low entropy source nor the limited entropy
  // randomization source, so they do not consume entropy.
  if (study.consistency() == Study::SESSION) {
    return 0.0;
  }

  // uint32_t matches the type of the `probability_weight` field in the
  // experiment proto; the sum is kept in a wider type.
  uint32_t smallest_web_weight = std::numeric_limits<uint32_t>::max();
  uint64_t weight_sum = 0;

  // Only experiments that specify a Google web experiment ID (or Google web
  // trigger experiment ID) are subject to the entropy limit.
  bool found_web_experiment = false;

  for (const auto& experiment : study.experiment()) {
    const uint32_t weight = experiment.probability_weight();

    // Every experiment contributes to the total, whether or not it is a web
    // experiment. This CHECKs if the uint64_t sum overflows, which is nearly
    // impossible given that each weight is a uint32_t, and is not expected for
    // valid variations seeds in production.
    weight_sum = base::CheckAdd(weight_sum, weight).ValueOrDie();

    // Zero-probability experiments are never assigned, so they cannot cause
    // entropy usage. Skipping them also guarantees that finding a web
    // experiment implies `weight_sum` > 0.
    if (weight == 0u) {
      continue;
    }
    if (!VariationsSeedProcessor::HasGoogleWebExperimentId(experiment)) {
      continue;
    }
    found_web_experiment = true;
    smallest_web_weight = std::min(smallest_web_weight, weight);
  }

  if (!found_web_experiment) {
    return 0.0;
  }

  // At this point 0 < `smallest_web_weight` <= `weight_sum`, as required by
  // ConvertToBitsOfEntropy().
  //
  // If a client is assigned to a group with probability p, that assignment
  // reveals -log2(p) bits:
  // https://en.wikipedia.org/wiki/Entropy_(information_theory). The entropy is
  // therefore maximal for clients assigned to the smallest group, which is why
  // the minimum weight is used.
  return ConvertToBitsOfEntropy(smallest_web_weight, weight_sum);
}

// Returns the bits of entropy used by the limited layer member `member`,
// i.e. -log2(slots covered by `member` / `num_slots`).
//
// The member must cover at least one slot and no more than `num_slots`;
// otherwise the CHECKs in ConvertToBitsOfEntropy() fail.
double GetLayerMemberEntropy(const Layer::LayerMember& member,
                             uint64_t num_slots) {
  uint32_t member_slot_count = 0;
  for (const Layer::LayerMember::SlotRange& slot_range : member.slots()) {
    // Both bounds are inclusive, so [start, end] spans end - start + 1 slots.
    const auto range_size = slot_range.end() - slot_range.start() + 1;
    member_slot_count += range_size;
  }
  return ConvertToBitsOfEntropy(member_slot_count, num_slots);
}

}  // namespace

// Builds a tracker for the limited layer described by `layer`, with
// `entropy_limit_in_bits` as the entropy limit that each layer member is
// checked against.
//
// If any validation below fails, the tracker is invalidated via Invalidate()
// and construction stops early. Otherwise the entropy used by each layer
// member on its own is recorded in `entropy_used_by_member_id_`, keyed by
// member ID.
LimitedLayerEntropyCostTracker::LimitedLayerEntropyCostTracker(
    const Layer& layer,
    double entropy_limit_in_bits)
    : entropy_limit_in_bits_(entropy_limit_in_bits),
      limited_layer_id_(layer.id()) {
  // The caller should have already validated the layer. However, as the layer
  // data comes from an external source, we verify it here again for safety,
  // instead of using a CHECK. Note that we verify each condition individually
  // in order to dump a unique stack trace for each failure condition.
  if (limited_layer_id_ == 0u) {
    Invalidate();
    return;
  }
  if (entropy_limit_in_bits_ <= 0.0) {
    Invalidate();
    return;
  }
  const auto num_slots = layer.num_slots();
  if (num_slots <= 0u) {
    Invalidate();
    return;
  }
  const auto& layer_members = layer.members();
  if (layer_members.empty()) {
    Invalidate();
    return;
  }
  if (layer.entropy_mode() != Layer::LIMITED) {
    Invalidate();
    return;
  }
  if (!VariationsLayers::AreSlotBoundsValid(layer)) {
    Invalidate();
    return;
  }

  // Compute the entropy used by each layer member keyed by its member ID.
  entropy_used_by_member_id_.reserve(layer_members.size());
  for (const auto& member : layer_members) {
    if (member.id() == 0u) {
      Invalidate();
      return;
    }
    // A member without any slot range covers zero slots. Passing that count
    // through GetLayerMemberEntropy() would trip the CHECK_GT(numerator, 0u)
    // in ConvertToBitsOfEntropy() and crash on externally sourced data, so
    // reject it here like every other malformed-layer condition.
    if (member.slots().empty()) {
      Invalidate();
      return;
    }
    // All layer members are included in the entropy calculation, including
    // empty ones – ones not referenced by any study. A client assigned to an
    // empty layer member would have the visible assignment state of "no study
    // assigned", which itself reveals information and should be accounted for
    // in the entropy calculation.
    const bool inserted =
        entropy_used_by_member_id_
            .emplace(member.id(), GetLayerMemberEntropy(member, num_slots))
            .second;
    if (!inserted) {
      // => Duplicated layer member ID.
      Invalidate();
      return;
    }
  }
}

// Defaulted destructor, defined out of line in this file.
LimitedLayerEntropyCostTracker::~LimitedLayerEntropyCostTracker() = default;

// Adds the entropy consumed by `study` to every layer member it references.
//
// Returns true when the study consumes no entropy, or when every referenced
// member stays within the entropy limit after the addition. Returns false when
// the tracker is (or becomes) invalid, or when the entropy limit is (or was
// already) exceeded.
bool LimitedLayerEntropyCostTracker::AddEntropyUsedByStudy(const Study& study) {
  if (!IsValid()) {
    return false;
  }

  // The caller should have already validated the study's layer references.
  // However, as the study data comes from an external source, we verify it
  // here again for safety, instead of using a CHECK. Note that we verify each
  // condition individually in order to dump a unique stack trace for each
  // failure condition.
  if (!study.has_layer()) {
    Invalidate();
    return false;
  }
  const auto& layer_reference = study.layer();
  if (layer_reference.layer_id() != limited_layer_id_) {
    Invalidate();
    return false;
  }
  // Use the explicitly listed member IDs when present; otherwise use the
  // fallback member IDs provided by VariationsLayers.
  const auto& referenced_member_ids =
      layer_reference.layer_member_ids().empty()
          ? VariationsLayers::FallbackLayerMemberIds(layer_reference)
          : layer_reference.layer_member_ids();
  if (referenced_member_ids.empty()) {
    Invalidate();
    return false;
  }

  // Once some layer member has gone above the entropy limit, no more studies
  // can be assigned to the limited layer.
  if (entropy_limit_exceeded_) {
    return false;
  }

  // A study that consumes no entropy at all (e.g. one with no Google web
  // experiment ID or Google web trigger experiment ID) is always accepted and
  // leaves the tracked values untouched.
  const double entropy_cost = GetEntropyUsedByStudy(study);
  if (entropy_cost <= 0) {
    return true;
  }

  // Charge the study's entropy to each member it references. The references
  // are assumed to have been validated by the caller, but are re-checked here.
  for (const uint32_t member_id : referenced_member_ids) {
    if (member_id == 0u) {
      Invalidate();
      return false;
    }
    const auto member_entry = entropy_used_by_member_id_.find(member_id);
    if (member_entry == entropy_used_by_member_id_.end()) {
      Invalidate();
      return false;
    }

    member_entry->second += entropy_cost;
    includes_study_entropy_ = true;

    // TODO(siakabaro): The entropy used by a layer member could be over the
    // entropy limit if the layer member covers a very small percentage of the
    // population. In such a case, we need to pool the empty layer members
    // together and check if their combined entropy is not over the limit.
    if (member_entry->second > entropy_limit_in_bits_) {
      entropy_limit_exceeded_ = true;
    }
  }

  // The study is accepted only if no member went over the limit.
  return !entropy_limit_exceeded_;
}

// Test-only accessor: returns the largest entropy value tracked for any layer
// member, or 0 if no study entropy has been added to the tracker yet.
double LimitedLayerEntropyCostTracker::GetMaxEntropyUsedForTesting() const {
  if (!includes_study_entropy_) {
    return 0.0;
  }
  double largest_so_far = 0.0;
  for (const auto& entry : entropy_used_by_member_id_) {
    // `entry.second` is the entropy used by the member whose ID is
    // `entry.first`.
    if (largest_so_far < entry.second) {
      largest_so_far = entry.second;
    }
  }
  return largest_so_far;
}

// Marks the tracker as invalid and calls base::debug::DumpWithoutCrashing().
void LimitedLayerEntropyCostTracker::Invalidate() {
  // The caller should have already validated the layer and study info before
  // any and all calls to the tracker. However, as the layer and study data
  // come from an external source, additional safety checks are made throughout
  // the tracker. We use these instead of CHECKs or DCHECKs, and verify each
  // condition individually, in order to dump a unique stack trace for each
  // failure condition.
  is_valid_ = false;
  base::debug::DumpWithoutCrashing();
}

}  // namespace variations