File: browsing_topics_state.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (206 lines) | stat: -rw-r--r-- 8,700 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_BROWSING_TOPICS_BROWSING_TOPICS_STATE_H_
#define COMPONENTS_BROWSING_TOPICS_BROWSING_TOPICS_STATE_H_

#include "base/containers/queue.h"
#include "base/files/important_file_writer.h"
#include "base/gtest_prod_util.h"
#include "base/task/sequenced_task_runner.h"
#include "base/time/time.h"
#include "components/browsing_topics/common/common_types.h"
#include "components/browsing_topics/epoch_topics.h"

namespace browsing_topics {

// Contains the data needed to calculate the browsing topics when a context
// requests it via document.browsingTopics(). The data is backed by a JSON file:
// when `BrowsingTopicsState` is initialized, the state members will be read
// from the file on a backend thread, and all overwriting methods will schedule
// an update to the file. The `BrowsingTopicsState`'s owner should listen on the
// `loaded_callback` notification. Before the loading finishes, it's disallowed
// to access this `BrowsingTopicsState`.
class BrowsingTopicsState
    : public base::ImportantFileWriter::BackgroundDataSerializer {
 public:
  // The outcome of reading the backing file, handed to `DidLoadFile`. It is
  // neither copyable nor movable and is passed around via `std::unique_ptr`.
  struct LoadResult {
    LoadResult(bool file_exists, std::unique_ptr<base::Value> value);
    ~LoadResult();

    LoadResult(const LoadResult&) = delete;
    LoadResult& operator=(const LoadResult&) = delete;
    LoadResult(LoadResult&&) = delete;
    LoadResult& operator=(LoadResult&&) = delete;

    // NOTE(review): presumably whether the backing JSON file was present when
    // the load was attempted -- confirm against the loader in the .cc file.
    bool file_exists = false;

    // The deserialized value from the content of the json file.
    std::unique_ptr<base::Value> value;
  };

  // The result of parsing a `LoadResult::value` to the `BrowsingTopicsState`.
  struct ParseResult {
    // Whether the parsing was successful. Parsing can fail due to corrupted
    // data.
    bool success = false;

    // Whether `BrowsingTopicsState` should be saved to the file after parsing.
    // Saving is needed if the config version has been updated, or if an error
    // is encountered (to clean up unneeded data). In the common case where the
    // data is loaded from a pre-existing file, the file save isn't necessary.
    bool should_save_state_to_file = false;
  };

  // Per the class comment, initialization reads the state members from the
  // backing file on a backend thread, and the owner should wait for the
  // `loaded_callback` notification before accessing this object.
  // NOTE(review): `profile_path` is presumably the directory in which the
  // backing JSON file lives -- confirm in the .cc file.
  explicit BrowsingTopicsState(const base::FilePath& profile_path,
                               base::OnceClosure loaded_callback);

  ~BrowsingTopicsState() override;

  // Not copyable or movable.
  BrowsingTopicsState(const BrowsingTopicsState&) = delete;
  BrowsingTopicsState& operator=(const BrowsingTopicsState&) = delete;
  BrowsingTopicsState(BrowsingTopicsState&&) = delete;
  BrowsingTopicsState& operator=(BrowsingTopicsState&&) = delete;

  // Clear `epochs_`.
  void ClearAllTopics();

  // Clear the topics data at `epochs_[epoch_index]`. Note that this doesn't
  // remove the entry from `epochs_`.
  void ClearOneEpoch(size_t epoch_index);

  // Clear the topic and observing domains data for `topic`.
  void ClearTopic(Topic topic);

  // Clear the observing domains data in `epochs_` that match
  // `hashed_context_domain`.
  void ClearContextDomain(const HashedDomain& hashed_context_domain);

  // Append `epoch_topics` to `epochs_`. This is invoked at the end of each
  // epoch calculation. If an old EpochTopics is removed as a result, return it.
  std::optional<EpochTopics> AddEpoch(EpochTopics epoch_topics);

  // Remove expired epochs synchronously. For unexpired epochs, let each one
  // schedule its own expiration task. Upon expiration of each epoch,
  // `OnEpochExpired` will be called to remove it from `epochs_`.
  void ScheduleEpochsExpiration();

  // Calculates the new scheduled time by adding the provided `delay`
  // to the current time (`base::Time::Now()`), and stores the result to
  // `next_scheduled_calculation_time_`.
  void UpdateNextScheduledCalculationTime(base::TimeDelta delay);

  // Calculate the candidate epochs to derive the topics from on `top_domain`.
  // The caller (i.e. BrowsingTopicsServiceImpl, which also holds `this`) is
  // responsible for ensuring that the `EpochTopics` objects that the pointers
  // refer to remain alive when the caller is accessing them.
  std::vector<const EpochTopics*> EpochsForSite(
      const std::string& top_domain) const;

  // The accessors below DCHECK that loading has finished (`loaded_` is true)
  // before handing out the corresponding state member.
  const base::circular_deque<EpochTopics>& epochs() const {
    DCHECK(loaded_);
    return epochs_;
  }

  base::Time next_scheduled_calculation_time() const {
    DCHECK(loaded_);
    return next_scheduled_calculation_time_;
  }

  // Returns `hmac_key_` as a `ReadOnlyHmacKey`.
  ReadOnlyHmacKey hmac_key() const {
    DCHECK(loaded_);
    return hmac_key_;
  }

  // Test-only. Unlike the other public accessors, this one is allowed to be
  // called before `loaded_` becomes true (see the comment on `loaded_`).
  // NOTE(review): presumably reports whether `writer_` has a pending write --
  // confirm in the .cc file.
  bool HasScheduledSaveForTesting() const;

 private:
  // Tests that are granted access to the private members of this class.
  FRIEND_TEST_ALL_PREFIXES(BrowsingTopicsStateTest,
                           EpochsForSite_OneEpoch_IntroductionTime);
  FRIEND_TEST_ALL_PREFIXES(BrowsingTopicsStateTest,
                           EpochsForSite_OneEpoch_IntroductionTime2);
  FRIEND_TEST_ALL_PREFIXES(BrowsingTopicsStateTest,
                           EpochsForSite_ThreeEpochs_IntroductionTime);
  FRIEND_TEST_ALL_PREFIXES(BrowsingTopicsStateTest,
                           EpochsForSite_OneEpoch_ManuallyTriggered);
  FRIEND_TEST_ALL_PREFIXES(BrowsingTopicsStateTest, EpochsForSite_PhaseOutTime);

  // Calculate the delay between the calculation of the latest epoch and when a
  // site starts seeing that epoch's topics. The site transitions to the latest
  // epoch at a per-site, per-epoch random time within
  // [calculation time, calculation time + max delay).
  base::TimeDelta CalculateSiteStickyIntroductionDelay(
      const std::string& top_domain) const;

  // Calculate the time offset between when a site stops seeing an epoch's
  // topics and when the epoch is actually deleted. The site transitions away
  // from the epoch at a per-site, per-epoch random time within
  // [deletion time - max offset, deletion time].
  //
  // Note: The actual phase-out time can be influenced by the
  // 'kBrowsingTopicsNumberOfEpochsToExpose' setting. If this setting enforces a
  // more restrictive phase-out, that will take precedence.
  base::TimeDelta CalculateSiteStickyPhaseOutTimeOffset(
      const std::string& top_domain,
      const EpochTopics& epoch) const;

  // ImportantFileWriter::BackgroundDataSerializer implementation.
  base::ImportantFileWriter::BackgroundDataProducerCallback
  GetSerializedDataProducerForBackgroundSequence() override;

  // NOTE(review): presumably builds the dictionary form of the state members
  // that gets written to the JSON file (the counterpart of `ParseValue`) --
  // confirm in the .cc file.
  base::Value::Dict ToDictValue() const;

  // Schedules an update of the backing file. Per the class comment, all
  // overwriting methods schedule such an update.
  // NOTE(review): presumably goes through `writer_` -- confirm in the .cc file.
  void ScheduleSave();

  // Receives the result of the file load together with a `loaded_callback`.
  // NOTE(review): presumably runs after the backend read completes, populates
  // the state via `ParseValue`, sets `loaded_`, and then runs the callback
  // that was given to the constructor -- confirm in the .cc file.
  void DidLoadFile(base::OnceClosure loaded_callback,
                   std::unique_ptr<LoadResult> load_result);

  // Called upon expiration of an epoch (see `ScheduleEpochsExpiration`) to
  // remove it from `epochs_`.
  // NOTE(review): `calculation_time` presumably identifies which epoch
  // expired -- confirm in the .cc file.
  void OnEpochExpired(base::Time calculation_time);

  // Parse `value` and populate the state member variables.
  ParseResult ParseValue(const base::Value& value);

  // Sequenced task runner where disk writes will be performed.
  scoped_refptr<base::SequencedTaskRunner> backend_task_runner_;

  // Helper to write data safely.
  base::ImportantFileWriter writer_;

  // Contains the browsing topics of the latest epochs, as well as the topics
  // observed by each context domain in each of the epochs. These entries are in
  // time ascending order: a new entry will be appended to `epochs_` on every
  // browsing topics calculation, regardless of whether it succeeded or not. We
  // are only interested in the latest
  // `kBrowsingTopicsNumberOfEpochsToExpose + 1` epochs (i.e. the epoch
  // switching time will be per-user, per-site, with a full epoch range of
  // variance, thus one extra epoch is kept here), so old data will be
  // automatically removed, and the size of the queue won't exceed that limit.
  base::circular_deque<EpochTopics> epochs_;

  // The next time a calculation should occur. This will be updated when a
  // calculation is scheduled at the end of a topics calculation and is always
  // synchronously updated with `epochs_`.
  //
  // next_scheduled_calculation_time_.is_null() indicates this is a new profile
  // or there was an update to the configuration version when this
  // `BrowsingTopicsState` is initialized. In either case, `epochs_` will be
  // empty.
  base::Time next_scheduled_calculation_time_;

  // The key for calculating the per-user hash numbers. See ./util.h for various
  // use cases. This key is generated and synced to storage in the first
  // browsing session. It won't be reset/updated in any case.
  HmacKey hmac_key_{};

  // Whether the state members are loaded from file. Public accessor methods are
  // disallowed (except for `HasScheduledSaveForTesting`) before `loaded_`
  // becomes true.
  bool loaded_ = false;

  // Declared last so that it is destroyed before the other members (C++
  // destroys members in reverse declaration order). Keep it last when adding
  // new members.
  base::WeakPtrFactory<BrowsingTopicsState> weak_ptr_factory_{this};
};

}  // namespace browsing_topics

#endif  // COMPONENTS_BROWSING_TOPICS_BROWSING_TOPICS_STATE_H_