File: btm_navigation_flow_detector.h

package info (click to toggle)
chromium 142.0.7444.175-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,295,352 kB
  • sloc: cpp: 35,488,378; ansic: 7,479,680; javascript: 4,259,373; python: 1,466,843; xml: 757,444; asm: 710,716; pascal: 187,980; sh: 89,247; perl: 88,690; objc: 79,984; sql: 56,984; cs: 42,192; fortran: 24,137; makefile: 22,913; tcl: 15,277; php: 14,018; yacc: 9,005; ruby: 7,553; awk: 3,720; lisp: 3,096; lex: 1,330; ada: 727; jsp: 228; sed: 36
file content (205 lines) | stat: -rw-r--r-- 8,341 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_BTM_BTM_NAVIGATION_FLOW_DETECTOR_H_
#define CONTENT_BROWSER_BTM_BTM_NAVIGATION_FLOW_DETECTOR_H_

#include <stddef.h>
#include <stdint.h>

#include <optional>
#include <string>
#include <vector>

#include "base/memory/weak_ptr.h"
#include "base/time/clock.h"
#include "content/browser/btm/btm_page_visit_observer.h"
#include "content/common/content_export.h"
#include "content/public/browser/web_contents_user_data.h"
#include "services/metrics/public/cpp/ukm_source_id.h"

namespace content {

namespace btm {

// Numeric values should match DIPSDirectNavigationSource in
// tools/metrics/histograms/enums.xml; keep the two in sync when editing either.
// The underlying type is spelled out here and is the same `int` a scoped enum
// gets by default.
enum class DirectNavigationSource : int {
  kUnknown = 0,
  kOmnibar = 1,
  kBookmark = 2,
};

// An entrypoint is the first site in a navigation flow that is not the
// referrer. For example, in a flow A->B->C, B is the entrypoint.
//
// All three constructors are defined out of line, so which fields each one
// populates (and with what) is not visible from this header.
struct EntrypointInfo {
  // Used when the entrypoint has a server redirect exit.
  explicit EntrypointInfo(const BtmServerRedirectInfo& server_redirect_info,
                          bool was_referral_client_redirect_like);
  // Used when the entrypoint has a client redirect(-like) exit, when the page
  // visit has already been reported.
  explicit EntrypointInfo(const BtmNavigationInfo& referral,
                          const BtmPageVisitInfo& entrypoint_visit);
  // Used when the entrypoint has a client redirect(-like) exit, when the
  // EntrypointInfo needs to be created before the page visit is reported.
  explicit EntrypointInfo(const BtmNavigationInfo& referral);

  // The entrypoint's site.
  // NOTE(review): because this member is `const`, the implicitly-declared
  // copy- and move-assignment operators of EntrypointInfo are deleted, and
  // move-constructing an EntrypointInfo copies this string rather than moving
  // it.
  const std::string site;
  // UKM source id; presumably that of the entrypoint page -- TODO confirm
  // against the constructor definitions.
  ukm::SourceId source_id;
  // Not `const`, so it can change after construction; presumably updated via
  // `InFlowSuccessorInteractionState::RecordActiveStorageAccessByEntrypoint()`
  // -- TODO confirm in the .cc.
  bool had_active_storage_access;
  // NOTE(review): the first constructor's parameter is named
  // `was_referral_client_redirect_like` while this field drops the `_like`
  // suffix; presumably the field also covers client-redirect-like referrals --
  // confirm.
  bool was_referral_client_redirect;
};

// Where the most recent sequence of navigations stands with respect to being
// a navigation flow.
enum class FlowStatus {
  // Not a navigation flow: the most recent sequence of navigations did not
  // qualify.
  kInvalidated = 0,
  // Undecided: the most recent sequence of navigations may be part of a
  // navigation flow, but more navigations must be observed before we can know.
  kOngoing = 1,
  // A navigation flow: the most recent sequence of navigations is valid and
  // complete.
  kEnded = 2,
};

// Bookkeeping for the UKM event
// `DIPS.TrustIndicator.InFlowSuccessorInteractionV2`: holds what is needed both
// to decide whether the event should be emitted and to actually emit it.
class InFlowSuccessorInteractionState {
 public:
  // `flow_entrypoint` is taken by value.
  explicit InFlowSuccessorInteractionState(btm::EntrypointInfo flow_entrypoint);
  ~InFlowSuccessorInteractionState();

  // Read-only views of the tracked state.
  size_t flow_index() const { return flow_index_; }
  const btm::EntrypointInfo& flow_entrypoint() const {
    return flow_entrypoint_;
  }
  const std::vector<size_t>& successor_interaction_indices() const {
    return successor_interaction_indices_;
  }
  bool IsAtSuccessor() const;

  // State updates. All are defined out of line; see the .cc for their exact
  // effects.
  void IncrementFlowIndex(size_t increment);
  void RecordSuccessorInteractionAtCurrentFlowIndex();
  void RecordActiveStorageAccessByEntrypoint();

 private:
  // Entrypoint of the flow being tracked, as passed to the constructor.
  btm::EntrypointInfo flow_entrypoint_;
  // Starts at zero.
  size_t flow_index_{0};
  // Presumably the `flow_index_` values at which
  // `RecordSuccessorInteractionAtCurrentFlowIndex()` was called -- confirm
  // against the .cc.
  std::vector<size_t> successor_interaction_indices_;
};

}  // namespace btm

// Detects possible navigation flows with the aim of discovering how to
// distinguish user-interest navigation flows from navigational tracking.
//
// Currently only reports UKM to inform how we might identify possible
// navigational tracking by sites that also perform user-interest activity.
//
// For most UKM events emitted by this class, a navigation flow consists of
// three consecutive navigations in the same tab (A->B->C). Some events might be
// recorded for flows with more than three navigations (such as
// `DIPS.TrustIndicator.InFlowSuccessorInteractionV2`, where there are 4 or more
// navigations).
//
// Instances are attached to a `WebContents` as `WebContentsUserData`. The
// constructor is protected; `WebContentsUserData` is befriended below so that
// `CreateForWebContents` can call it.
class CONTENT_EXPORT BtmNavigationFlowDetector
    : public WebContentsUserData<BtmNavigationFlowDetector> {
 public:
  ~BtmNavigationFlowDetector() override;

  // Test-only hook: forwards `clock` to `page_visit_observer_`.
  void SetClockForTesting(base::Clock* clock) {
    page_visit_observer_.SetClockForTesting(clock);
  }

 protected:
  explicit BtmNavigationFlowDetector(WebContents* web_contents);

  // NOTE(review): for each `MaybeEmit...` / `CanEmit...` pair below, the
  // `MaybeEmit...` method presumably emits only when the matching const
  // `CanEmit...` predicate returns true -- confirm in the .cc. The meaning of
  // the `flow_id` parameters is not documented in this header; presumably it
  // ties together the events emitted for one flow -- TODO confirm.

  // Records an event describing the characteristics of a navigation flow.
  void MaybeEmitNavFlowNodeUkmForPreviousPage();
  bool CanEmitNavFlowNodeUkmForPreviousPage() const;

  // Records events for flows we suspect include a tracker and have a server
  // redirect.
  void MaybeEmitSuspectedTrackerFlowUkmForServerRedirectExit(
      const BtmServerRedirectInfo& exit_info,
      int32_t flow_id);
  bool CanEmitSuspectedTrackerFlowUkmForServerRedirectExit(
      const BtmServerRedirectInfo& exit_info) const;

  // Records events for flows we suspect include a tracker and have a client
  // redirect.
  void MaybeEmitSuspectedTrackerFlowUkmForClientRedirectExit(int32_t flow_id);
  bool CanEmitSuspectedTrackerFlowUkmForClientRedirectExit() const;

  // Predicate over an explicit (referrer, entrypoint, exit site) triple;
  // presumably shared by the server-redirect and client-redirect variants
  // above -- confirm in the .cc.
  bool CanEmitSuspectedTrackerFlowUkm(
      const BtmPageVisitInfo& referrer_page_info,
      const btm::EntrypointInfo& entrypoint_info,
      const std::string& exit_site) const;

  // Records an event for flows where there was a user interaction in between,
  // i.e. for flow A->B->C, there was a user interaction on B. This could be
  // used as a signal that B is not a tracker.
  void MaybeEmitInFlowInteraction(int32_t flow_id);

  // Records events for flows where there's a series of same-site redirects,
  // followed by a page with a user interaction (what we consider the
  // "successor"), followed by another series of same-site redirects that end
  // in a cross-site redirect. For example, we would record this event for
  // A->B1->B2->B3->C, where B2 had a user interaction. This pattern is commonly
  // used in auth flows and could be used as a signal that B1 is not a tracker.
  void MaybeEmitInFlowSuccessorInteraction();

 private:
  // So WebContentsUserData::CreateForWebContents can call the constructor.
  friend class WebContentsUserData<BtmNavigationFlowDetector>;

  // Callback to be called by `BtmPageVisitObserver`.
  void OnPageVisitReported(BtmPageVisitInfo page_visit,
                           BtmNavigationInfo navigation);

  btm::FlowStatus FlowStatusAfterNavigation(
      bool did_most_recent_navigation_start_new_flow) const;
  // Returns whether the entrypoint was set or not.
  bool MaybeInitializeSuccessorInteractionTrackingState();

  // Must be called only when `previous_page_to_current_page_` is populated.
  // NOTE(review): the top-level `const` on this by-value return type prevents
  // callers from moving out of the result. Dropping it would also require
  // changing the out-of-line definition, since for class types the qualifier
  // is part of the function type.
  const std::string GetSiteForCurrentPage() const;

  // Navigation Flow:
  // A navigation flow consists of three navigations in a tab (A->B->C). The
  // state below is updated when the primary page changes.
  //
  // Note that server redirects don't commit, so if there's a server redirect
  // from B->C, B is not committed and not reported as a page visit, but instead
  // is captured in the `server_redirects` field of the corresponding
  // `BtmNavigationInfo`. In this case, `previous_page_` corresponds to A,
  // `previous_page_to_current_page_->server_redirects` will contain B, and
  // `previous_page_to_current_page_->destination` will have some limited
  // information about C.

  // In a series of three committed pages A->B->C, contains information about
  // the visit on A.
  std::optional<BtmPageVisitInfo> two_pages_ago_;
  // In a series of three committed pages A->B->C, contains information about
  // the navigation A->B.
  std::optional<BtmNavigationInfo> two_pages_ago_to_previous_page_;
  // In a series of three committed pages A->B->C, contains information about
  // the visit on B.
  std::optional<BtmPageVisitInfo> previous_page_;
  // In a series of three committed pages A->B->C, contains information about
  // the navigation B->C.
  std::optional<BtmNavigationInfo> previous_page_to_current_page_;

  // The status of a flow for the purposes of InFlowSuccessorInteractionV2,
  // after the most recent primary page change.
  btm::FlowStatus flow_status_ = btm::FlowStatus::kInvalidated;
  // Data needed for emitting DIPS.TrustIndicator.InFlowSuccessorInteractionV2.
  // Set only when there's an ongoing flow that's possibly valid (we can't know
  // for sure until it ends or is invalidated).
  std::optional<btm::InFlowSuccessorInteractionState>
      successor_interaction_tracking_state_;

  // Source of the page-visit reports delivered to `OnPageVisitReported`.
  // NOTE(review): declared after all of the flow state above, so it is
  // constructed last and destroyed first; presumably intentional so that no
  // report can arrive once that state is gone -- keep it last unless the .cc
  // shows otherwise.
  BtmPageVisitObserver page_visit_observer_;

  WEB_CONTENTS_USER_DATA_KEY_DECL();
};

}  // namespace content

#endif  // CONTENT_BROWSER_BTM_BTM_NAVIGATION_FLOW_DETECTOR_H_