File: btm_navigation_flow_detector.h

package info (click to toggle)
chromium 139.0.7258.127-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,122,156 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (193 lines) | stat: -rw-r--r-- 7,637 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_BTM_BTM_NAVIGATION_FLOW_DETECTOR_H_
#define CONTENT_BROWSER_BTM_BTM_NAVIGATION_FLOW_DETECTOR_H_

#include <stddef.h>
#include <stdint.h>

#include <optional>
#include <string>
#include <vector>

#include "base/memory/weak_ptr.h"
#include "base/time/clock.h"
#include "content/browser/btm/btm_page_visit_observer.h"
#include "content/common/content_export.h"
#include "content/public/browser/web_contents_user_data.h"
#include "services/metrics/public/cpp/ukm_source_id.h"

namespace content {

namespace btm {

// Should match DIPSDirectNavigationSource in tools/metrics/histograms/enums.xml
//
// Every enumerator carries an explicit numeric value. Because the values are
// mirrored in the histogram enum named above, existing values should
// presumably never be renumbered or reused; add new entries with fresh values
// and update enums.xml alongside (confirm against the metrics guidelines).
enum class DirectNavigationSource {
  kUnknown = 0,
  kOmnibar = 1,
  kBookmark = 2,
};

// Data describing the entrypoint of a flow. Each constructor covers a
// different way of learning about the entrypoint (server redirect exit vs.
// client redirect(-like) exit, before or after the page visit was reported).
// The constructors are defined outside this header.
struct EntrypointInfo {
  // Used when the entrypoint has a server redirect exit.
  // NOTE(review): `was_referral_client_redirect_like` presumably initializes
  // `was_referral_client_redirect` below; the names differ slightly, so
  // confirm in the .cc file.
  explicit EntrypointInfo(const BtmServerRedirectInfo& server_redirect_info,
                          bool was_referral_client_redirect_like);
  // Used when the entrypoint has a client redirect(-like) exit, when the page
  // visit has already been reported.
  explicit EntrypointInfo(const BtmNavigationInfo& referral,
                          const BtmPageVisitInfo& entrypoint_visit);
  // Used when the entrypoint has a client redirect(-like) exit, when the
  // EntrypointInfo needs to be created before the page visit is reported.
  explicit EntrypointInfo(const BtmNavigationInfo& referral);

  // Fixed at construction. Because this member is const, the implicitly
  // declared copy- and move-assignment operators of EntrypointInfo are
  // deleted; instances can be copy/move-constructed but not assigned.
  const std::string site;
  // UKM source id associated with the entrypoint (set by the constructors).
  ukm::SourceId source_id;
  // No default member initializer here or on the field below; the
  // constructors are expected to set both (verify in the .cc file).
  bool had_active_storage_access;
  bool was_referral_client_redirect;
};

// Status of a flow as tracked for InFlowSuccessorInteraction. The numeric
// value of every enumerator is written out so it does not depend on
// declaration order; the values are the same ones implicit numbering yields.
enum class FlowStatus {
  kInvalidated = 0,
  kOngoing = 1,
  kEnded = 2,
};

// Bookkeeping for one flow that may result in an InFlowSuccessorInteraction
// event: the flow's entrypoint, the current index within the flow, and the
// flow indices at which a successor interaction was recorded.
// All non-inline member functions are defined outside this header.
class InFlowSuccessorInteractionState {
 public:
  // Takes the entrypoint by value (presumably stored in `flow_entrypoint_`).
  explicit InFlowSuccessorInteractionState(btm::EntrypointInfo flow_entrypoint);
  ~InFlowSuccessorInteractionState();

  // NOTE(review): presumably marks the stored entrypoint as having had active
  // storage access; confirm in the .cc file.
  void RecordActiveStorageAccessByEntrypoint();
  // NOTE(review): presumably advances `flow_index_` by `increment`; confirm in
  // the .cc file.
  void IncrementFlowIndex(size_t increment);
  // NOTE(review): presumably appends the current `flow_index_` to
  // `successor_interaction_indices_`; confirm in the .cc file.
  void RecordSuccessorInteractionAtCurrentFlowIndex();
  // Whether the current position in the flow is a "successor" page; the exact
  // rule is defined in the .cc file.
  bool IsAtSuccessor() const;

  // Read-only accessors for the tracked state.
  const btm::EntrypointInfo& flow_entrypoint() const {
    return flow_entrypoint_;
  }
  size_t flow_index() const { return flow_index_; }
  const std::vector<size_t>& successor_interaction_indices() const {
    return successor_interaction_indices_;
  }

 private:
  // Entrypoint of the flow being tracked.
  btm::EntrypointInfo flow_entrypoint_;
  // Current index within the flow; starts at 0.
  size_t flow_index_ = 0;
  // Flow indices at which a successor interaction was recorded; starts empty.
  std::vector<size_t> successor_interaction_indices_;
};

}  // namespace btm

// Detects possible navigation flows with the aim of discovering how to
// distinguish user-interest navigation flows from navigational tracking.
//
// For most events a navigation flow consists of three consecutive navigations
// in a tab (A->B->C). Some events might be recorded for flows with more than
// three navigations, e.g. InFlowSuccessorInteraction, where there are 4 or
// more navigations.
//
// Currently only reports UKM to inform how we might identify possible
// navigational tracking by sites that also perform user-interest activity.
class CONTENT_EXPORT BtmNavigationFlowDetector
    : public WebContentsUserData<BtmNavigationFlowDetector> {
 public:
  ~BtmNavigationFlowDetector() override;

  // Test-only hook: forwards `clock` to the owned `BtmPageVisitObserver`.
  void SetClockForTesting(base::Clock* clock) {
    page_visit_observer_.SetClockForTesting(clock);
  }

 protected:
  // Not public: construction goes through `WebContentsUserData`, which is
  // befriended in the private section for that purpose.
  explicit BtmNavigationFlowDetector(WebContents* web_contents);

  // Records an event describing the characteristics of a navigation flow.
  void MaybeEmitNavFlowNodeUkmForPreviousPage();
  // Eligibility check paired with the method above (presumably consulted
  // before emitting; confirm in the .cc file).
  bool CanEmitNavFlowNodeUkmForPreviousPage() const;

  // Records events for flows we suspect include a tracker and have a server
  // redirect.
  void MaybeEmitSuspectedTrackerFlowUkmForServerRedirectExit(
      const BtmServerRedirectInfo& exit_info,
      int32_t flow_id);
  // Eligibility check paired with the server-redirect-exit emitter above.
  bool CanEmitSuspectedTrackerFlowUkmForServerRedirectExit(
      const BtmServerRedirectInfo& exit_info) const;

  // Records events for flows we suspect include a tracker and have a client
  // redirect.
  void MaybeEmitSuspectedTrackerFlowUkmForClientRedirectExit(int32_t flow_id);
  // Eligibility check paired with the client-redirect-exit emitter above.
  bool CanEmitSuspectedTrackerFlowUkmForClientRedirectExit() const;

  // Eligibility check expressed in terms of the referrer page, the entrypoint
  // and the exit site. NOTE(review): presumably shared by the two
  // exit-specific checks above; confirm in the .cc file.
  bool CanEmitSuspectedTrackerFlowUkm(
      const BtmPageVisitInfo& referrer_page_info,
      const btm::EntrypointInfo& entrypoint_info,
      const std::string& exit_site) const;

  // Records an event for flows where there was a user interaction in between,
  // i.e. for flow A->B->C, there was a user interaction on B. This could be
  // used as a signal that B is not a tracker.
  void MaybeEmitInFlowInteraction(int32_t flow_id);

  // Records events for flows where there's a series of same-site redirects,
  // followed by a page with a user interaction (what we consider the
  // "successor"), followed by another series of same-site redirects that end
  // in a cross-site redirect. For example, we would record this event for
  // A->B1->B2->B3->C, where B2 had a user interaction. This pattern is commonly
  // used in auth flows and could be used as a signal that B1 is not a tracker.
  void MaybeEmitInFlowSuccessorInteraction();

 private:
  // So WebContentsUserData::CreateForWebContents can call the constructor.
  friend class WebContentsUserData<BtmNavigationFlowDetector>;

  // Callback to be called by `BtmPageVisitObserver`.
  // Both arguments are received by value.
  void OnPageVisitReported(BtmPageVisitInfo page_visit,
                           BtmNavigationInfo navigation);

  // Returns the flow status that applies after a navigation, given whether the
  // most recent navigation started a new flow. The decision rules live in the
  // .cc file.
  btm::FlowStatus FlowStatusAfterNavigation(
      bool did_most_recent_navigation_start_new_flow) const;
  // Returns whether the entrypoint was set or not.
  bool MaybeInitializeSuccessorInteractionTrackingState();

  // Must be called only when `previous_page_to_current_page_` is populated.
  // NOTE(review): the top-level `const` on a by-value return type has no
  // benefit for callers and prevents moving from the returned temporary;
  // dropping it would require changing the out-of-line definition as well.
  const std::string GetSiteForCurrentPage() const;

  // Navigation Flow:
  // A navigation flow consists of three navigations in a tab (A->B->C).
  // The infos below are updated when the primary page changes.
  //
  // Note that server redirects don't commit, so if there's a server redirect
  // from B->C, B is not committed and not reported as a page visit, but instead
  // in the `server_redirects` field of the corresponding `BtmNavigationInfo`.
  // In this case, `previous_page_` corresponds to A,
  // `previous_page_to_current_page_->server_redirects` will contain B, and
  // `previous_page_to_current_page_->destination` will have some limited
  // information about C.

  // In a series of three committed pages A->B->C, contains information about
  // the visit on A.
  std::optional<BtmPageVisitInfo> two_pages_ago_;
  // In a series of three committed pages A->B->C, contains information about
  // the navigation A->B.
  std::optional<BtmNavigationInfo> two_pages_ago_to_previous_page_;
  // In a series of three committed pages A->B->C, contains information about
  // the visit on B.
  std::optional<BtmPageVisitInfo> previous_page_;
  // In a series of three committed pages A->B->C, contains information about
  // the navigation B->C.
  std::optional<BtmNavigationInfo> previous_page_to_current_page_;

  // The status of a flow for the purposes of InFlowSuccessorInteraction, after
  // the most recent primary page change.
  // Starts out as kInvalidated.
  btm::FlowStatus flow_status_ = btm::FlowStatus::kInvalidated;
  // Data needed for emitting DIPS.TrustIndicator.InFlowSuccessorInteraction.
  // Set only when there's an ongoing flow that's possibly valid (we can't know
  // for sure until it ends or is invalidated).
  std::optional<btm::InFlowSuccessorInteractionState>
      successor_interaction_tracking_state_;

  // Owned observer that reports page visits to `OnPageVisitReported`; also the
  // target of `SetClockForTesting`.
  BtmPageVisitObserver page_visit_observer_;

  // Key declaration required by `WebContentsUserData` (the macro comes from
  // web_contents_user_data.h).
  WEB_CONTENTS_USER_DATA_KEY_DECL();
};

}  // namespace content

#endif  // CONTENT_BROWSER_BTM_BTM_NAVIGATION_FLOW_DETECTOR_H_