File: safe_browsing_navigation_observer_manager.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (490 lines) | stat: -rw-r--r-- 22,127 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_SAFE_BROWSING_CONTENT_BROWSER_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_
#define COMPONENTS_SAFE_BROWSING_CONTENT_BROWSER_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_

#include <optional>
#include <unordered_map>

#include "base/containers/circular_deque.h"
#include "base/feature_list.h"
#include "base/memory/raw_ptr.h"
#include "base/supports_user_data.h"
#include "base/time/time.h"
#include "base/timer/timer.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/safe_browsing/core/browser/referrer_chain_provider.h"
#include "components/safe_browsing/core/common/proto/csd.pb.h"
#include "components/sessions/core/session_id.h"
#include "content/public/browser/service_worker_context.h"
#include "content/public/browser/service_worker_context_observer.h"
#include "content/public/browser/web_contents_observer.h"
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
#include "ui/base/clipboard/clipboard.h"
#include "url/gurl.h"

class PrefService;

// Forward declarations for the content-layer types that this header refers to
// only by pointer or reference. Declaring every such type here keeps the
// header from depending on transitive includes for these names.
namespace content {
class BrowserContext;
class NavigationHandle;
class RenderFrameHost;
class WebContents;
struct GlobalRenderFrameHostId;
}  // namespace content

namespace safe_browsing {

// Forward declarations of types that are not defined in this header. Below,
// NavigationEvent is referred to only through raw pointers and
// std::unique_ptr, and ResolvedIPAddress only as a std::vector element type.
class SafeBrowsingNavigationObserver;
struct NavigationEvent;
struct ResolvedIPAddress;

// User data stored in DownloadItem for referrer chain information.
class ReferrerChainData : public base::SupportsUserData::Data {
 public:
  ReferrerChainData(ReferrerChainProvider::AttributionResult attribution_result,
                    std::unique_ptr<ReferrerChain> referrer_chain,
                    size_t referrer_chain_length,
                    size_t recent_navigation_to_collect);
  ~ReferrerChainData() override;
  ReferrerChainProvider::AttributionResult attribution_result() const {
    return attribution_result_;
  }
  ReferrerChain* GetReferrerChain() const;
  size_t referrer_chain_length() { return referrer_chain_length_; }
  size_t recent_navigations_to_collect() {
    return recent_navigations_to_collect_;
  }

  // Unique user data key used to get and set referrer chain data in
  // DownloadItem.
  static const char kDownloadReferrerChainDataKey[];
  static const char kDownloadReferrerChainDataKeyForEnterprise[];

 private:
  // Result of trying to get the referrer chain. Referrer chains are
  // fetched once per download, at the beginning of downloading to disk.
  ReferrerChainProvider::AttributionResult attribution_result_ =
      ReferrerChainProvider::AttributionResult::NAVIGATION_EVENT_NOT_FOUND;
  // The referrer chain itself
  std::unique_ptr<ReferrerChain> referrer_chain_;
  // This is the actual referrer chain length before appending recent navigation
  // events;
  size_t referrer_chain_length_;
  // |recent_navigations_to_collect_| is controlled by finch parameter. If the
  // user is incognito mode or hasn't enabled extended reporting, this value is
  // always 0.
  size_t recent_navigations_to_collect_;
};

// Struct to store a URL copied to the clipboard, along with which frame and
// main_frame this was copied from.
//
// Both constructors are only declared here; their definitions live outside
// this header.
struct CopyPasteEntry {
  // NOTE(review): presumably each argument initializes the same-named member
  // below — confirm against the out-of-line definition.
  explicit CopyPasteEntry(GURL target,
                          GURL source_frame_url,
                          GURL source_main_frame_url,
                          base::Time recorded_time);
  // User-declared copy constructor; entries are handed around by value (for
  // example inside std::optional<CopyPasteEntry>).
  CopyPasteEntry(const CopyPasteEntry& other);
  // Presumably the URL that was copied to the clipboard — TODO confirm.
  GURL target_;
  // URL of the frame the copy came from.
  GURL source_frame_url_;
  // URL of the main frame the copy came from.
  GURL source_main_frame_url_;
  // Presumably the time at which the copy was recorded — TODO confirm.
  base::Time recorded_time_;
};

// Struct that manages insertion, cleanup, and lookup of NavigationEvent
// objects. Its maximum size is `GetNavigationRecordMaxSize()`.
//
// Two collections are kept: a deque of recorded navigation events and a map of
// still-pending navigation events keyed by their NavigationHandle.
struct NavigationEventList {
 public:
  explicit NavigationEventList(std::size_t size_limit);

  ~NavigationEventList();

  // Finds the index of the most recent navigation event that navigated to
  // |target_url| and its associated |target_main_frame_url| in the tab with
  // ID |target_tab_id|. Returns an empty optional if no event is found.
  // If navigation happened in the main frame, |target_url| and
  // |target_main_frame_url| are the same.
  // If |target_url| is empty, we use its main frame url (a.k.a.
  // |target_main_frame_url|) to search for navigation events.
  // If |target_tab_id| is invalid, we look in all tabs for the most
  // recent navigation to |target_url| or |target_main_frame_url|.
  // This method starts traversing the list in reverse order of events starting
  // from |start_index| to prevent infinite loops.
  // For some cases, the most recent navigation to |target_url| may not
  // be relevant. For example, url1 in window A opens url2 in window B, url1
  // then opens an about:blank page window C and injects script code in it to
  // trigger a delayed event (e.g. a download) in Window D. Before the event
  // occurs, url2 in window B opens a different about:blank page in window C.
  // A ---- C - D
  //   \   /
  //     B
  // In this case, FindNavigationEvent() will think url2 in Window B is the
  // referrer of about:blank in Window C since this navigation is more recent.
  // However, it does not prevent us from attributing url1 in Window A as the
  // cause of all these navigations.
  //
  // If an |outermost_main_frame_id| is supplied, the function attempts to find
  // a navigation event per the logic described above with the additional
  // constraint that the |outermost_main_frame_id| match. If there is no such
  // event, it will return the first main frame event that matches the other
  // criteria. And if there is still no matching event, the function will return
  // an empty optional.
  //
  // NOTE(review): the role of |last_event_timestamp| is not described here;
  // presumably it bounds how recent a matching event may be — confirm against
  // the implementation.
  std::optional<size_t> FindNavigationEvent(
      const base::Time& last_event_timestamp,
      const GURL& target_url,
      const GURL& target_main_frame_url,
      SessionID target_tab_id,
      const content::GlobalRenderFrameHostId& outermost_main_frame_id,
      size_t start_index);

  // Finds the navigation event in the |pending_navigation_events_| map that
  // has the same destination URL as the |target_url|. If there are multiple
  // matches, returns the one with the latest updated time.
  NavigationEvent* FindPendingNavigationEvent(const GURL& target_url);

  // Finds the index of the most recent retargeting NavigationEvent in the
  // list that satisfies the |target_tab_id| and is not the same NavigationEvent
  // stored in |start_index|.
  // The not-found result is documented as -1; because the return type is the
  // unsigned size_t, callers observe that as the largest size_t value.
  // NOTE(review): confirm the exact sentinel against the implementation.
  size_t FindRetargetingNavigationEvent(const base::Time& last_event_timestamp,
                                        SessionID target_tab_id,
                                        size_t start_index);

  // Takes ownership of |nav_event|. |last_copy_paste_entry| is optional and
  // defaults to std::nullopt.
  void RecordNavigationEvent(
      std::unique_ptr<NavigationEvent> nav_event,
      std::optional<CopyPasteEntry> last_copy_paste_entry = std::nullopt);

  // The three functions below operate on the pending-navigation map, which is
  // keyed by |navigation_handle|.
  void RecordPendingNavigationEvent(
      content::NavigationHandle* navigation_handle,
      std::unique_ptr<NavigationEvent> nav_event);

  void AddRedirectUrlToPendingNavigationEvent(
      content::NavigationHandle* navigation_handle,
      const GURL& server_redirect_url);

  void RemovePendingNavigationEvent(
      content::NavigationHandle* navigation_handle);

  // Removes stale NavigationEvents and returns the number of items removed.
  std::size_t CleanUpNavigationEvents();

  // Number of recorded (non-pending) navigation events.
  std::size_t NavigationEventsSize() const { return navigation_events_.size(); }
  // Number of pending navigation events.
  std::size_t PendingNavigationEventsSize() const {
    return pending_navigation_events_.size();
  }

  // Returns the event stored at |index|; the list keeps ownership. No range
  // check is performed here, so |index| is expected to be smaller than
  // NavigationEventsSize().
  NavigationEvent* GetNavigationEvent(std::size_t index) {
    return navigation_events_[index].get();
  }

  // Read-only view of the recorded navigation events.
  const base::circular_deque<std::unique_ptr<NavigationEvent>>&
  navigation_events() const {
    return navigation_events_;
  }

  // Read-only view of the pending navigation events.
  const base::flat_map<content::NavigationHandle*,
                       std::unique_ptr<NavigationEvent>>&
  pending_navigation_events() const {
    return pending_navigation_events_;
  }

 private:
  base::circular_deque<std::unique_ptr<NavigationEvent>> navigation_events_;
  // A map of pending navigation events. They are added when the navigation
  // starts and removed when the navigation is finished.
  base::flat_map<content::NavigationHandle*, std::unique_ptr<NavigationEvent>>
      pending_navigation_events_;

  // Upper bound supplied at construction (see the struct comment).
  const std::size_t size_limit_;
};

// Manager class for SafeBrowsingNavigationObserver, which is in charge of
// cleaning up stale navigation events, and identifying landing page/landing
// referrer for a specific Safe Browsing event.
//
// Besides providing referrer chains (ReferrerChainProvider), it is a
// KeyedService and observes service worker navigations and clipboard writes.
class SafeBrowsingNavigationObserverManager
    : public ReferrerChainProvider,
      public content::ServiceWorkerContextObserver,
      public KeyedService,
      public ui::Clipboard::ClipboardWriteObserver {
 public:
  // Helper function to check if user gesture is older than
  // kUserGestureTTL.
  static bool IsUserGestureExpired(const base::Time& timestamp);

  // Helper function to strip ref fragment from a URL. Many pages end up with a
  // fragment (e.g. http://bar.com/index.html#foo) at the end due to in-page
  // navigation or a single "#" at the end due to navigation triggered by
  // href="#" and javascript onclick function. We don't want to have separate
  // entries for these cases in the maps.
  static GURL ClearURLRef(const GURL& url);

  // Checks if we should enable observing navigations for safe browsing
  // purposes. Returns true if safe browsing is enabled and the safe browsing
  // service is present in the embedder.
  static bool IsEnabledAndReady(PrefService* prefs,
                                bool has_safe_browsing_service);

  // Sanitizes the referrer chain by only keeping origin information of all
  // URLs.
  static void SanitizeReferrerChain(ReferrerChain* referrer_chain);

  // Neither pointer is owned by this object (see |pref_service_| and
  // |notification_context_| below).
  explicit SafeBrowsingNavigationObserverManager(
      PrefService* pref_service,
      content::ServiceWorkerContext* context);

  // Not copyable.
  SafeBrowsingNavigationObserverManager(
      const SafeBrowsingNavigationObserverManager&) = delete;
  SafeBrowsingNavigationObserverManager& operator=(
      const SafeBrowsingNavigationObserverManager&) = delete;

  ~SafeBrowsingNavigationObserverManager() override;

  // Adds |nav_event| to |navigation_event_list_|. Object pointed to by
  // |nav_event| will be no longer accessible after this function.
  void RecordNavigationEvent(content::NavigationHandle* navigation_handle,
                             std::unique_ptr<NavigationEvent> nav_event);
  void RecordPendingNavigationEvent(
      content::NavigationHandle* navigation_handle,
      std::unique_ptr<NavigationEvent> nav_event);
  // Records that a Push Notification initiated a navigation.
  // |script_url| is the URL of the service worker.
  // |url| is the destination URL.
  void RecordNotificationNavigationEvent(const GURL& script_url,
                                         const GURL& url);
  void AddRedirectUrlToPendingNavigationEvent(
      content::NavigationHandle* navigation_handle,
      const GURL& server_redirect_url);

  // User gesture bookkeeping, tracked per WebContents (see
  // |user_gesture_map_|).
  void RecordUserGestureForWebContents(content::WebContents* web_contents);
  void OnUserGestureConsumed(content::WebContents* web_contents);
  bool HasUserGesture(content::WebContents* web_contents);
  bool HasUnexpiredUserGesture(content::WebContents* web_contents);

  // Records that |host| resolved to |ip| (see |host_to_ip_map_|).
  void RecordHostToIpMapping(const std::string& host, const std::string& ip);

  // Performs the clean-ups that are needed when a WebContents gets destroyed.
  void OnWebContentDestroyed(content::WebContents* web_contents);

  // Removes all the observed NavigationEvents, user gestures, and resolved IP
  // addresses that are older than `GetNavigationFootprintTTL()`.
  void CleanUpStaleNavigationFootprints();

  // Based on the |event_url| and |event_tab_id|, traces back the observed
  // NavigationEvents in navigation_event_list_ to identify the sequence of
  // navigations leading to the target, with the coverage limited to
  // |user_gesture_count_limit| number of user gestures. Then converts these
  // identified NavigationEvents into ReferrerChainEntrys and appends them to
  // |out_referrer_chain|.
  AttributionResult IdentifyReferrerChainByEventURL(
      const GURL& event_url,
      SessionID event_tab_id,  // Invalid if tab id is unknown or not available.
      const content::GlobalRenderFrameHostId&
          event_outermost_main_frame_id,  // Can also be Invalid.
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain) override;

  // Helper function to |IdentifyReferrerChainByEventURL| above in cases where
  // |event_outermost_main_frame_id| is not available. That value will default
  // to |content::GlobalRenderFrameHostId()|.
  AttributionResult IdentifyReferrerChainByEventURL(
      const GURL& event_url,
      SessionID event_tab_id,  // Invalid if tab id is unknown or not available.
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain) override;

  // Based on the |event_url|, traces back the observed PendingNavigationEvents
  // and NavigationEvents in navigation_event_list_ to identify the sequence of
  // navigations leading to the |event_url|, with the coverage limited to
  // |user_gesture_count_limit| number of user gestures. Then converts these
  // identified NavigationEvents into ReferrerChainEntrys and appends them to
  // |out_referrer_chain|.
  // Note that the first entry of the ReferrerChainEntrys is matched against the
  // PendingNavigationEvents, and the remaining entries are matched against the
  // NavigationEvents.
  AttributionResult IdentifyReferrerChainByPendingEventURL(
      const GURL& event_url,
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain) override;

  // Based on the |render_frame_host| associated with an event, traces back the
  // observed NavigationEvents in |navigation_event_list_| to identify the
  // sequence of navigations leading to the event hosting page, with the
  // coverage limited to |user_gesture_count_limit| number of user gestures.
  // Then converts these identified NavigationEvents into ReferrerChainEntrys
  // and appends them to |out_referrer_chain|.
  AttributionResult IdentifyReferrerChainByRenderFrameHost(
      content::RenderFrameHost* render_frame_host,
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain) override;

  // Based on the |initiating_frame_url| and its associated |tab_id|, traces
  // back the observed NavigationEvents in navigation_event_list_ to identify
  // those navigations leading to this |initiating_frame_url|. If this
  // initiating frame has a user gesture, we trace back with the coverage
  // limited to |user_gesture_count_limit|-1 number of user gestures, otherwise
  // we trace back |user_gesture_count_limit| number of user gestures. We then
  // convert these identified NavigationEvents into ReferrerChainEntrys and
  // append them to |out_referrer_chain|.
  AttributionResult IdentifyReferrerChainByHostingPage(
      const GURL& initiating_frame_url,
      const GURL& initiating_main_frame_url,
      const content::GlobalRenderFrameHostId&
          initiating_outermost_main_frame_id,
      SessionID tab_id,
      bool has_user_gesture,
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain);

  // Records the creation of a new WebContents by |source_web_contents|. This is
  // used to detect cross-frame and cross-tab navigations.
  void RecordNewWebContents(content::WebContents* source_web_contents,
                            content::RenderFrameHost* source_render_frame_host,
                            const GURL& target_url,
                            ui::PageTransition page_transition,
                            content::WebContents* target_web_contents,
                            bool renderer_initiated);

  // Based on user state, attribution result and finch parameter, calculates the
  // number of recent navigations we want to append to the referrer chain.
  static size_t CountOfRecentNavigationsToAppend(
      content::BrowserContext* browser_context,
      PrefService* prefs,
      AttributionResult result);

  // Appends |recent_navigation_count| number of recent navigation events to
  // referrer chain in reverse chronological order.
  void AppendRecentNavigations(size_t recent_navigation_count,
                               ReferrerChain* out_referrer_chain);

  // ui::Clipboard::ClipboardWriteObserver:
  // Event for new URLs copied to the clipboard.
  void OnCopyURL(const GURL& url,
                 const GURL& source_frame_url,
                 const GURL& source_main_frame_url) override;

  // content::ServiceWorkerContextObserver implementation.
  void OnClientNavigated(const GURL& script_url, const GURL& url) override;
  void OnWindowOpened(const GURL& script_url, const GURL& url) override;

 protected:
  // Mutable access to the navigation event list for subclasses.
  NavigationEventList* navigation_event_list() {
    return &navigation_event_list_;
  }

 private:
  friend class TestNavigationObserverManager;
  friend class SBNavigationObserverBrowserTest;
  friend class SBNavigationObserverTest;
  friend class ChromeClientSideDetectionHostDelegateTest;

  // Hashes a GURL through the std::string hash of its spec().
  // NOTE(review): not referenced anywhere else in this header.
  struct GurlHash {
    std::size_t operator()(const GURL& url) const {
      return std::hash<std::string>()(url.spec());
    }
  };

  // Alias declarations (`using`) instead of `typedef`; the aliased types are
  // unchanged.
  using UserGestureMap = std::unordered_map<content::WebContents*, base::Time>;
  using HostToIpMap =
      std::unordered_map<std::string, std::vector<ResolvedIPAddress>>;

  // Mutable access to the host-to-IP map; private, so only reachable from
  // this class and the friend classes declared above.
  HostToIpMap* host_to_ip_map() { return &host_to_ip_map_; }

  // Remove stale entries from navigation_event_list_ if they are older than
  // `GetNavigationFootprintTTL()`.
  void CleanUpNavigationEvents();

  // Remove stale entries from user_gesture_map_ if they are older than
  // `GetNavigationFootprintTTL()`.
  void CleanUpUserGestures();

  // Remove stale entries from host_to_ip_map_ if they are older than
  // `GetNavigationFootprintTTL()`.
  void CleanUpIpAddresses();

  // Remove stale copy entries.
  void CleanUpCopyData();

  // Remove stale entries from notification_navigation_events_.
  void CleanUpNotificationNavigationEvents();

  bool IsCleanUpScheduled() const;

  void ScheduleNextCleanUpAfterInterval(base::TimeDelta interval);

  // Adds the event to the referrer chain, unless it is older than
  // `GetNavigationFootprintTTL()`.
  void MaybeAddToReferrerChain(ReferrerChain* referrer_chain,
                               NavigationEvent* nav_event,
                               const GURL& destination_main_frame_url,
                               ReferrerChainEntry::URLType type);

  // Helper function to get the remaining referrer chain when we've already
  // traced back |current_user_gesture_count| number of user gestures.
  // This method uses a |last_nav_event_traced_index| to check where to start
  // in |navigation_events_|.
  // This function modifies the |out_referrer_chain| and |out_result|.
  void GetRemainingReferrerChain(size_t last_nav_event_traced_index,
                                 int current_user_gesture_count,
                                 int user_gesture_count_limit,
                                 ReferrerChain* out_referrer_chain,
                                 AttributionResult* out_result);

  // Helper function to get the remaining referrer chain when we've already
  // traced back |current_user_gesture_count| number of user gestures.
  // This method uses a |last_nav_event_traced_index| to check where to start
  // in |navigation_events_| and the |last_nav_event_traced| to get the next
  // entry. This function modifies the |out_referrer_chain| and |out_result|.
  void GetRemainingReferrerChainForNavEvent(
      NavigationEvent* last_nav_event_traced,
      size_t last_nav_event_traced_index,
      int current_user_gesture_count,
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain,
      AttributionResult* out_result);

  // Removes URLs in |out_referrer_chain| that match the Safe Browsing allowlist
  // domains.
  void RemoveSafeBrowsingAllowlistDomains(ReferrerChain* out_referrer_chain);

  // navigation_event_list_ keeps track of all the observed navigations. Since
  // the same url can be requested multiple times across different tabs and
  // frames, this list of NavigationEvents is ordered by navigation finish
  // time. Entries in navigation_event_list_ will be removed if they are older
  // than `GetNavigationFootprintTTL()` since their corresponding navigations
  // finished (2 minutes at the time this comment was written — NOTE(review):
  // confirm) or there are more than `GetNavigationRecordMaxSize()` entries.
  NavigationEventList navigation_event_list_;

  // user_gesture_map_ keeps track of the timestamp of the last user gesture
  // in each WebContents. We assume that in the majority of cases, a navigation
  // shortly after a user gesture indicates this navigation is user initiated.
  UserGestureMap user_gesture_map_;

  // Host to timestamped IP addresses map that covers all the main frame and
  // subframe URLs' hosts. Since it is possible for a host to resolve to more
  // than one IP in even a short period of time, we map a single host to a
  // vector of ResolvedIPAddresss.
  HostToIpMap host_to_ip_map_;

  // Unowned object used for getting preference settings.
  raw_ptr<PrefService> pref_service_;

  // One-shot timer member; presumably drives the scheduled clean-up (see
  // IsCleanUpScheduled() / ScheduleNextCleanUpAfterInterval()) — TODO confirm.
  base::OneShotTimer cleanup_timer_;

  // Most recent clipboard copy, if any has been stored.
  // NOTE(review): presumably set from OnCopyURL() — confirm.
  std::optional<CopyPasteEntry> last_copy_paste_entry_;

  // A map of destination URLs to Push notification initiated navigation events.
  base::flat_map<GURL, std::unique_ptr<NavigationEvent>>
      notification_navigation_events_;

  // A reference to the ServiceWorkerContext that enables us to observe clicks
  // on Push notifications.
  //
  // |notification_context_| is expected to outlive the
  // SafeBrowsingNavigationObserverManager.
  //
  // SafeBrowsingNavigationObserverManager is owned by
  // SafeBrowsingNavigationObserverManagerFactory which listens for
  // BrowserContextDestroyed events which happen before the BrowserContext is
  // destroyed. (Note: the BrowserContext initiates ServiceWorkerContext
  // destruction via the StoragePartition.)
  raw_ptr<content::ServiceWorkerContext> notification_context_;
};
}  // namespace safe_browsing

#endif  // COMPONENTS_SAFE_BROWSING_CONTENT_BROWSER_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_