File: page_text_observer.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (136 lines) | stat: -rw-r--r-- 5,363 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_OBSERVER_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_OBSERVER_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <optional>
#include <set>
#include <vector>

#include "base/functional/callback.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/timer/timer.h"
#include "components/optimization_guide/content/browser/page_text_dump_result.h"
#include "components/optimization_guide/content/mojom/page_text_service.mojom.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"

// Forward declaration only: this header refers to NavigationHandle solely
// through pointers, so the full definition is not needed here.
namespace content {
class NavigationHandle;
}  // namespace content

namespace optimization_guide {

// Provides callers with the text from web pages that they choose to request.
// Currently, the only method of obtaining text from the page is by recursively
// iterating through all DOM nodes. This is a very expensive operation which
// should be avoided whenever possible. Features that wish to get page text need
// to implement the |Consumer| interface. |Consumer::MaybeRequestFrameTextDump|
// is called on every navigation commit, at which time consumers must decide
// whether to request the page text to be dumped, and at what renderer event.
// This service will de-duplicate the requests and serve the responses to the
// provided callback.
class PageTextObserver : public content::WebContentsObserver,
                         public content::WebContentsUserData<PageTextObserver> {
 public:
  ~PageTextObserver() override;

  // Retrieves the instance of PageTextObserver that was attached
  // to the specified WebContents. If no instance was attached, creates one,
  // and attaches it to the specified WebContents.
  static PageTextObserver* GetOrCreateForWebContents(
      content::WebContents* web_contents);

  // Contains all the information that is needed to request a text dump by a
  // consumer.
  struct ConsumerTextDumpRequest {
    ConsumerTextDumpRequest();
    ~ConsumerTextDumpRequest();

    // The callback that is used to provide dumped page text.
    using TextDumpCallback =
        base::OnceCallback<void(const PageTextDumpResult&)>;
    TextDumpCallback callback;

    // The max size of the text dump in bytes. Note that the actual size
    // that is passed in the callback may actually be greater than this value if
    // another consumer requests a greater amount on the same event, or less on
    // pages with little text.
    uint32_t max_size = 0;

    // Set when subframe text dumps should be taken on AMP subframes. A text
    // dump of the mainframe will always also be taken. Consumers that set this
    // should use |PageTextDumpResult::ConcatenateWithAMPHandling| on the
    // |callback|.
    bool dump_amp_subframes = false;

    // All of the |TextDumpEvent|'s that have been requested.
    std::set<mojom::TextDumpEvent> events;
  };

  // Callers should implement this class to request text dumps of pages at
  // commit time.
  class Consumer {
   public:
    // Called at commit of every main frame navigation. Consumers should return
    // a request if they want to get the page text, or nullptr if not.
    virtual std::unique_ptr<ConsumerTextDumpRequest> MaybeRequestFrameTextDump(
        content::NavigationHandle* handle) = 0;

   protected:
    // Consumers are registered by raw pointer and must be kept alive by their
    // owner (see AddConsumer/RemoveConsumer), so nothing should ever destroy
    // one through this interface. Keeping the destructor protected and
    // non-virtual turns an accidental `delete` via Consumer* into a compile
    // error instead of undefined behavior.
    ~Consumer() = default;
  };

  // Adds or removes a consumer. Consumers must remain valid between calling Add
  // and Remove. Virtual for testing.
  virtual void AddConsumer(Consumer* consumer);
  virtual void RemoveConsumer(Consumer* consumer);

  // Returns the current value of the |outstanding_requests_| counter.
  size_t outstanding_requests() const { return outstanding_requests_; }

  // content::WebContentsObserver:
  void DidFinishNavigation(content::NavigationHandle* handle) override;
  void RenderFrameCreated(content::RenderFrameHost* rfh) override;
  void DidFinishLoad(content::RenderFrameHost* render_frame_host,
                     const GURL& validated_url) override;

  // Not copyable or assignable.
  PageTextObserver(const PageTextObserver&) = delete;
  PageTextObserver& operator=(const PageTextObserver&) = delete;

 protected:
  // Protected: instances are expected to be obtained through
  // |GetOrCreateForWebContents| (or created by a test subclass).
  explicit PageTextObserver(content::WebContents* web_contents);

  // Virtual for testing.
  // NOTE(review): presumably reports whether |rfh| is an out-of-process
  // iframe -- confirm against the implementation.
  virtual bool IsOOPIF(content::RenderFrameHost* rfh) const;

 private:
  friend class content::WebContentsUserData<PageTextObserver>;

  // Receives the result of a single frame text dump.
  // NOTE(review): presumably |frame_result| is std::nullopt when no dump could
  // be obtained for the frame -- confirm against the implementation.
  void OnFrameTextDumpCompleted(
      std::optional<FrameTextDumpResult> frame_result);

  // Finalizes |page_result_| and dispatches it to consumers; see the comment
  // on |outstanding_requests_grace_timer_| below.
  void DispatchResponses();

  // All registered consumers. Not owned.
  std::set<raw_ptr<Consumer, SetExperimental>> consumers_;

  // A persisted set of consumer requests.
  std::vector<std::unique_ptr<ConsumerTextDumpRequest>> requests_;

  // The page-level result that is handed to consumers in |DispatchResponses|.
  std::unique_ptr<PageTextDumpResult> page_result_;

  // |outstanding_requests_grace_timer_| is set after |DidFinishLoad| if the
  // number of |outstanding_requests_| is > 0. When the timer fires, the
  // |page_result_| will be finalized and dispatched to consumers (in
  // |DispatchResponses|).
  std::unique_ptr<base::OneShotTimer> outstanding_requests_grace_timer_;
  size_t outstanding_requests_ = 0;

  base::WeakPtrFactory<PageTextObserver> weak_factory_{this};

  WEB_CONTENTS_USER_DATA_KEY_DECL();
};

}  // namespace optimization_guide

#endif  // COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_OBSERVER_H_