// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_OBSERVER_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_OBSERVER_H_
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <optional>
#include <set>
#include <vector>

#include "base/functional/callback.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/timer/timer.h"
#include "components/optimization_guide/content/browser/page_text_dump_result.h"
#include "components/optimization_guide/content/mojom/page_text_service.mojom.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"
namespace content {
class NavigationHandle;
} // namespace content
namespace optimization_guide {
// Provides callers with the text from web pages that they choose to request.
//
// Currently, the only method of obtaining text from the page is by recursively
// iterating through all DOM nodes. This is a very expensive operation which
// should be avoided whenever possible.
//
// Features that wish to get page text need to implement the |Consumer|
// interface. |Consumer::MaybeRequestFrameTextDump| is called on every
// navigation commit, at which time consumers must decide whether to request
// the page text to be dumped, and at what renderer event. This service will
// de-duplicate the requests and serve the responses to the provided callback.
class PageTextObserver : public content::WebContentsObserver,
                         public content::WebContentsUserData<PageTextObserver> {
 public:
  ~PageTextObserver() override;

  // Retrieves the instance of PageTextObserver that was attached to the
  // specified WebContents. If no instance was attached, creates one, and
  // attaches it to the specified WebContents.
  static PageTextObserver* GetOrCreateForWebContents(
      content::WebContents* web_contents);

  // Contains all the information that is needed to request a text dump by a
  // consumer.
  struct ConsumerTextDumpRequest {
    ConsumerTextDumpRequest();
    ~ConsumerTextDumpRequest();

    // The callback that is used to provide dumped page text.
    using TextDumpCallback =
        base::OnceCallback<void(const PageTextDumpResult&)>;
    TextDumpCallback callback;

    // The max size of the text dump in bytes. Note that the actual size that
    // is passed in the callback may actually be greater than this value if
    // another consumer requests a greater amount on the same event, or less on
    // pages with little text.
    uint32_t max_size = 0;

    // Set when subframe text dumps should be taken on AMP subframes. A text
    // dump of the mainframe will always also be taken. Consumers that set this
    // should use |PageTextDumpResult::ConcatenateWithAMPHandling| on the
    // |callback|.
    bool dump_amp_subframes = false;

    // All of the |TextDumpEvent|'s that have been requested.
    std::set<mojom::TextDumpEvent> events;
  };

  // Callers should implement this class to request text dumps of pages at
  // commit time.
  class Consumer {
   public:
    // |Consumer| is a polymorphic interface that is handled through
    // |Consumer*|; a virtual destructor keeps destruction through a base
    // pointer well-defined for every implementation.
    virtual ~Consumer() = default;

    // Called at commit of every main frame navigation. Consumers should return
    // a request if they want to get the page text, or nullptr if not.
    virtual std::unique_ptr<ConsumerTextDumpRequest> MaybeRequestFrameTextDump(
        content::NavigationHandle* handle) = 0;
  };

  // Adds or removes a consumer. Consumers must remain valid between calling Add
  // and Remove. Virtual for testing.
  virtual void AddConsumer(Consumer* consumer);
  virtual void RemoveConsumer(Consumer* consumer);

  // Returns the current value of |outstanding_requests_|.
  size_t outstanding_requests() const { return outstanding_requests_; }

  // content::WebContentsObserver:
  void DidFinishNavigation(content::NavigationHandle* handle) override;
  void RenderFrameCreated(content::RenderFrameHost* rfh) override;
  void DidFinishLoad(content::RenderFrameHost* render_frame_host,
                     const GURL& validated_url) override;

  // Not copyable or assignable.
  PageTextObserver(const PageTextObserver&) = delete;
  PageTextObserver& operator=(const PageTextObserver&) = delete;

 protected:
  // Protected: instances are obtained via |GetOrCreateForWebContents|.
  explicit PageTextObserver(content::WebContents* web_contents);

  // Virtual for testing.
  // NOTE(review): presumably reports whether |rfh| is an out-of-process
  // iframe; the definition is not visible in this header -- confirm.
  virtual bool IsOOPIF(content::RenderFrameHost* rfh) const;

 private:
  // Lets the user-data base class reach the protected constructor and the
  // user data key.
  friend class content::WebContentsUserData<PageTextObserver>;

  // Takes the (optional) result of a single frame's text dump.
  // NOTE(review): presumably run when a frame dump request finishes; see the
  // definition for how an empty optional is handled.
  void OnFrameTextDumpCompleted(
      std::optional<FrameTextDumpResult> frame_result);

  // Dispatches |page_result_| to consumers (see the comment on
  // |outstanding_requests_grace_timer_|).
  void DispatchResponses();

  // All registered consumers. Not owned; see |AddConsumer|/|RemoveConsumer|.
  std::set<raw_ptr<Consumer, SetExperimental>> consumers_;

  // A persisted set of consumer requests.
  std::vector<std::unique_ptr<ConsumerTextDumpRequest>> requests_;

  // The page-level result that is handed to consumers in |DispatchResponses|.
  std::unique_ptr<PageTextDumpResult> page_result_;

  // |outstanding_requests_grace_timer_| is set after |DidFinishLoad| if the
  // number of |outstanding_requests_| is > 0. When the timer fires, the
  // |page_result_| will be finalized and dispatched to consumers (in
  // |DispatchResponses|).
  std::unique_ptr<base::OneShotTimer> outstanding_requests_grace_timer_;

  // Number of requests still outstanding; exposed through
  // |outstanding_requests()|.
  size_t outstanding_requests_ = 0;

  base::WeakPtrFactory<PageTextObserver> weak_factory_{this};

  WEB_CONTENTS_USER_DATA_KEY_DECL();
};
} // namespace optimization_guide
#endif // COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_OBSERVER_H_