File: lens_search_contextualization_controller.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (348 lines) | stat: -rw-r--r-- 15,265 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_UI_LENS_LENS_SEARCH_CONTEXTUALIZATION_CONTROLLER_H_
#define CHROME_BROWSER_UI_LENS_LENS_SEARCH_CONTEXTUALIZATION_CONTROLLER_H_

#include "chrome/browser/lens/core/mojom/lens_side_panel.mojom.h"
#include "chrome/browser/ui/lens/lens_overlay_query_controller.h"
#include "chrome/common/chrome_render_frame.mojom.h"
#include "components/lens/lens_overlay_invocation_source.h"
#include "components/omnibox/browser/autocomplete_match_type.h"
#include "components/tabs/public/tab_interface.h"
#include "pdf/buildflags.h"

#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#include "pdf/mojom/pdf.mojom.h"
#endif  // BUILDFLAG(ENABLE_PDF)

class LensSearchController;

namespace content {
class RenderFrameHost;
}  // namespace content

namespace content_extraction {
struct InnerTextResult;
}  // namespace content_extraction

namespace optimization_guide {
struct AIPageContentResult;
}  // namespace optimization_guide

// Shorthand for the callback type of the same name declared by
// lens::mojom::LensSidePanelPageHandler. Note that this alias is declared at
// global scope, outside of namespace lens.
using GetIsContextualSearchboxCallback =
    lens::mojom::LensSidePanelPageHandler::GetIsContextualSearchboxCallback;

namespace lens {

class LensSearchboxController;

// Callback type alias for when page content has been retrieved. Multiple
// pieces and types of content may be retrieved and returned in
// `page_contents`.
// `primary_content_type` is the main type used in the request flow and used to
// determine request params and whether updated requests need to be sent.
// `pdf_page_count` is the number of pages in the document being retrieved, not
// necessarily the number of pages represented in `page_contents`. For example,
// if the document is a PDF, `pdf_page_count` is the number of pages in the
// PDF, while the bytes carried in `page_contents` could be empty because the
// PDF is too large.
using PageContentRetrievedCallback =
    base::OnceCallback<void(std::vector<lens::PageContent> page_contents,
                            lens::MimeType primary_content_type,
                            std::optional<uint32_t> pdf_page_count)>;

// Callback type alias for retrieving the text from the PDF pages one by one.
// `pdf_pages_text` carries the retrieved text as a list of UTF-16 strings.
// NOTE(review): presumably one entry per PDF page, in page order - confirm
// against the implementation.
using PdfPartialPageTextRetrievedCallback =
    base::OnceCallback<void(std::vector<std::u16string> pdf_pages_text)>;

// Callback type alias for when the page context has been updated. This is used
// to allow requests to be made after the latest page context has been sent to
// the server. The callback takes no arguments and can be run at most once.
using OnPageContextUpdatedCallback = base::OnceCallback<void()>;

// Controller responsible for handling contextualization logic for Lens flows.
// This includes grabbing content related to the page and issuing Lens requests
// so searchbox requests are contextualized.
class LensSearchContextualizationController {
 public:
  explicit LensSearchContextualizationController(
      LensSearchController* lens_search_controller);

  // Not copyable or assignable. The class hands out weak pointers to itself
  // and holds a const back-pointer to its owner, so copies are not meaningful.
  LensSearchContextualizationController(
      const LensSearchContextualizationController&) = delete;
  LensSearchContextualizationController& operator=(
      const LensSearchContextualizationController&) = delete;

  virtual ~LensSearchContextualizationController();

  // Internal state machine. States are mutually exclusive. Exposed for testing.
  enum class State {
    // This is the default state. The contextualization flow is not currently
    // active.
    kOff,

    // The contextualization flow is in the process of initializing.
    kInitializing,

    // The contextualization flow is active.
    kActive,

    // TODO(crbug.com/335516480): Implement suspended state.
    kSuspended,
  };

  // Returns the current state of the contextualization flow. Read-only, so it
  // is usable through const references just like IsActive().
  State state() const { return state_; }

  // Starts the contextualization flow without the overlay being shown to the
  // user. Virtual for testing.
  virtual void StartContextualization(
      lens::LensOverlayInvocationSource invocation_source,
      OnPageContextUpdatedCallback callback);

  // Tries to fetch the underlying page content bytes to use for
  // contextualization. If page content can not be retrieved, the callback will
  // be run with no bytes.
  void GetPageContextualization(PageContentRetrievedCallback callback);

  // Tries to fetch the underlying page content bytes and update the query flow
  // with them. `callback` will be run whether the page context was updated or
  // not.
  void TryUpdatePageContextualization(
      OnPageContextUpdatedCallback callback);

#if BUILDFLAG(ENABLE_PDF)
  // Fetches the visible page index from the PDF renderer and then starts the
  // process of fetching the text from the PDF to be used for suggest signals.
  // This is a no-op if the tab is not a PDF. Once the partial text is
  // retrieved, the text is sent to the server via the query controller.
  void FetchVisiblePageIndexAndGetPartialPdfText(
      uint32_t page_count,
      PdfPartialPageTextRetrievedCallback callback);
#endif  // BUILDFLAG(ENABLE_PDF)

  // Resets the state of the contextualization controller to kOff.
  void ResetState();

  // Records the UMA for the metrics relating to the document where the
  // contextual search box was shown. If this is a webpage, records the size of
  // the innerHtml and the innerText. If this is a PDF, records the byte size of
  // the PDF and the number of pages. `pdf_page_count` is only used for PDFs.
  void RecordDocumentMetrics(std::optional<uint32_t> pdf_page_count);

  // Posts a task to the background thread to calculate the OCR DOM similarity
  // and then records the result. Only records the similarity once per session.
  // Only records the similarity if the OCR text and page content are available.
  void TryCalculateAndRecordOcrDomSimilarity();

  // Sets the text of the page. Used to calculate the OCR DOM similarity.
  // Should only be called once per session.
  void SetText(lens::mojom::TextPtr text);

  // TODO(crbug.com/418825720): Remove this code once the early start query flow
  // optimization is fully launched as this will no longer be needed as all
  // context updates will go through this controller. Sets the page content and
  // primary content type for the controller. Only used when the start query
  // flow optimization is not enabled to ensure that the page content is still
  // passed to the contextualization controller even if it does not make the
  // request to the server.
  void SetPageContent(std::vector<lens::PageContent> page_contents,
                      lens::MimeType primary_content_type);

  // Returns true only while the state machine is in State::kActive.
  bool IsActive() const { return state_ == State::kActive; }

 private:
  // Begin updating page contextualization by potentially taking a new
  // screenshot.
  void UpdatePageContextualization(std::vector<lens::PageContent> page_contents,
                                   lens::MimeType primary_content_type,
                                   std::optional<uint32_t> pdf_page_count);

  // Continue updating page contextualization by potentially getting the current
  // PDF page.
  void UpdatePageContextualizationPart2(
      std::vector<lens::PageContent> page_contents,
      lens::MimeType primary_content_type,
      std::optional<uint32_t> pdf_page_count,
      const SkBitmap& bitmap);

  // Updates the query flow with the new page content bytes and/or screenshot. A
  // request will only be sent if the bytes are different from the previous
  // bytes sent or the screenshot is different from the previous screenshot.
  void UpdatePageContextualizationPart3(
      std::vector<lens::PageContent> page_contents,
      lens::MimeType primary_content_type,
      std::optional<uint32_t> pdf_page_count,
      const SkBitmap& bitmap,
      std::optional<uint32_t> pdf_current_page);

  // Gets the inner HTML for contextualization if flag enabled. Otherwise skip
  // to MaybeGetInnerText().
  void MaybeGetInnerHtml(std::vector<lens::PageContent> page_contents,
                         content::RenderFrameHost* render_frame_host,
                         PageContentRetrievedCallback callback);

  // Callback for when the inner HTML is retrieved from the underlying page.
  // Calls MaybeGetInnerText().
  void OnInnerHtmlReceived(std::vector<lens::PageContent> page_contents,
                           content::RenderFrameHost* render_frame_host,
                           PageContentRetrievedCallback callback,
                           const std::optional<std::string>& result);

  // Gets the inner text for contextualization if flag enabled. Otherwise skip
  // to MaybeGetAnnotatedPageContent().
  void MaybeGetInnerText(std::vector<lens::PageContent> page_contents,
                         content::RenderFrameHost* render_frame_host,
                         PageContentRetrievedCallback callback);

  // Callback for when the inner text is retrieved from the underlying page.
  // Calls MaybeGetAnnotatedPageContent().
  void OnInnerTextReceived(
      std::vector<lens::PageContent> page_contents,
      content::RenderFrameHost* render_frame_host,
      PageContentRetrievedCallback callback,
      std::unique_ptr<content_extraction::InnerTextResult> result);

  // Gets the annotated page content for contextualization if flag enabled.
  // Otherwise run the callback with the HTML and/or innerText.
  void MaybeGetAnnotatedPageContent(
      std::vector<lens::PageContent> page_contents,
      content::RenderFrameHost* render_frame_host,
      PageContentRetrievedCallback callback);

  // Callback for when the annotated page content is retrieved. Runs the
  // callback with the HTML, innerText, and/or annotated page content.
  void OnAnnotatedPageContentReceived(
      std::vector<lens::PageContent> page_contents,
      PageContentRetrievedCallback callback,
      std::optional<optimization_guide::AIPageContentResult> apc);

#if BUILDFLAG(ENABLE_PDF)
  // Gets the PDF bytes from the IPC call to the PDF renderer if the PDF
  // feature is enabled. Otherwise run the callback with no bytes.
  void MaybeGetPdfBytes(pdf::PDFDocumentHelper* pdf_helper,
                        PageContentRetrievedCallback callback);

  // Receives the PDF bytes from the IPC call to the PDF renderer and stores
  // them in initialization data. `pdf_page_count` is passed to the partial PDF
  // text fetch to be used to determine when to stop fetching.
  void OnPdfBytesReceived(PageContentRetrievedCallback callback,
                          pdf::mojom::PdfListener::GetPdfBytesStatus status,
                          const std::vector<uint8_t>& bytes,
                          uint32_t pdf_page_count);

  // Gets the partial text from the PDF to be used for suggest. Schedules for
  // the next page of text to be fetched, from the PDF in page order until
  // either 1) all the text is received or 2) the character limit is reached.
  // This method should only be called by GetPartialPdfText.
  void GetPartialPdfTextCallback(uint32_t page_index,
                                 uint32_t total_page_count,
                                 uint32_t total_characters_retrieved,
                                 const std::u16string& page_text);

  // Callback to run when the partial page text is retrieved from the PDF.
  void OnPdfPartialPageTextRetrieved(
      std::vector<std::u16string> pdf_pages_text);
#endif  // BUILDFLAG(ENABLE_PDF)

  // Returns whether a screenshot can be taken of `view`.
  bool IsScreenshotPossible(content::RenderWidgetHostView* view);

  // Captures a screenshot and hands the resulting bitmap to `callback`.
  void CaptureScreenshot(base::OnceCallback<void(const SkBitmap&)> callback);

  // Callback for when the screenshot is captured and initial request data is
  // ready.
  void DidCaptureScreenshot(
      mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame>
          chrome_render_frame,
      int attempt_id,
      const SkBitmap& bitmap,
      const std::vector<gfx::Rect>& bounds,
      OnPageContextUpdatedCallback callback,
      std::optional<uint32_t> pdf_current_page);

  // Fetches the bounding boxes of all images within the current viewport.
  void FetchViewportImageBoundingBoxes(OnPageContextUpdatedCallback callback,
                                       const SkBitmap& bitmap);

  // Creates the mojo bounding boxes for the significant regions.
  std::vector<lens::mojom::CenterRotatedBoxPtr> ConvertSignificantRegionBoxes(
      const std::vector<gfx::Rect>& all_bounds);

  // Gets the current page number if viewing a PDF.
  void GetPdfCurrentPage(
      mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame>
          chrome_render_frame,
      int attempt_id,
      const SkBitmap& bitmap,
      OnPageContextUpdatedCallback callback,
      const std::vector<gfx::Rect>& bounds);

  // Callback to record the size of the innerText once it is fetched.
  void RecordInnerTextSize(
      std::unique_ptr<content_extraction::InnerTextResult> result);

  // Callback to record the size of the innerHtml once it is fetched.
  void RecordInnerHtmlSize(const std::optional<std::string>& result);

  // Returns the UI scale factor.
  float GetUiScaleFactor();

  // Accessors for sibling controllers.
  // NOTE(review): presumably these are looked up through
  // `lens_search_controller_` - confirm in the implementation file.
  lens::LensOverlayQueryController* GetQueryController();
  lens::LensSearchboxController* GetSearchboxController();

  // The current state of the contextualization flow.
  State state_ = State::kOff;

  // Indicates whether the user is currently on a context eligible page.
  bool is_page_context_eligible_ = true;

  // The callback to run when the partial page text is retrieved. This is
  // populated when FetchVisiblePageIndexAndGetPartialPdfText is called.
  PdfPartialPageTextRetrievedCallback pdf_partial_page_text_retrieved_callback_;

  // The screenshot of the viewport.
  SkBitmap viewport_screenshot_;

  // The page url. Empty if it is not allowed to be shared.
  GURL page_url_;

  // The page title, if it is allowed to be shared.
  std::optional<std::string> page_title_;

  // The data of the content the user is viewing. There can be multiple
  // content types for a single page, so we store them all in this vector.
  std::vector<lens::PageContent> page_contents_;

  // The primary type of the data stored in page_contents_. This is the value
  // used to determine request params and what content to look at when
  // determining if the page_contents_ needs to be present.
  lens::MimeType primary_content_type_ = lens::MimeType::kUnknown;

  // The page count of the PDF document if primary_content_type_ is kPdf.
  std::optional<uint32_t> pdf_page_count_;

  // The partial representation of a PDF document. The element at a given
  // index holds the text of the PDF page at the same index.
  std::vector<std::u16string> pdf_pages_text_;

  // The most visible page of the PDF document when the viewport was last
  // updated, if primary_content_type_ is kPdf.
  std::optional<uint32_t> last_retrieved_most_visible_page_;

  // The callback for the caller to pass to this controller to be notified when
  // the page context has been updated and sent to the server.
  OnPageContextUpdatedCallback on_page_context_updated_callback_;

  // The text of the page. Used to calculate the OCR DOM similarity. Used once
  // per session and then cleared.
  lens::mojom::TextPtr text_;

  // The source of the invocation.
  // NOTE(review): this member has no default initializer; presumably it is
  // assigned in StartContextualization() before being read - confirm.
  lens::LensOverlayInvocationSource invocation_source_;

  // Whether the OCR DOM similarity has been recorded in the current session.
  bool ocr_dom_similarity_recorded_in_session_ = false;

  // Owns this.
  const raw_ptr<LensSearchController> lens_search_controller_;

  // Must be the last member.
  base::WeakPtrFactory<LensSearchContextualizationController> weak_ptr_factory_{
      this};
};

}  // namespace lens

#endif  // CHROME_BROWSER_UI_LENS_LENS_SEARCH_CONTEXTUALIZATION_CONTROLLER_H_