1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
|
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_UI_LENS_LENS_SEARCH_CONTEXTUALIZATION_CONTROLLER_H_
#define CHROME_BROWSER_UI_LENS_LENS_SEARCH_CONTEXTUALIZATION_CONTROLLER_H_
#include "base/memory/raw_ptr.h"
#include "chrome/browser/lens/core/mojom/lens_side_panel.mojom.h"
#include "chrome/browser/ui/lens/lens_overlay_query_controller.h"
#include "chrome/common/chrome_render_frame.mojom.h"
#include "components/lens/lens_overlay_invocation_source.h"
#include "components/omnibox/browser/autocomplete_match_type.h"
#include "components/optimization_guide/content/browser/page_context_eligibility.h"
#include "components/tabs/public/tab_interface.h"
#include "mojo/public/cpp/bindings/associated_remote.h"
#include "pdf/buildflags.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#include "pdf/mojom/pdf.mojom.h"
#endif // BUILDFLAG(ENABLE_PDF)
class LensSearchController;
namespace content {
class RenderFrameHost;
class RenderWidgetHostView;
} // namespace content
namespace content_extraction {
struct InnerTextResult;
} // namespace content_extraction
namespace optimization_guide {
struct AIPageContentResult;
} // namespace optimization_guide
using GetIsContextualSearchboxCallback =
lens::mojom::LensSidePanelPageHandler::GetIsContextualSearchboxCallback;
// Callback type alias for the when the page context eligibility is fetched.
using LensSearchPageContextEligibilityCallback = base::OnceCallback<void(bool)>;
namespace lens {
class LensSearchboxController;
// Callback type alias for page content bytes retrieved. Multiple pieces and
// types of content may be retrieved and returned in `page_contents`.
// `primary_content_type` is the main type used in the request flow and used to
// determine request params and whether updated requests need to be sent.
// `pdf_page_count` is the number of pages in the document being retrieved, not
// necessarily the number of pages in `bytes`. For example, if the document is a
// PDF, `pdf_page_count` is the number of pages in the PDF, while `bytes` could
// be empty because the PDF is too large.
using PageContentRetrievedCallback =
base::OnceCallback<void(std::vector<lens::PageContent> page_contents,
lens::MimeType primary_content_type,
std::optional<uint32_t> pdf_page_count)>;
// Callback type alias for retrieving the text from the PDF pages one by one.
using PdfPartialPageTextRetrievedCallback =
base::OnceCallback<void(std::vector<std::u16string> pdf_pages_text)>;
// Callback type alias for when the page context has been updated. This is used
// to allow requests to be made after the latest page context has been sent to
// the server.
using OnPageContextUpdatedCallback = base::OnceCallback<void()>;
// Controller responsible for handling contextualization logic for Lens flows.
// This includes grabbing content related to the page and issuing Lens requests
// so searchbox requests are contextualized.
class LensSearchContextualizationController {
public:
explicit LensSearchContextualizationController(
LensSearchController* lens_search_controller);
virtual ~LensSearchContextualizationController();
// Internal state machine. States are mutually exclusive. Exposed for testing.
enum class State {
// This is the default state. The contextualization flow is not currently
// active.
kOff,
// The contextualization flow is in the process of initializing.
kInitializing,
// The contextualization flow is active.
kActive,
// TODO(crbug.com/335516480): Implement suspended state.
kSuspended,
};
State state() { return state_; }
// Starts the contextualization flow without the overlay being shown to the
// user. Virtual for testing.
virtual void StartContextualization(
lens::LensOverlayInvocationSource invocation_source,
OnPageContextUpdatedCallback callback);
// Tries to fetch the underlying page content bytes to use for
// contextualization. If page content can not be retrieved, the callback will
// be run with no bytes.
void GetPageContextualization(PageContentRetrievedCallback callback);
// Tries to fetch the underlying page content bytes and update the query flow
// with them. `callback` will be run whether the page context was updated or
// not.
void TryUpdatePageContextualization(
OnPageContextUpdatedCallback callback);
#if BUILDFLAG(ENABLE_PDF)
// Fetches the visible page index from the PDF renderer and then starts the
// process of fetching the text from the PDF to be used for suggest signals.
// This is a no-op if the tab is not a PDF. Once the partial text is
// retrieved, the text is sent to the server via the query controller.
void FetchVisiblePageIndexAndGetPartialPdfText(
uint32_t page_count,
PdfPartialPageTextRetrievedCallback callback);
#endif // BUILDFLAG(ENABLE_PDF)
// Resets the state of the contextualization controller to kOff.
void ResetState();
// Records the UMA for the metrics relating to the document where the
// contextual search box was shown. If this is a webpage, records the size of
// the innerHtml and the innerText. If this is a PDF, records the byte size of
// the PDF and the number of pages. `pdf_page_count` is only used for PDFs.
void RecordDocumentMetrics(std::optional<uint32_t> pdf_page_count);
// Posts a task to the background thread to calculate the OCR DOM similarity
// and then records the result. Only records the similarity once per session.
// Only records the similarity if the OCR text and page content are available.
void TryCalculateAndRecordOcrDomSimilarity();
// Sets the text of the page. Used to calculate the OCR DOM similarity.
// Should only be called once per session.
void SetText(lens::mojom::TextPtr text);
// TODO(crbug.com/418825720): Remove this code once the early start query flow
// optimization is fully launched as this will no longer be needed as all
// context updates will go through this controller. Sets the page content and
// primary content type for the controller. Only used in when the start query
// flow optimization is not enabled to ensure that the page content is still
// passed to the contextualization controller even if it does not make the
// request to the server.
void SetPageContent(std::vector<lens::PageContent> page_contents,
lens::MimeType primary_content_type);
// Returns whether the page is context eligible based on the URL and frame
// metadata provided. Calls the provided callback with the result. This
// function makes a call to the page context eligibility API on whether the
// latest contextualized data is eligible to be sent. This is in contrast to
// `GetCurrentPageContextEligibility` which returns the latest cached state.
void IsPageContextEligible(
const GURL& main_frame_url,
std::vector<optimization_guide::FrameMetadata> frame_metadata,
LensSearchPageContextEligibilityCallback callback);
// Override these methods to be able to track calls made to the page context
// eligibility API.
virtual void CreatePageContextEligibilityAPI();
// Returns whether the page is context eligible based on the latest cached
// state. If the page context eligibility API has not been loaded, this will
// return false.
virtual bool GetCurrentPageContextEligibility();
bool IsActive() const { return state_ == State::kActive; }
protected:
// The page context eligibility API if it has been fetched. Can be nullptr.
// This is marked protected so that it can be accessed by the test
// implementation of this class.
raw_ptr<optimization_guide::PageContextEligibility> page_context_eligibility_;
private:
struct PageContextEligibilityParams {
public:
PageContextEligibilityParams(
const GURL& main_frame_url,
std::vector<optimization_guide::FrameMetadata> frame_metadata);
~PageContextEligibilityParams();
GURL main_frame_url;
std::vector<optimization_guide::FrameMetadata> frame_metadata;
};
// Called when the page context eligibility API is loaded.
void OnPageContextEligibilityAPILoaded(
optimization_guide::PageContextEligibility* page_context_eligibility);
// Called when the initial page context eligibility is fetched. This should be
// used for the initial check as the APC may not have been received yet. For
// subsequent checks, use `OnPageContextEligibilityFetched`.
void OnInitialPageContextEligibilityFetched(
const SkBitmap& bitmap,
const std::vector<gfx::Rect>& all_bounds,
std::optional<uint32_t> pdf_current_page,
OnPageContextUpdatedCallback callback,
bool is_page_context_eligible);
// Begin updating page contextualization by potentially taking a new
// screenshot.
void UpdatePageContextualization(std::vector<lens::PageContent> page_contents,
lens::MimeType primary_content_type,
std::optional<uint32_t> pdf_page_count);
// Continue updating page contextualization by potentially getting the current
// PDF page.
void UpdatePageContextualizationPart2(
std::vector<lens::PageContent> page_contents,
lens::MimeType primary_content_type,
std::optional<uint32_t> pdf_page_count,
const SkBitmap& bitmap);
// Updates the query flow with the new page content bytes and/or screenshot. A
// request will only be sent if the bytes are different from the previous
// bytes sent or the screenshot is different from the previous screenshot.
void UpdatePageContextualizationPart3(
std::vector<lens::PageContent> page_contents,
lens::MimeType primary_content_type,
std::optional<uint32_t> pdf_page_count,
const SkBitmap& bitmap,
std::optional<uint32_t> pdf_current_page);
// Gets the inner HTML for contextualization if flag enabled. Otherwise skip
// to MaybeGetInnerText().
void MaybeGetInnerHtml(std::vector<lens::PageContent> page_contents,
content::RenderFrameHost* render_frame_host,
PageContentRetrievedCallback callback);
// Callback for when the inner HTML is retrieved from the underlying page.
// Calls MaybeGetInnerText().
void OnInnerHtmlReceived(std::vector<lens::PageContent> page_contents,
content::RenderFrameHost* render_frame_host,
PageContentRetrievedCallback callback,
const std::optional<std::string>& result);
// Gets the inner text for contextualization if flag enabled. Otherwise skip
// to MaybeGetAnnotatedPageContent().
void MaybeGetInnerText(std::vector<lens::PageContent> page_contents,
content::RenderFrameHost* render_frame_host,
PageContentRetrievedCallback callback);
// Callback for when the inner text is retrieved from the underlying page.
// Calls MaybeGetAnnotatedPageContent().
void OnInnerTextReceived(
std::vector<lens::PageContent> page_contents,
content::RenderFrameHost* render_frame_host,
PageContentRetrievedCallback callback,
std::unique_ptr<content_extraction::InnerTextResult> result);
// Gets the annotated page content for contextualization if flag enabled.
// Otherwise run the callback with the HTML and/or innerText.
void MaybeGetAnnotatedPageContent(
std::vector<lens::PageContent> page_contents,
content::RenderFrameHost* render_frame_host,
PageContentRetrievedCallback callback);
// Callback for when the annotated page content is retrieved. Runs the
// callback with the HTML, innerText, and/or annotated page content.
void OnAnnotatedPageContentReceived(
std::vector<lens::PageContent> page_contents,
PageContentRetrievedCallback callback,
std::optional<optimization_guide::AIPageContentResult> apc);
// Callback for when the page context eligibility is fetched. This should only
// be used after the APC has been received. For the initial check before the
// APC is received, use `OnInitialPageContextEligibilityFetched`.
void OnPageContextEligibilityFetched(
std::vector<lens::PageContent> page_contents,
PageContentRetrievedCallback callback,
std::optional<optimization_guide::AIPageContentResult> result,
bool is_page_context_eligible);
#if BUILDFLAG(ENABLE_PDF)
// Gets the PDF bytes from the IPC call to the PDF renderer if the PDF
// feature is enabled. Otherwise run the callback with no bytes.
void MaybeGetPdfBytes(pdf::PDFDocumentHelper* pdf_helper,
PageContentRetrievedCallback callback);
// Receives the PDF bytes from the IPC call to the PDF renderer and stores
// them in initialization data. `pdf_page_count` is passed to the partial PDF
// text fetch to be used to determine when to stop fetching.
void OnPdfBytesReceived(PageContentRetrievedCallback callback,
pdf::mojom::PdfListener::GetPdfBytesStatus status,
const std::vector<uint8_t>& bytes,
uint32_t pdf_page_count);
// Gets the partial text from the PDF to be used for suggest. Schedules for
// the next page of text to be fetched, from the PDF in page order until
// either 1) all the text is received or 2) the character limit is reached.
// This method should only be called by GetPartialPdfText.
void GetPartialPdfTextCallback(uint32_t page_index,
uint32_t total_page_count,
uint32_t total_characters_retrieved,
const std::u16string& page_text);
// Callback to run when the partial page text is retrieved from the PDF.
void OnPdfPartialPageTextRetrieved(
std::vector<std::u16string> pdf_pages_text);
#endif // BUILDFLAG(ENABLE_PDF)
bool IsScreenshotPossible(content::RenderWidgetHostView* view);
void CaptureScreenshot(base::OnceCallback<void(const SkBitmap&)> callback);
// Callback for when the screenshot is captured and initial request data is
// ready.
void DidCaptureScreenshot(
mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame>
chrome_render_frame,
int attempt_id,
const SkBitmap& bitmap,
const std::vector<gfx::Rect>& bounds,
OnPageContextUpdatedCallback callback,
std::optional<uint32_t> pdf_current_page);
// Fetches the bounding boxes of all images within the current viewport.
void FetchViewportImageBoundingBoxes(OnPageContextUpdatedCallback callback,
const SkBitmap& bitmap);
// Creates the mojo bounding boxes for the significant regions.
std::vector<lens::mojom::CenterRotatedBoxPtr> ConvertSignificantRegionBoxes(
const std::vector<gfx::Rect>& all_bounds);
// Gets the current page number if viewing a PDF.
void GetPdfCurrentPage(
mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame>
chrome_render_frame,
int attempt_id,
const SkBitmap& bitmap,
OnPageContextUpdatedCallback callback,
const std::vector<gfx::Rect>& bounds);
// Callback to record the size of the innerText once it is fetched.
void RecordInnerTextSize(
std::unique_ptr<content_extraction::InnerTextResult> result);
// Callback to record the size of the innerHtml once it is fetched.
void RecordInnerHtmlSize(const std::optional<std::string>& result);
float GetUiScaleFactor();
lens::LensOverlayQueryController* GetQueryController();
lens::LensSearchboxController* GetSearchboxController();
// The current state of the contextualization flow.
State state_ = State::kOff;
// Indicates whether the user is currently on a context eligible page.
bool is_page_context_eligible_ = true;
// The callback to run when the partial page text is retrieved. This is
// populated when FetchVisiblePageIndexAndGetPartialPdfText is called.
PdfPartialPageTextRetrievedCallback pdf_partial_page_text_retrieved_callback_;
// The screenshot of the viewport.
SkBitmap viewport_screenshot_;
// The page url. Empty if it is not allowed to be shared.
GURL page_url_;
// The page title, if it is allowed to be shared.
std::optional<std::string> page_title_;
// The data of the content the user is viewing. There can be multiple
// content types for a single page, so we store them all in this struct.
std::vector<lens::PageContent> page_contents_;
// The primary type of the data stored in page_contents_. This is the value
// used to determine request params and what content to look at when
// determining if the page_contents_ needs to be present.
lens::MimeType primary_content_type_ = lens::MimeType::kUnknown;
// The page count of the PDF document if page_content_type_ is kPdf.
std::optional<uint32_t> pdf_page_count_;
// The partial representation of a PDF document. The element at a given
// index holds the text of the PDF page at the same index.
std::vector<std::u16string> pdf_pages_text_;
// The most visible page of the PDF document when the viewport was last
// updated, if page_content_type_ is kPdf.
std::optional<uint32_t> last_retrieved_most_visible_page_;
// The callback for the caller to pass to this controller to be notified when
// the page context has been updated and sent to the server.
OnPageContextUpdatedCallback on_page_context_updated_callback_;
// The text of the page. Used to calculate the OCR DOM similarity. Used once
// per session and then cleared.
lens::mojom::TextPtr text_;
// The source of the invocation.
lens::LensOverlayInvocationSource invocation_source_;
// Whether the OCR DOM similarity has been recorded in the current session.
bool ocr_dom_similarity_recorded_in_session_ = false;
// Whether the page context eligibility API has been loaded in the current tab
// session.
bool has_page_context_eligibility_api_loaded_ = false;
// Stored page context eligibility parameters to be used once the API is
// loaded. This is only used if the API is not yet loaded when
// IsPageContextEligible() is called and `page_context_eligibility_callback_`
// is set.
std::optional<PageContextEligibilityParams>
pending_context_eligibility_params_;
// A stored context eligibility callback to be called once the page context
// eligibility API is loaded.
LensSearchPageContextEligibilityCallback page_context_eligibility_callback_;
// Owns this.
const raw_ptr<LensSearchController> lens_search_controller_;
// Must be the last member.
base::WeakPtrFactory<LensSearchContextualizationController> weak_ptr_factory_{
this};
};
} // namespace lens
#endif // CHROME_BROWSER_UI_LENS_LENS_SEARCH_CONTEXTUALIZATION_CONTROLLER_H_
|