1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
|
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/contextual_cueing/contextual_cueing_page_data.h"
#include "base/i18n/char_iterator.h"
#include "base/strings/string_util.h"
#include "base/task/single_thread_task_runner.h"
#include "chrome/browser/contextual_cueing/contextual_cueing_features.h"
#include "content/public/browser/web_contents.h"
#include "pdf/buildflags.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace contextual_cueing {
namespace {
// Evaluates a single cueing `condition` against the client-computed `value`.
//
// Returns true only when `condition` carries both a cueing operator and an
// int64 threshold, and `value` satisfies the comparison named by the operator
// (`>=` or `<=` against the threshold). An unspecified operator, or any
// operator value not handled by the switch below, never matches.
bool DidMatchCueingCondition(
    const optimization_guide::proto::ContextualCueingConditions& condition,
    int64_t value) {
  if (!condition.has_cueing_operator() || !condition.has_int64_threshold()) {
    return false;
  }
  switch (condition.cueing_operator()) {
    case optimization_guide::proto::CONTEXTUAL_CUEING_OPERATOR_UNSPECIFIED:
      return false;
    case optimization_guide::proto::
        CONTEXTUAL_CUEING_OPERATOR_GREATER_THAN_OR_EQUAL_TO:
      return value >= condition.int64_threshold();
    case optimization_guide::proto::
        CONTEXTUAL_CUEING_OPERATOR_LESS_THAN_OR_EQUAL_TO:
      return value <= condition.int64_threshold();
  }
  // The switch deliberately has no `default:` so the compiler can keep
  // flagging enumerators that are not handled. This return prevents control
  // from flowing off the end of a non-void function (undefined behavior) if an
  // unlisted operator value is ever observed at runtime.
  return false;
}
// Adds the number of words found in `content_node` and all of its descendants
// to `*word_count`, stopping as soon as `*word_count` reaches
// `max_word_count_limit`.
//
// A word is counted at every non-whitespace code point that either begins a
// node's text or directly follows a whitespace code point. Whitespace state is
// reset for every node, so text is never joined across node boundaries.
// `*word_count` is accumulated rather than reset; callers that want a fresh
// count must initialize it to 0.
void CountWords(const optimization_guide::proto::ContentNode& content_node,
                size_t max_word_count_limit,
                size_t* word_count) {
  if (*word_count >= max_word_count_limit) {
    // The cap is already reached: nothing below can change the count, so
    // avoid scanning this node's text and walking its subtree.
    return;
  }

  bool is_previous_char_whitespace = true;
  for (base::i18n::UTF8CharIterator iter(
           content_node.content_attributes().text_data().text_content());
       *word_count < max_word_count_limit && !iter.end(); iter.Advance()) {
    const bool is_current_char_whitespace =
        base::IsUnicodeWhitespace(iter.get());
    if (is_previous_char_whitespace && !is_current_char_whitespace) {
      // Transition from whitespace to non-whitespace marks a word start.
      ++*word_count;
    }
    is_previous_char_whitespace = is_current_char_whitespace;
  }

  for (const auto& child : content_node.children_nodes()) {
    if (*word_count >= max_word_count_limit) {
      // Remaining siblings cannot contribute once the cap has been hit.
      break;
    }
    CountWords(child, max_word_count_limit, word_count);
  }
}
} // namespace
// Attaches this data to `page`, takes ownership of `metadata` and
// `cueing_decision_callback`, and then immediately evaluates the cueing
// configurations via FindMatchingConfig().
//
// Note that FindMatchingConfig() runs `cueing_decision_callback_` right away
// when a configuration already matches or when no client signal has to be
// fetched, so the callback may be invoked before this constructor returns.
ContextualCueingPageData::ContextualCueingPageData(
content::Page& page,
optimization_guide::proto::GlicContextualCueingMetadata metadata,
CueingDecisionCallback cueing_decision_callback)
: content::PageUserData<ContextualCueingPageData>(page),
metadata_(std::move(metadata)),
cueing_decision_callback_(std::move(cueing_decision_callback)) {
FindMatchingConfig();
}
// If the decision callback is still pending when this object is destroyed,
// resolves it with NudgeDecision::kNudgeDecisionInterrupted so that it is
// never dropped without being run.
ContextualCueingPageData::~ContextualCueingPageData() {
  if (!cueing_decision_callback_) {
    // A decision was already delivered; nothing left to report.
    return;
  }
  std::move(cueing_decision_callback_)
      .Run(base::unexpected(NudgeDecision::kNudgeDecisionInterrupted));
}
// Expands the PageUserData key macro for ContextualCueingPageData.
// NOTE(review): presumably this provides the per-class key definition that
// content::PageUserData<ContextualCueingPageData> requires; the macro body is
// not visible in this file, so confirm against its declaring header.
PAGE_USER_DATA_KEY_IMPL(ContextualCueingPageData);
// Attempts to find the matching cueing configuration.
//
// Walks the cueing configurations in `metadata_` in order, skipping those
// without a cue label, and ends in exactly one of these ways:
//  * A configuration is allowed: `cueing_decision_callback_` is run with that
//    configuration's cue label.
//  * Some configuration needs the PDF page count (and PDF support is compiled
//    in): RequestPdfPageCount() is posted after `kPdfPageCountCaptureDelay`
//    and the callback stays pending.
//  * Some configuration needs the page content: returns with the callback
//    still pending; OnPageContentExtracted() calls back into this function.
//  * Otherwise: the callback is run with
//    NudgeDecision::kClientConditionsUnmet.
//
// The callback must still be pending on entry (enforced by CHECK).
void ContextualCueingPageData::FindMatchingConfig() {
  CHECK(cueing_decision_callback_);

  bool needs_pdf_page_count = false;
  bool needs_page_content = false;
  for (const auto& config : metadata_.cueing_configurations()) {
    if (!config.has_cue_label()) {
      continue;
    }
    const auto decision = DidMatchCueingConditions(config);
    if (decision == kAllowed) {
      // `config` is a const reference, so the label is necessarily copied;
      // wrapping it in std::move() would not turn the copy into a move.
      std::move(cueing_decision_callback_).Run(base::ok(config.cue_label()));
      return;
    }
    if (decision == kNeedsPdfPageCount) {
      needs_pdf_page_count = true;
    } else if (decision == kNeedsPageContent) {
      needs_page_content = true;
    }
  }

  if (needs_pdf_page_count) {
#if BUILDFLAG(ENABLE_PDF)
    CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
    base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
        FROM_HERE,
        base::BindOnce(&ContextualCueingPageData::RequestPdfPageCount,
                       weak_factory_.GetWeakPtr()),
        kPdfPageCountCaptureDelay.Get());
    return;
#endif  // BUILDFLAG(ENABLE_PDF)
    // Without PDF support the page count can never be obtained, so fall
    // through to the remaining outcomes below.
  }

  if (needs_page_content) {
    // Wait till the page content is returned.
    return;
  }

  // None of the config matched, and no client-signals were requested.
  std::move(cueing_decision_callback_)
      .Run(base::unexpected(NudgeDecision::kClientConditionsUnmet));
}
// Decides whether `config` may be cued for this page, given the client
// signals gathered so far.
//
// All conditions of `config` must hold. Conditions are examined in order:
//  * kDisallowed is returned as soon as a condition has an unspecified
//    signal, targets the wrong kind of page (PDF page count on a non-PDF
//    page, word count on a PDF page), or fails its comparison.
//  * kNeedsPdfPageCount is returned when a PDF page-count condition is
//    reached before `pdf_page_count_` is known.
//  * kNeedsPageContent is returned when no condition disallowed the config
//    but at least one word-count condition could not be evaluated because the
//    page's word count is not known yet. In that case
//    `page_content_word_count_info_->max_count_needed` is raised so that the
//    later count is precise enough for every pending threshold.
//  * kAllowed is returned when every condition matched (including the case
//    of a configuration without conditions).
ContextualCueingPageData::CueingConfigurationDecision
ContextualCueingPageData::DidMatchCueingConditions(
    const optimization_guide::proto::GlicCueingConfiguration& config) {
  bool needs_page_content = false;
  for (const auto& condition : config.conditions()) {
    switch (condition.signal()) {
      case optimization_guide::proto::
          CONTEXTUAL_CUEING_CLIENT_SIGNAL_UNSPECIFIED:
        return kDisallowed;

      case optimization_guide::proto::
          CONTEXTUAL_CUEING_CLIENT_SIGNAL_PDF_PAGE_COUNT:
        if (page().GetContentsMimeType() != pdf::kPDFMimeType) {
          return kDisallowed;
        }
        if (!pdf_page_count_) {
          return kNeedsPdfPageCount;
        }
        if (!DidMatchCueingCondition(condition, *pdf_page_count_)) {
          return kDisallowed;
        }
        // Matched; keep checking the remaining conditions.
        break;

      case optimization_guide::proto::
          CONTEXTUAL_CUEING_CLIENT_SIGNAL_CONTENT_LENGTH_WORD_COUNT: {
        if (page().GetContentsMimeType() == pdf::kPDFMimeType) {
          return kDisallowed;
        }
        if (page_content_word_count_info_ &&
            page_content_word_count_info_->page_contents_words) {
          if (!DidMatchCueingCondition(
                  condition,
                  *page_content_word_count_info_->page_contents_words)) {
            return kDisallowed;
          }
          // Matched; keep checking the remaining conditions.
          break;
        }

        // The word count is not available yet. Record how many words must be
        // counted for this threshold to be decidable.
        if (!page_content_word_count_info_) {
          page_content_word_count_info_ = {.max_count_needed = 0};
        }
        // Counting has to be able to reach `threshold + 1` words so that
        // "exactly threshold" and "more than threshold" stay distinguishable
        // after capping. A negative threshold is decided by any non-negative
        // count, so it is clamped instead of being cast to a huge size_t.
        const int64_t threshold = condition.int64_threshold();
        const size_t words_needed =
            threshold < 0 ? 1 : static_cast<size_t>(threshold) + 1;
        if (page_content_word_count_info_->max_count_needed < words_needed) {
          page_content_word_count_info_->max_count_needed = words_needed;
        }
        needs_page_content = true;
        break;
      }
    }
  }
  return needs_page_content ? kNeedsPageContent : kAllowed;
}
#if BUILDFLAG(ENABLE_PDF)
void ContextualCueingPageData::RequestPdfPageCount() {
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (pdf_helper) {
pdf_helper->RegisterForDocumentLoadComplete(
base::BindOnce(&ContextualCueingPageData::OnPdfDocumentLoadComplete,
weak_factory_.GetWeakPtr()));
}
}
void ContextualCueingPageData::OnPdfDocumentLoadComplete() {
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (pdf_helper) {
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(
/*size_limit=*/0,
base::BindOnce(&ContextualCueingPageData::OnPdfPageCountReceived,
weak_factory_.GetWeakPtr()));
}
}
// Receives the reply to the zero-byte GetPdfBytes() request. Unless `status`
// is kFailed, stores `page_count` in `pdf_page_count_` and re-evaluates the
// cueing configurations. `bytes` is not used. On failure nothing is recorded
// and the decision callback is left untouched.
void ContextualCueingPageData::OnPdfPageCountReceived(
    pdf::mojom::PdfListener::GetPdfBytesStatus status,
    const std::vector<uint8_t>& bytes,
    uint32_t page_count) {
  const bool request_failed =
      status == pdf::mojom::PdfListener::GetPdfBytesStatus::kFailed;
  if (!request_failed) {
    pdf_page_count_ = page_count;
    FindMatchingConfig();
  }
}
#endif // BUILDFLAG(ENABLE_PDF)
// Supplies the extracted `page_content` so that pending word-count conditions
// can be evaluated. Ignored when the decision callback has already been run
// or when no configuration asked for a word count. Otherwise counts the words
// under the root node, capped at `max_count_needed`, stores the result, and
// re-evaluates the cueing configurations.
void ContextualCueingPageData::OnPageContentExtracted(
    const optimization_guide::proto::AnnotatedPageContent& page_content) {
  if (!cueing_decision_callback_ || !page_content_word_count_info_) {
    return;
  }

  auto& word_count_info = *page_content_word_count_info_;
  size_t counted_words = 0;
  CountWords(page_content.root_node(), word_count_info.max_count_needed,
             &counted_words);
  word_count_info.page_contents_words = counted_words;

  FindMatchingConfig();
}
} // namespace contextual_cueing
|