1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
|
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_AUTOFILL_CONTENT_RENDERER_FORM_AUTOFILL_UTIL_H_
#define COMPONENTS_AUTOFILL_CONTENT_RENDERER_FORM_AUTOFILL_UTIL_H_
#include <stddef.h>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "base/containers/flat_map.h"
#include "base/i18n/rtl.h"
#include "components/autofill/content/renderer/timing.h"
#include "components/autofill/core/common/autofill_constants.h"
#include "components/autofill/core/common/dense_set.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/form_field_data.h"
#include "components/autofill/core/common/mojom/autofill_types.mojom-shared.h"
#include "components/autofill/core/common/unique_ids.h"
#include "third_party/blink/public/web/web_autofill_state.h"
#include "third_party/blink/public/web/web_element_collection.h"
#include "third_party/blink/public/web/web_form_control_element.h"
class GURL;
namespace blink {
enum class WebAutofillState;
class WebDocument;
class WebElement;
class WebFormControlElement;
class WebFormElement;
class WebInputElement;
class WebLocalFrame;
class WebNode;
} // namespace blink
namespace content {
class RenderFrame;
} // namespace content
namespace autofill {
class FieldDataManager;
class FormData;
class FormFieldData;
class SynchronousFormCache;
namespace form_util {
// This file contains utility function related to form and form field
// extraction, label inference, DOM traversal, and form field preview and
// autofilling.
//
// See README.md for the terminology used in this file.
// Mapping from a form element's render id to results of button titles
// heuristics for a given form element.
using ButtonTitlesCache = base::flat_map<FormRendererId, ButtonTitleList>;
// A bit field mask to extract data from WebFormControlElement.
// Copied to components/autofill/ios/browser/resources/autofill_controller.js.
enum class ExtractOption {
kBounds, // Extract bounds from WebFormControlElement, could
// trigger layout if needed.
kDatalist, // Extract datalist from WebFormControlElement, the total
// number of options is up to kMaxListSize and each option
// has as far as kMaxDataLength.
kMinValue = kBounds,
kMaxValue = kDatalist,
};
// Extract FormData from `form_element` or the unowned form if
// `form_element.IsNull()`.
//
// The document must be the one we want to extract fields from. In other words:
// Do not blindly pass "some" WebDocument for `document`! If `form_element` is
// non-null, `document` must obviously be `form_element`'s document. As a rule
// of thumb, avoid passing "the current frame's document" but instead, whenever
// possible, pass WebForm[Control]Element::GetDocument() of the form or of the
// field whose form that we want to extract.
std::optional<FormData> ExtractFormData(
const blink::WebDocument& document,
const blink::WebFormElement& form_element,
const FieldDataManager& field_data_manager,
const CallTimerState& timer_state,
ButtonTitlesCache* button_titles_cache,
DenseSet<ExtractOption> extract_options = {});
// Helper function to assist in getting the canonical form of the action and
// origin. The action will properly take into account <BASE>, and both will
// strip unnecessary data (e.g. query params and HTTP credentials).
GURL GetCanonicalActionForForm(const blink::WebFormElement& form);
// Returns true if `element` is a textarea element.
bool IsTextAreaElement(const blink::WebFormControlElement& element);
// Returns true if `element` is a textarea element or a text input element.
bool IsTextAreaElementOrTextInput(const blink::WebFormControlElement& element);
// Returns true if `element` is connected and not in a user-agent tree.
bool IsAccessible(const blink::WebNode& node);
// Returns true if `element` is one of the element types that can be autofilled.
// {Text, Radiobutton, Checkbox, Select, TextArea}.
// TODO(crbug.com/40100455): IsAutofillableElement() are currently used
// inconsistently. Investigate where these checks are necessary.
bool IsAutofillableElement(const blink::WebFormControlElement& element);
// Returns the current FormControlType of `element` or kInputPassword if
// `element` ever was an <input type=password>.
std::optional<FormControlType> GetAutofillFormControlType(
const blink::WebFormControlElement& element);
// Returns true iff `element` has a "webauthn" autocomplete attribute.
bool IsWebauthnTaggedElement(const blink::WebFormControlElement& element);
// Returns true if |element| can be edited (enabled and not read only).
bool IsElementEditable(const blink::WebInputElement& element);
// Returns the FormRendererId of a given WebFormElement or contenteditable. If
// WebFormElement::IsNull(), returns a null form renderer id, which is the
// renderer id of the unowned form.
FormRendererId GetFormRendererId(const blink::WebElement& e);
// Returns the FieldRendererId of a given WebFormControlElement or
// contenteditable.
FieldRendererId GetFieldRendererId(const blink::WebElement& e);
// Returns text alignment for |element|.
base::i18n::TextDirection GetTextDirectionForElement(
const blink::WebFormControlElement& element);
// Returns all connected, autofillable form control elements
// - owned by `form_element` if `!form_element.IsNull()`;
// - owned by no form otherwise.
std::vector<blink::WebFormControlElement> GetOwnedAutofillableFormControls(
const blink::WebDocument& document,
const blink::WebFormElement& form_element);
// Extracts the FormData that represents the form of `element`. If that form
// cannot be extracted (e.g., because it is too large), falls back to a
// single-field form that contains `element`. If however `element` is not
// autofillable, returns nullopt. `form_cache` can be used to optimize form
// extractions occurring synchronously after this function call.
std::optional<std::pair<FormData, raw_ref<const FormFieldData>>>
FindFormAndFieldForFormControlElement(
const blink::WebFormControlElement& element,
const FieldDataManager& field_data_manager,
const CallTimerState& timer_state,
form_util::ButtonTitlesCache* button_titles_cache,
DenseSet<ExtractOption> extract_options,
const SynchronousFormCache& form_cache);
// Creates a FormData containing a single field out of a contenteditable
// non-form element. The FormData is synthetic in the sense that it does not
// correspond to any other DOM element. It is also conceptually distinct from
// the unowned form (i.e., the collection of form control elements that aren't
// owned by any form).
//
// Returns `std::nullopt` if `contenteditable`:
// - is a WebFormElement; otherwise, there could be two FormData objects with
// identical renderer ID referring to different conceptual forms: the one for
// the contenteditable and an actual <form>.
// - is a WebFormControlElement; otherwise, a <textarea contenteditable> might
// be a member of two FormData objects: the one for the contenteditable and
// the <textarea>'s associated <form>'s FormData.
// - has a contenteditable parent; this is to disambiguate focus elements on
// nested contenteditables because the focus event propagates up.
//
// The FormData's renderer ID has the same value as its (single) FormFieldData's
// renderer ID. This is collision-free with the renderer IDs of any other form
// in the document because DomNodeIds are unique among all DOM elements.
std::optional<FormData> FindFormForContentEditable(
const blink::WebElement& content_editable);
// Fills or previews the fields represented by `fields`.
// `initiating_element` is the element that initiated the autofill process.
// Returns a list of pairs of the filled elements and their autofill state
// prior to the filling.
std::vector<std::pair<FieldRendererId, blink::WebAutofillState>>
ApplyFieldsAction(const blink::WebDocument& document,
base::span<const FormFieldData::FillData> fields,
mojom::FormActionType action_type,
mojom::ActionPersistence action_persistence,
FieldDataManager& field_data_manager);
// Clears the suggested values in `previewed_elements`.
// `initiating_element` is the element that initiated the preview operation.
// `old_autofill_state` is the previous state of the field that initiated the
// preview.
void ClearPreviewedElements(
base::span<std::pair<blink::WebFormControlElement, blink::WebAutofillState>>
previewed_elements);
// Indicates if |node| is owned by |frame| in the sense of
// https://dom.spec.whatwg.org/#concept-node-document. Note that being owned by
// a frame does not require being attached to its DOM.
bool IsOwnedByFrame(const blink::WebNode& node, content::RenderFrame* frame);
// Returns true if `node` is currently owned by `frame` or its frame is nullptr,
// in which case the frame is not known anymore. It is a weaker condition than
// `IsOwnedByFrame(node, frame)`.
bool MaybeWasOwnedByFrame(const blink::WebNode& node,
content::RenderFrame* frame);
// Checks if the webpage is empty.
// This kind of webpage is considered as empty:
// <html>
// <head>
// </head>
// <body>
// </body>
// </html>
// Meta, script and title tags don't influence the emptiness of a webpage.
bool IsWebpageEmpty(const blink::WebLocalFrame* frame);
// Returns the aggregated values of the descendants of |element| that are
// non-empty text nodes. This is a faster alternative to |innerText()| for
// performance critical operations. It does a full depth-first search so can be
// used when the structure is not directly known. However, unlike with
// |innerText()|, the search depth and breadth are limited to a fixed threshold.
// Whitespace is trimmed from text accumulated at descendant nodes.
std::u16string FindChildText(const blink::WebNode& node);
// Returns the button titles for |web_form|. |button_titles_cache| can be used
// to spare recomputation if called multiple times for the same form.
ButtonTitleList GetButtonTitles(const blink::WebFormElement& web_form,
ButtonTitlesCache* button_titles_cache);
// Returns the form element by unique renderer id. Returns the null element if
// there is no form with the |form_renderer_id|.
blink::WebFormElement GetFormByRendererId(FormRendererId form_renderer_id);
// Returns the form control element by unique renderer id.
// |form_to_be_searched| could be used as an optimization to only search for
// elements in it, but doesn't guarantee that the returned element will belong
// to it. Returns the null element if there is no element with the
// |queried_form_control| renderer id.
blink::WebFormControlElement GetFormControlByRendererId(
FieldRendererId queried_form_control);
blink::WebElement GetContentEditableByRendererId(
FieldRendererId field_renderer_id);
std::string GetAutocompleteAttribute(const blink::WebElement& element);
// Iterates through the node neighbors of form and form control elements in
// `document` in search of four digit combinations.
void TraverseDomForFourDigitCombinations(
const blink::WebDocument& document,
base::OnceCallback<void(const std::vector<std::string>&)>
potential_matches);
// This algorithm attempts to extract the final-checkout-amount using regex
// matching. Since the document may list the prices of individual items, a
// second regex is used to identify order-total labels. It is assumed that the
// true final-checkout-amount node is the price node that is the shortest
// distance to a label node. The distance of a price node to a label node is
// measured by the length of the path from the price node to their lowest common
// ancestor. If there are multiple such nodes, returning the first match is
// fine. The returned string is empty if a final-checkout-amount is not found,
// and a string (including dollar signs, periods, and commas) of the
// final-checkout-amount text node value if one is found. The
// final-checkout-amount-text-node is the text node that is deemed to contain
// the final-checkout-amount of the checkout page (ex: a text node containing
// the text "$100.00").
//
// `price_regex` is a regex that is used to check if a text node is a price node
// (and all price nodes are potential final-checkout-amount-nodes).
// `label_regex` is a regex that is used to check if a text node is a label
// node. Label nodes are nodes that, if found near price nodes, are deemed to
// label that price node as a final-checkout-amount. They contain text that
// implies a final-checkout-amount, such as "Order total" or "Total amount".
// `number_of_ancestor_levels_to_search` denotes how many levels of ancestors of
// price nodes should be searched to look for a label node.
//
// Some features in Payments Autofill need to know the final-checkout-amount of
// a page to work properly, for example BNPL. This algorithm attempts to extract
// the final-checkout-amount from the page. It will not always be reliable, but
// from manual testing it works 90%+ of the time.
//
// Example:
// <div>
// <div>
// <span>
// <span>$56.70</span>
// </span>
// <span>
// <span>Total amount:</span>
// </span>
// </div>
// <div>
// <div>
// <div>
// <span>
// <span>$100.00</span>
// </span>
// </div>
// </div>
// </div>
// </div>
//
// In the example above, the search will start at the price nodes "$56.70" and
// "$100.00", then go up and search the subtrees of their ancestors. 2 ancestor
// levels up from the "$56.70" price node, it will reach the 2nd <div> block,
// and find the label node "Total amount:" in its subtree, thus returning the
// final-checkout-amount-node's value as "$56.70". Since the $100.00 price node
// is further away, it will not be considered as the final-checkout-amount.
std::string ExtractFinalCheckoutAmountFromDom(
const blink::WebDocument& document,
std::string_view price_regex,
std::string_view label_regex,
size_t number_of_ancestor_levels_to_search);
// Attempts to update `FormFieldData::user_input_` of `field`, whose DOM element
// is identified by `element_id`, using `field_data_manager`.
void MaybeUpdateUserInput(FormFieldData& field,
FieldRendererId element_id,
const FieldDataManager& field_data_manager);
// The following functions exist in as internal helper functions in
// form_autofill_util.cc and are exposed here just for testing purposes. Check
// the wrapped functions in the .cc file for documentation.
std::vector<blink::WebFormControlElement> GetOwnedFormControlsForTesting(
const blink::WebDocument& document,
const blink::WebFormElement& form_element);
blink::WebNode NextWebNodeForTesting(const blink::WebNode& current_node,
bool forward);
std::u16string GetAriaLabelForTesting(const blink::WebDocument& document,
const blink::WebElement& element);
std::u16string GetAriaDescriptionForTesting(const blink::WebDocument& document,
const blink::WebElement& element);
void InferLabelForElementsForTesting(
base::span<const blink::WebFormControlElement> control_elements,
std::vector<FormFieldData>& fields);
std::u16string FindChildTextWithIgnoreListForTesting(
const blink::WebNode& node,
const std::set<blink::WebNode>& divs_to_skip);
std::vector<SelectOption> GetDataListOptionsForTesting(
const blink::WebInputElement& element);
blink::WebFormElement GetClosestAncestorFormElementForTesting(blink::WebNode n);
bool IsDOMPredecessorForTesting(const blink::WebNode& x,
const blink::WebNode& y,
const blink::WebNode& ancestor_hint);
bool IsWebElementVisibleForTesting(const blink::WebElement& element);
bool IsVisibleIframeForTesting(const blink::WebElement& iframe_element);
uint64_t GetMaxLengthForTesting(const blink::WebFormControlElement& element);
void WebFormControlElementToFormFieldForTesting(
const blink::WebFormElement& form_element,
const blink::WebFormControlElement& element,
const FieldDataManager* field_data_manager,
DenseSet<ExtractOption> extract_options,
FormFieldData* field);
} // namespace form_util
} // namespace autofill
#endif // COMPONENTS_AUTOFILL_CONTENT_RENDERER_FORM_AUTOFILL_UTIL_H_
|