1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
|
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_AUTOFILL_CORE_BROWSER_FORM_STRUCTURE_H_
#define COMPONENTS_AUTOFILL_CORE_BROWSER_FORM_STRUCTURE_H_
#include <stddef.h>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "base/containers/flat_map.h"
#include "base/gtest_prod_util.h"
#include "base/memory/raw_ptr.h"
#include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/country_type.h"
#include "components/autofill/core/browser/field_types.h"
#include "components/autofill/core/browser/form_parsing/field_candidates.h"
#include "components/autofill/core/browser/form_types.h"
#include "components/autofill/core/browser/proto/api_v1.pb.h"
#include "components/autofill/core/common/autofill_constants.h"
#include "components/autofill/core/common/dense_set.h"
#include "components/autofill/core/common/form_field_data.h"
#include "components/autofill/core/common/language_code.h"
#include "components/autofill/core/common/mojom/autofill_types.mojom.h"
#include "components/autofill/core/common/unique_ids.h"
#include "url/gurl.h"
#include "url/origin.h"
namespace base {
class TimeTicks;
}
namespace autofill {
class LogBuffer;
class LogManager;
struct ParsingContext;
// The structure of forms and fields, represented by their signatures, on a
// page. These are sequence containers to reflect their order in the DOM.
using FormAndFieldSignatures =
std::vector<std::pair<FormSignature, std::vector<FieldSignature>>>;
using FieldSuggestion = AutofillQueryResponse::FormSuggestion::FieldSuggestion;
class FormData;
struct FormDataPredictions;
// FormStructure stores a single HTML form together with the values entered
// in the fields along with additional information needed by Autofill.
class FormStructure {
public:
explicit FormStructure(const FormData& form);
FormStructure(const FormStructure&) = delete;
FormStructure& operator=(const FormStructure&) = delete;
virtual ~FormStructure();
// Runs several heuristics against the form fields to determine their possible
// types.
void DetermineHeuristicTypes(
const GeoIpCountryCode& client_country,
LogManager* log_manager);
// Runs rationalization and sectioning. This is to be run after the field
// types change.
//
// For historical reasons, the order of rationalization and sectioning is
// context-dependent: Usually, sectioning comes first. But for server
// predictions (or `legacy_order` is true), parts of the rationalization
// happens before sectioning.
// TODO(crbug.com/408497919): Make the order consistent.
void RationalizeAndAssignSections(LogManager* log_manager,
bool legacy_order = false);
// Returns predictions that can be sent to the renderer process for debugging.
FormDataPredictions GetFieldTypePredictions() const;
// Creates FormStructure that has bare minimum information for uploading
// votes, namely form and field signatures. Warning: do not use for Autofill
// code, since it is likely missing some fields.
static std::unique_ptr<FormStructure> CreateForPasswordManagerUpload(
FormSignature form_signature,
const std::vector<FieldSignature>& field_signatures);
// Return the form signature as string.
std::string FormSignatureAsStr() const;
// Runs a quick heuristic to rule out forms that are obviously not
// auto-fillable, like google/yahoo/msn search, etc.
bool IsAutofillable() const;
// This enum defines two different states of completeness for a credit card
// form, each used for a distinct purpose to check if the required credit card
// fields exist.
enum class CreditCardFormCompleteness {
// This represents a minimal complete credit card form which has at least a
// credit card number field and an expiration date field.
kCompleteCreditCardForm,
// This represents a credit card form which has a CVC field and a cardholder
// name field in addition to the credit card number field and the expiration
// date field. For example, this level is required for offering `Save and
// Fill`.
kCompleteCreditCardFormIncludingCvcAndName,
};
// Returns whether |this| form represents a complete Credit Card form, as
// defined by the given CreditCardFormCompleteness level.
bool IsCompleteCreditCardForm(
CreditCardFormCompleteness credit_card_form_completeness) const;
// Returns true if this form matches the structural requirements for Autofill.
[[nodiscard]] bool ShouldBeParsed(LogManager* log_manager = nullptr) const {
return ShouldBeParsed({}, log_manager);
}
// Returns true if heuristic autofill type detection should be attempted for
// this form.
bool ShouldRunHeuristics() const;
// Returns true if autofill's heuristic field type detection should be
// attempted for this form given that |kMinRequiredFieldsForHeuristics| is not
// met.
bool ShouldRunHeuristicsForSingleFields() const;
// Returns true if we should query the crowd-sourcing server to determine this
// form's field types. If the form includes author-specified types, this will
// return false unless there are password fields in the form. If there are no
// password fields the assumption is that the author has expressed their
// intent and crowdsourced data should not be used to override this. Password
// fields are different because there is no way to specify password generation
// directly.
bool ShouldBeQueried() const;
// Returns true if we should upload Autofill votes for this form to the
// crowd-sourcing server. It is not applied for Password Manager votes.
bool ShouldBeUploaded() const;
// Returns whether the form is considered parseable and meets a couple of
// other requirements which makes uploading UKM data worthwhile. E.g. the
// form should not be a search form, the forms should have at least one
// focusable input field with a type from heuristics or the server.
bool ShouldUploadUkm(bool require_classified_field) const;
// This enum defines the behavior of RetrieveFromCache, which needs to adapt
// to the reason for retrieving data from the cache.
enum class RetrieveFromCacheReason {
// kFormCacheUpdateWithoutParsing and kFormCacheUpdateAfterParsing refer to
// the process of parsing the form and/or storing the result in
// AutofillManager's form cache when the renderer sends a new or potentially
// updated FormData object. Form parsing is the process of assigning field
// types to fields when the renderer notifies the browser about a new,
// modified or interacted with form.
//
// When the browser receives a FormData object from the renderer, it is
// converted to a FormStructure object. After that, the FormStructure is
// parsed if it changed significantly since the last parse.
// RetrieveFromCache is responsible for retaining information from the
// history of the fields in the form (e.g. information about previous fill
// operations):
//
// - The `is_autofilled` and similar members of a field are copied from the
// cached form so that a field that was once labeled as autofilled remains
// autofilled.
//
// - The `value` of a field is copied from the cache as it represents the
// initial value of a field during page load time and must not be updated
// if a form is parsed a second time.
// TODO: crbug.com/40227496 - Update documentation about `value` when
// kAutofillFixValueSemantics is launched.
//
// - Server predictions are also preserved.
//
// - Heuristic predictions are preserved only for
// kFormCacheUpdateWithoutParsing. They are discarded for
// kFormCacheUpdateAfterParsing because they are generated during parsing.
kFormCacheUpdateWithoutParsing,
kFormCacheUpdateAfterParsing,
// kFormImport refers to the process of importing address profiles / credit
// cards from user-filled forms after a form submission.
//
// During form import, the browser receives a FormData object from the
// renderer that is first converted to a FormStructure object.
// RetrieveFromCache is responsible for processing the FormData so that the
// FormStructure contains the right information that facilitate importing.
// Therefore, similar work happens as for kFormCacheUpdateAfterParse,
// except:
//
// - During form import, we want to copy field type information from
// previous parse operations as these tell which information to save.
//
// - The `value` of a FormStructure's field typically represents the
// initially observed value of a field during page load. So during
// kFormCacheUpdateAfterParse the value is persisted. During import,
// however, we want to store the last observed value. Furthermore, if the
// submitted value of a field has never been changed, we ignore the
// previous value from import (unless it's a state or country as websites
// can find meaningful default values via GeoIP).
// TODO: crbug.com/40227496 - Update documentation about `value` when
// kAutofillFixValueSemantics is launched.
//
// TODO: crbug.com/40227496 - When kAutofillFixValueSemantics is launched,
// kFormImport behaves identical to kFormCacheUpdateWithoutParsing. Consider
// renaming the enum constants or, better yet, removing the entire enum
// then.
kFormImport,
};
// Assumes that `*this` is FormStructure which was freshly created from a
// FormData object that the renderer sent to the browser and copies relevant
// information from a `cached_form` to `*this`. Depending on the passed
// `reason`, a different subset of data can be copied.
void RetrieveFromCache(const FormStructure& cached_form,
RetrieveFromCacheReason reason);
// Rationalize phone number fields so that, in every section, only the first
// complete phone number is filled automatically. This is useful for when a
// form contains a first phone number and second phone number, which usually
// should be distinct.
void RationalizePhoneNumberFieldsForFilling();
// Rationalize the form's autocomplete attributes, repeated fields and field
// type predictions.
void RationalizeFormStructure(LogManager* log_manager);
// Returns the FieldGlobalIds of the |fields_| that are eligible for manual
// filling on form interaction.
static std::vector<FieldGlobalId> FindFieldsEligibleForManualFilling(
const std::vector<raw_ptr<FormStructure, VectorExperimental>>& forms);
// See FormFieldData::fields.
const std::vector<std::unique_ptr<AutofillField>>& fields() const {
return fields_;
}
const AutofillField* field(size_t index) const;
AutofillField* field(size_t index);
size_t field_count() const;
const AutofillField* GetFieldById(FieldGlobalId field_id) const;
AutofillField* GetFieldById(FieldGlobalId field_id);
// Used for iterating over the fields.
std::vector<std::unique_ptr<AutofillField>>::const_iterator begin() const {
return fields_.begin();
}
std::vector<std::unique_ptr<AutofillField>>::const_iterator end() const {
return fields_.end();
}
const std::u16string& form_name() const { return form_name_; }
const std::u16string& id_attribute() const { return id_attribute_; }
const std::u16string& name_attribute() const { return name_attribute_; }
const GURL& source_url() const { return source_url_; }
const GURL& full_source_url() const { return full_source_url_; }
const GURL& target_url() const { return target_url_; }
const url::Origin& main_frame_origin() const { return main_frame_origin_; }
const ButtonTitleList& button_titles() const { return button_titles_; }
// Returns whether the form comes from an HTML form with a <form> tag.
bool is_form_element() const;
base::TimeTicks form_parsed_timestamp() const {
return form_parsed_timestamp_;
}
std::optional<base::TimeTicks> last_filling_timestamp() const {
return last_filling_timestamp_;
}
void set_last_filling_timestamp(base::TimeTicks last_filling_timestamp) {
last_filling_timestamp_ = last_filling_timestamp;
}
bool may_run_autofill_ai_model() const { return may_run_autofill_ai_model_; }
void set_may_run_autofill_ai_model(bool may_run_autofill_ai_model) {
may_run_autofill_ai_model_ = may_run_autofill_ai_model;
}
FormSignature form_signature() const { return form_signature_; }
void set_form_signature(FormSignature signature) {
form_signature_ = signature;
}
FormSignature alternative_form_signature() const {
return alternative_form_signature_;
}
void set_alternative_form_signature(FormSignature signature) {
alternative_form_signature_ = signature;
}
// Returns a FormData containing the data this form structure knows about.
FormData ToFormData() const;
// Returns the possible form types.
DenseSet<FormType> GetFormTypes() const;
mojom::SubmissionSource submission_source() const {
return submission_source_;
}
void set_submission_source(mojom::SubmissionSource submission_source) {
submission_source_ = submission_source;
}
int developer_engagement_metrics() const {
return developer_engagement_metrics_;
}
const LanguageCode& current_page_language() const {
return current_page_language_;
}
void set_current_page_language(LanguageCode language) {
current_page_language_ = std::move(language);
}
FormGlobalId global_id() const { return {host_frame_, renderer_id_}; }
FormVersion version() const { return version_; }
const GeoIpCountryCode& client_country() const { return client_country_; }
// The signatures of forms recently submitted on the same origin within a
// small period of time.
struct FormAssociations {
std::optional<FormSignature> last_address_form_submitted;
std::optional<FormSignature> second_last_address_form_submitted;
std::optional<FormSignature> last_credit_card_form_submitted;
};
base::flat_map<FieldGlobalId, AutofillType::ServerPrediction>
GetServerPredictions(const std::vector<FieldGlobalId>& field_ids) const;
base::flat_map<FieldGlobalId, FieldType> GetHeuristicPredictions(
HeuristicSource source,
const std::vector<FieldGlobalId>& field_ids) const;
private:
friend class FormStructureTestApi;
// Sets the rank of each field in the form.
void DetermineFieldRanks();
// Classifies each field using the regular expressions. The classifications
// are returned, but not assigned to the `fields_` yet. Use
// `AssignBestFieldTypes()` to do so.
[[nodiscard]] FieldCandidatesMap ParseFieldTypesWithPatterns(
ParsingContext& context) const;
// Assigns the best heuristic types from the `field_type_map` to the heuristic
// types of the corresponding fields for the `pattern_source`.
void AssignBestFieldTypes(const FieldCandidatesMap& field_type_map,
HeuristicSource heuristic_source);
void LogDetermineHeuristicTypesMetrics();
// Sets each field's `html_type` and `html_mode` based on the field's
// `parsed_autocomplete` member.
void SetFieldTypesFromAutocompleteAttribute();
// Production code only uses the default parameters.
// Unit tests also test other parameters.
struct ShouldBeParsedParams {
size_t min_required_fields =
std::min({kMinRequiredFieldsForHeuristics, kMinRequiredFieldsForQuery,
kMinRequiredFieldsForUpload});
size_t required_fields_for_forms_with_only_password_fields =
kRequiredFieldsForFormsWithOnlyPasswordFields;
};
FormStructure(FormSignature form_signature,
const std::vector<FieldSignature>& field_signatures);
[[nodiscard]] bool ShouldBeParsed(ShouldBeParsedParams params,
LogManager* log_manager = nullptr) const;
// Extracts the parseable field name by removing a common affix.
void ExtractParseableFieldNames();
// Extract parseable field labels by potentially splitting labels between
// adjacent fields.
void ExtractParseableFieldLabels();
// The country where the user is currently located. Used to introduce biases
// in form parsing and understanding according to the user's location.
GeoIpCountryCode client_country_;
// The language detected for this form's page, before any translations
// performed by Chrome.
LanguageCode current_page_language_;
// The id attribute of the form.
std::u16string id_attribute_;
// The name attribute of the form.
std::u16string name_attribute_;
// The name of the form.
std::u16string form_name_;
// The titles of form's buttons.
ButtonTitleList button_titles_;
// The source URL (excluding the query parameters and fragment identifiers).
GURL source_url_;
// The full source URL including query parameters and fragment identifiers.
// If `kAutofillIncludeUrlInCrowdsourcing` is disabled, this value should only
// be set for password forms.
GURL full_source_url_;
// The target URL.
GURL target_url_;
// The origin of the main frame of this form.
// |main_frame_origin| represents the main frame (not necessarily primary
// main frame) of the form's frame tree as described by MPArch nested frame
// trees. For details, see RenderFrameHost::GetMainFrame().
url::Origin main_frame_origin_;
// A vector of all the input fields in the form.
// See FormFieldData::fields.
std::vector<std::unique_ptr<AutofillField>> fields_;
// Indicates whether the client may run the AutofillAI model for this form.
bool may_run_autofill_ai_model_ = false;
// The unique signature for this form, composed of the target url domain,
// the form name, and the form field names in a 64-bit hash.
FormSignature form_signature_;
// The alternative signature for this form which is more stable/generic than
// `form_signature_`, used when signature is random/unstable at each reload.
// It is composed of the target url domain, the fields' form control types,
// and for forms with 1-2 fields, one of the following non-empty elements
// ordered by preference: path, reference, or query in a 64-bit hash.
FormSignature alternative_form_signature_;
// The timestamp (not wallclock time) when this form was initially parsed.
base::TimeTicks form_parsed_timestamp_;
// The timestamp when this form or one of its fields was last filled.
std::optional<base::TimeTicks> last_filling_timestamp_;
// Used to record whether developer has used autocomplete markup or
// UPI-VPA hints, This is a bitmask of DeveloperEngagementMetric and set in
// DetermineHeuristicTypes().
int developer_engagement_metrics_ = 0;
mojom::SubmissionSource submission_source_ = mojom::SubmissionSource::NONE;
// True iff queries encoded from this form structure should include rich
// form/field metadata.
bool is_rich_query_enabled_ = false;
// A unique identifier of the containing frame.
// This value must not be leaked to other renderer processes.
LocalFrameToken host_frame_;
// A monotonically increasing counter that indicates the generation of the
// form.
FormVersion version_;
// An identifier of the form that is unique among the forms from the same
// frame.
FormRendererId renderer_id_;
// A vector of all iframes in the form.
std::vector<FrameTokenWithPredecessor> child_frames_;
};
LogBuffer& operator<<(LogBuffer& buffer, const FormStructure& form);
std::ostream& operator<<(std::ostream& buffer, const FormStructure& form);
} // namespace autofill
#endif // COMPONENTS_AUTOFILL_CORE_BROWSER_FORM_STRUCTURE_H_
|