File: page_passwords_analyser.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (474 lines) | stat: -rw-r--r-- 19,779 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/content/renderer/page_passwords_analyser.h"

#include <algorithm>
#include <map>
#include <stack>
#include <string>
#include <vector>

#include "base/containers/contains.h"
#include "base/lazy_instance.h"
#include "base/memory/raw_ptr.h"
#include "base/strings/strcat.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "components/autofill/content/renderer/form_autofill_util.h"
#include "components/autofill/content/renderer/page_form_analyser_logger.h"
#include "components/autofill/content/renderer/password_form_conversion_utils.h"
#include "third_party/blink/public/web/web_document.h"
#include "third_party/blink/public/web/web_element.h"
#include "third_party/blink/public/web/web_element_collection.h"
#include "third_party/blink/public/web/web_form_control_element.h"
#include "third_party/blink/public/web/web_label_element.h"
#include "third_party/blink/public/web/web_node.h"
#include "third_party/re2/src/re2/re2.h"

using blink::WebDocument;
using blink::WebElement;
using blink::WebElementCollection;
using blink::WebFormControlElement;
using blink::WebFormElement;
using blink::WebInputElement;
using blink::WebLabelElement;
using blink::WebLocalFrame;
using blink::WebNode;
using blink::WebString;

namespace autofill {

namespace {

// Base URL of the developer documentation that every logged message links to.
constexpr char kDocumentationUrl[] = "https://goo.gl/9p2vKq";
// Values of the <input> "type" attribute that are considered when looking for
// username or password fields.
constexpr const char* kTypeAttributes[] = {"text", "email", "tel", "password"};
// The non-password subset of |kTypeAttributes|.
constexpr const char* kTypeTextAttributes[] = {"text", "email", "tel"};
// Characters a form signature is built from: one per text field and one per
// password field respectively.
constexpr char kTextFieldSignature = 'T';
constexpr char kPasswordFieldSignature = 'P';

// Appends a link to the developer documentation to |message|, in the form
// "<message> (More info: <url>)". If |reference| is null the default URL is
// used as-is; otherwise "#<reference>" is appended to the URL itself, so the
// link points to that anchor.
std::string LinkDocumentation(const std::string& message,
                              const char* reference = nullptr) {
  std::string url = kDocumentationUrl;
  if (reference) {
    // The anchor is part of the URL, so it must go inside the parentheses.
    url += '#';
    url += reference;
  }
  return message + " (More info: " + url + ")";
}

// A simple wrapper that provides some extra data about nodes
// during the DOM traversal (e.g. whether it lies within a <form>
// element, which is necessary for some of the warnings).
// NOTE(review): nothing in the visible part of this file references this
// struct -- confirm whether it is still needed.
struct TraversalInfo {
  // The node being visited.
  const WebNode node;
  // Whether |node| lies inside a <form> element.
  const bool in_form;
};

// Gathers the parts of a <form> that matter to the Password Manager: its text
// and password inputs, plus the order in which they appear.
struct FormInputCollection {
  // The form the inputs below belong to.
  WebFormElement form;
  // Every input added through AddInput(), in insertion order.
  std::vector<WebFormControlElement> inputs;
  // Indices into |inputs| of the fields whose type is not "password".
  std::vector<size_t> text_inputs;
  // Indices into |inputs| of the fields whose type is "password".
  std::vector<size_t> password_inputs;
  // Subset of |password_inputs| whose autocomplete attribute is exactly
  // "current-password" or "new-password".
  std::vector<size_t> explicit_password_inputs;
  // One character per entry of |inputs|; see AddInput().
  std::string signature;

  // Appends |input| to the collection and extends |signature| by one
  // character: 'P' for a password field, 'T' for anything else. The signature
  // is a compact description of the field layout that the rest of the
  // analysis matches against to guess what kind of form this is.
  void AddInput(const WebFormControlElement& input) {
    // Index the new element will occupy in |inputs|.
    const size_t index = inputs.size();

    std::string type;
    if (input.HasAttribute("type"))
      type = input.GetAttribute("type").Utf8();

    if (type == "password") {
      signature += kPasswordFieldSignature;
      password_inputs.push_back(index);
      // Some warnings are only raised when a password field is known to be a
      // real password (rather than a credit card security code, etc.), which
      // is signalled by an explicit autocomplete value.
      if (input.HasAttribute("autocomplete")) {
        const std::string autocomplete =
            input.GetAttribute("autocomplete").Utf8();
        const bool is_explicit = autocomplete == "current-password" ||
                                 autocomplete == "new-password";
        if (is_explicit)
          explicit_password_inputs.push_back(index);
      }
    } else {
      signature += kTextFieldSignature;
      text_inputs.push_back(index);
    }

    inputs.push_back(input);
  }
};

// Declares a lazily-constructed re2::RE2 object called NAME. The macro defines
// a LazyInstance traits struct whose New() forwards to
// CreateMatcher(instance, PATTERN), and a base::LazyInstance<re2::RE2> using
// those traits. CreateMatcher is not defined in this file; presumably it
// builds the RE2 for PATTERN in the storage |instance| points to -- confirm
// against its declaration (including which RE2 options, e.g. case
// sensitivity, it applies).
#define DECLARE_LAZY_MATCHER(NAME, PATTERN)                                   \
  struct LabelPatternLazyInstanceTraits_##NAME                                \
      : public base::internal::DestructorAtExitLazyInstanceTraits<re2::RE2> { \
    static re2::RE2* New(void* instance) {                                    \
      return CreateMatcher(instance, PATTERN);                                \
    }                                                                         \
  };                                                                          \
  base::LazyInstance<re2::RE2, LabelPatternLazyInstanceTraits_##NAME> NAME =  \
      LAZY_INSTANCE_INITIALIZER

// Matches a single non-word character. InferUsernameField() removes every
// match from a label's text before testing the hint matchers below.
DECLARE_LAZY_MATCHER(ignored_characters_matcher, R"(\W)");
// The three matchers below are applied by InputHint::MatchLabel() with
// RE2::FullMatch, i.e. the whole (stripped) label text has to match.
DECLARE_LAZY_MATCHER(username_matcher, R"(user(name)?|login)");
DECLARE_LAZY_MATCHER(email_matcher, R"(email(address)?)");
DECLARE_LAZY_MATCHER(telephone_matcher, R"((mobile)?(telephone)?(number|no))");

// The macro is only needed for the declarations above.
#undef DECLARE_LAZY_MATCHER

// Pairs a <label> text pattern that hints at the purpose of an element (for
// example: that it is a username field) with the index of the input whose
// label matched it.
struct InputHint {
  // Pattern the label text is tested against.
  raw_ptr<const re2::RE2> regex;
  // Index recorded by the most recent successful MatchLabel() call, or
  // std::string::npos while no label has matched.
  size_t match = std::string::npos;

  explicit InputHint(const re2::RE2* regex) : regex(regex) {}

  // Records |index| in |match| when the whole of |label_content| matches
  // |regex|; leaves |match| untouched otherwise.
  void MatchLabel(std::string& label_content, size_t index) {
    if (!re2::RE2::FullMatch(label_content, *regex))
      return;
    match = index;
  }
};

// A single <form> element may really contain several semantic forms, which
// confuses the Password Manager because it assumes one <form> element is one
// form. This heuristic inspects a form |signature| (a string of text/password
// field markers) and reports whether the form looks like more than one:
// either it holds more than three password fields, or, scanning left to
// right, a password field is followed (not necessarily directly) by a text
// field which is in turn followed by another password field.
bool FormIsTooComplex(const std::string& signature) {
  // Number of times the awaited field kind has been encountered. The scan
  // first waits for a password field, then a text field, then a password
  // field again, and so on.
  unsigned transitions = 0;
  unsigned passwords = 0;
  for (size_t i = 0; i < signature.size(); ++i) {
    const char field = signature[i];
    const bool awaiting_text = (transitions % 2) != 0;
    const char awaited =
        awaiting_text ? kTextFieldSignature : kPasswordFieldSignature;
    if (field == awaited)
      ++transitions;
    if (field == kPasswordFieldSignature)
      ++passwords;
  }
  return !(transitions < 3 && passwords <= 3);
}

// Stores an element's id in |ids| for duplicity-checking.
void TrackElementId(const WebElement& element,
                    std::map<std::string, std::vector<WebNode>>* nodes_for_id) {
  if (element.HasAttribute("id")) {
    std::string id_attr = element.GetAttribute("id").Utf8();
    (*nodes_for_id)[id_attr].push_back(element);
  }
}

// Avoids re-analysing nodes that an earlier call has already seen. This means
// some warnings can technically be missed (for example when an invalid
// attribute is added later), but such cases are assumed to be rare and are
// ignored for the sake of simplicity.
//
// Returns true when |renderer_id| is already present in |skip_renderer_ids|,
// in which case nothing is modified. Otherwise |renderer_id| is added to
// |skip_renderer_ids|, the id attribute of |element| is recorded in
// |nodes_for_id| via TrackElementId(), and false is returned.
template <typename RendererId>
bool TrackElementByRendererIdIfUntracked(
    const WebElement& element,
    const RendererId renderer_id,
    std::set<RendererId>* skip_renderer_ids,
    std::map<std::string, std::vector<WebNode>>* nodes_for_id) {
  // insert() reports through |second| whether the id was newly added.
  const bool newly_tracked = skip_renderer_ids->insert(renderer_id).second;
  if (!newly_tracked)
    return true;
  TrackElementId(element, nodes_for_id);
  return false;
}

// Emits the messages that concern the DOM structure as a whole (password
// fields outside of forms, duplicate ids) and collects the forms found in
// |document| for further analysis.
//
// |skip_form_ids| and |skip_control_ids| hold the renderer ids of forms and
// controls that were already processed; those elements are skipped, and every
// newly visited element is added to the matching set. Messages are sent to
// |logger|. Returns one FormInputCollection per top-level form of |document|,
// containing the not-yet-seen inputs that may be username or password fields.
std::vector<FormInputCollection> ExtractFormsForAnalysis(
    const WebDocument& document,
    std::set<FormRendererId>* skip_form_ids,
    std::set<FieldRendererId>* skip_control_ids,
    PageFormAnalyserLogger* logger) {
  std::vector<FormInputCollection> form_input_collections;

  // Elements grouped by the value of their id attribute, used for the
  // duplicate id warning at the end.
  std::map<std::string, std::vector<WebNode>> nodes_for_id;

  for (const WebFormElement& form : document.GetTopLevelForms()) {
    form_input_collections.push_back(FormInputCollection{form});
    // Collect all the inputs in the form.
    for (const WebFormControlElement& input : form.GetFormControlElements()) {
      if (TrackElementByRendererIdIfUntracked(
              input, form_util::GetFieldRendererId(input), skip_control_ids,
              &nodes_for_id)) {
        continue;
      }
      // We are only interested in a subset of input elements -- those likely
      // to be username or password fields.
      if (input.TagName() == "INPUT" &&
          (!input.HasAttribute("type") ||
           base::Contains(kTypeAttributes,
                          input.GetAttribute("type").Utf8()))) {
        form_input_collections.back().AddInput(input);
      }
    }
    TrackElementByRendererIdIfUntracked(
        form, form_util::GetFormRendererId(form), skip_form_ids, &nodes_for_id);
  }

  // Check for password fields that are not contained inside forms.
  auto password_inputs = document.QuerySelectorAll("input[type=\"password\"]");
  for (const WebElement& password_input : password_inputs) {
    const WebInputElement input_element =
        password_input.DynamicTo<WebInputElement>();
    if (!input_element) {
      continue;
    }
    // Password fields visited in the loop over the forms above are already in
    // |skip_control_ids| and are therefore skipped here, leaving just those
    // without associated forms.
    if (TrackElementByRendererIdIfUntracked(
            password_input, form_util::GetFieldRendererId(input_element),
            skip_control_ids, &nodes_for_id)) {
      continue;
    }
    logger->Send(
        LinkDocumentation("Password field is not contained in a form:"),
        PageFormAnalyserLogger::kVerbose, password_input);
  }

  // Check for input fields that are not contained inside forms, to make sure
  // their id attributes don't conflict with other fields also not contained
  // inside forms.
  std::string selector = "input:not([type])";
  for (const char* text_type : kTypeTextAttributes)
    base::StrAppend(&selector, {", input[type=\"", text_type, "\"]"});
  auto text_inputs = document.QuerySelectorAll(WebString::FromUTF8(selector));
  for (const WebElement& text_input : text_inputs) {
    const WebInputElement input_element =
        text_input.DynamicTo<WebInputElement>();
    if (!input_element) {
      continue;
    }
    TrackElementByRendererIdIfUntracked(
        text_input, form_util::GetFieldRendererId(input_element),
        skip_control_ids, &nodes_for_id);
  }

  // Warn against elements sharing an id attribute. Duplicate id attributes both
  // are against the HTML specification and can cause issues with password
  // saving/filling, as the Password Manager makes the assumption that ids may
  // be used as a unique identifier for nodes.
  for (const auto& [id_attr, nodes] : nodes_for_id) {
    if (nodes.size() <= 1)
      continue;
    logger->Send(LinkDocumentation(base::StringPrintf(
                     "Found %zu elements with non-unique id #%s:", nodes.size(),
                     id_attr.c_str())),
                 PageFormAnalyserLogger::kWarning, nodes);
  }

  return form_input_collections;
}

// The username field is the most difficult field to identify, as there
// are often many other textual fields in a form, and it is not always
// possible to work out which one is the username. Here, we find any
// <label> elements pointing to the input fields, and check their content.
// Labels containing text such as "Username:" or "Email address:" are
// likely to indicate the desired field, and will be prioritized over
// other fields.
void InferUsernameField(
    const WebFormElement& form,
    const std::vector<WebFormControlElement>& inputs,
    size_t username_field_guess,
    std::map<size_t, std::string>* autocomplete_suggestions) {
  WebElementCollection labels(form.GetElementsByHTMLTagName("label"));
  DCHECK(labels);

  std::vector<InputHint> input_hints;

  input_hints.emplace_back(username_matcher.Pointer());
  input_hints.emplace_back(email_matcher.Pointer());
  input_hints.emplace_back(telephone_matcher.Pointer());

  for (WebElement item = labels.FirstItem(); item; item = labels.NextItem()) {
    WebLabelElement label(item.To<WebLabelElement>());
    WebElement control(label.CorrespondingControl());
    if (control && control.IsFormControlElement()) {
      WebFormControlElement form_control(control.To<WebFormControlElement>());
      auto found = std::ranges::find(inputs, form_control);
      if (found != inputs.end()) {
        std::string label_content(
            base::UTF16ToUTF8(form_util::FindChildText(label)));
        // Reduce to plain-text, as labels often contain extra punctuation.
        re2::RE2::GlobalReplace(&label_content,
                                ignored_characters_matcher.Get(), "");
        for (InputHint& input_hint : input_hints)
          input_hint.MatchLabel(label_content, found - inputs.begin());
      }
    }
  }

  for (InputHint& input_hint : input_hints) {
    if (input_hint.match != std::string::npos) {
      username_field_guess = input_hint.match;
      break;
    }
  }

  (*autocomplete_suggestions)[username_field_guess] = "username";
}

// Guesses, from the number of password fields alone, what kind of form this
// is (e.g. registration, log-in or password reset) and stores the matching
// autocomplete value for each password field in |autocomplete_suggestions|,
// keyed by the indices listed in |password_inputs|:
//   * one field:    "current-password" if |has_text_field|, otherwise
//                   "new-password";
//   * two fields:   both "new-password";
//   * three fields: "current-password" followed by two "new-password".
// Any other number of password fields produces no suggestion.
void GuessAutocompleteAttributesForPasswordFields(
    const std::vector<size_t>& password_inputs,
    bool has_text_field,
    std::map<size_t, std::string>* autocomplete_suggestions) {
  const size_t count = password_inputs.size();
  if (count == 0 || count > 3)
    return;

  std::map<size_t, std::string>& suggestions = *autocomplete_suggestions;
  if (count == 1) {
    suggestions[password_inputs.front()] =
        has_text_field ? "current-password" : "new-password";
    return;
  }

  // Two or three fields: the last two are always treated as the new password
  // and its confirmation; with three, the first one is the current password.
  if (count == 3)
    suggestions[password_inputs.front()] = "current-password";
  suggestions[password_inputs[count - 2]] = "new-password";
  suggestions[password_inputs[count - 1]] = "new-password";
}

// Emits the messages that are specific to a single form (missing username
// field, overly complex form, missing autocomplete attributes) to |logger|.
// Forms without password fields are ignored.
void AnalyseForm(const FormInputCollection& form_input_collection,
                 PageFormAnalyserLogger* logger) {
  const std::vector<size_t>& password_inputs =
      form_input_collection.password_inputs;
  // We're only interested in forms that contain password fields.
  if (password_inputs.empty())
    return;

  const WebFormElement& form = form_input_collection.form;
  const std::vector<WebFormControlElement>& inputs =
      form_input_collection.inputs;
  const std::vector<size_t>& text_inputs = form_input_collection.text_inputs;
  const std::vector<size_t>& explicit_password_inputs =
      form_input_collection.explicit_password_inputs;
  const std::string& signature = form_input_collection.signature;

  const bool has_text_field = !text_inputs.empty();
  const bool has_explicit_password = !explicit_password_inputs.empty();
  // Only meaningful once the search below has run; zero is just a default.
  size_t username_field_guess = 0;

  // To keep the number of messages and the chance of false positives down,
  // username suggestions are only made when password fields are annotated.
  if (has_explicit_password) {
    const size_t first_explicit = explicit_password_inputs[0];
    const bool text_field_precedes =
        has_text_field && text_inputs[0] <= first_explicit;
    if (text_field_precedes) {
      // By default (if the other heuristics fail), the closest text field
      // preceding the first annotated password field is considered the
      // username field. Walk backwards through the signature to find it.
      username_field_guess = first_explicit - 1;
      while (true) {
        DCHECK(username_field_guess < signature.size());
        if (signature[username_field_guess] == kTextFieldSignature)
          break;
        --username_field_guess;
      }
    } else {
      // There is no formal requirement to have associated username fields for
      // every password field, but providing one ensures that the Password
      // Manager associates the correct account name with the password (for
      // example in password reset forms).
      logger->Send(
          LinkDocumentation("Password forms should have (optionally hidden) "
                            "username fields for accessibility:"),
          PageFormAnalyserLogger::kVerbose, form);
    }
  }

  // No autocomplete suggestions are attempted for forms that look like
  // several forms in one.
  if (FormIsTooComplex(signature)) {
    logger->Send(
        LinkDocumentation(
            "Multiple forms should be contained in their own "
            "form elements; break up complex forms into ones that represent a "
            "single action:"),
        PageFormAnalyserLogger::kVerbose, form);
    return;
  }

  // The autocomplete attribute gives the Password Manager valuable hints about
  // the semantic structure of a form. Instead of merely pointing out that the
  // attribute is missing, a concrete value is suggested to save the developer
  // time.
  std::map<size_t, std::string> autocomplete_suggestions;
  // Without any explicitly declared password field no autocomplete="username"
  // is suggested, to avoid false positives associated with credit card
  // details.
  if (has_explicit_password && has_text_field &&
      text_inputs[0] < explicit_password_inputs[0]) {
    InferUsernameField(form, inputs, username_field_guess,
                       &autocomplete_suggestions);
  }

  GuessAutocompleteAttributesForPasswordFields(password_inputs, has_text_field,
                                               &autocomplete_suggestions);

  // Every input for which there is a suggestion, and which has no
  // autocomplete attribute yet, gets a message proposing the inferred value.
  for (size_t i = 0; i < inputs.size(); ++i) {
    const auto suggestion = autocomplete_suggestions.find(i);
    if (suggestion == autocomplete_suggestions.end() ||
        inputs[i].HasAttribute("autocomplete")) {
      continue;
    }
    logger->Send(LinkDocumentation("Input elements should have autocomplete "
                                   "attributes (suggested: \"" +
                                   suggestion->second + "\"):"),
                 PageFormAnalyserLogger::kVerbose, inputs[i]);
  }
}

}  // namespace

// Out-of-line definitions to keep [chromium-style] happy.
// Both the constructor and the destructor use the compiler-generated
// behaviour; they are only defined here rather than in the class body.
PagePasswordsAnalyser::PagePasswordsAnalyser() = default;

PagePasswordsAnalyser::~PagePasswordsAnalyser() = default;

// Forgets every form and control recorded by earlier analyses, so that the
// next AnalyseDocumentDOM() call no longer skips them.
void PagePasswordsAnalyser::Reset() {
  skip_form_element_renderer_ids_.clear();
  skip_control_element_renderer_ids_.clear();
}

// Analyses the document of |frame|: first the DOM-wide checks, then the
// per-form checks, and finally flushes everything collected in |logger|.
// Forms and controls already recorded in the skip sets of this object are not
// analysed again. |frame| must not be null.
void PagePasswordsAnalyser::AnalyseDocumentDOM(WebLocalFrame* frame,
                                               PageFormAnalyserLogger* logger) {
  DCHECK(frame);

  WebDocument document(frame->GetDocument());

  // Pull the forms out of the DOM; this also emits the DOM-level warnings.
  std::vector<FormInputCollection> forms = ExtractFormsForAnalysis(
      document, &skip_form_element_renderer_ids_,
      &skip_control_element_renderer_ids_, logger);

  // Check every form on its own, for example with respect to autocomplete
  // attributes.
  for (const auto& form_input_collection : forms) {
    AnalyseForm(form_input_collection, logger);
  }

  // Finally, send all the warnings and errors to the console.
  logger->Flush();
}

// Convenience overload: analyses the document of |frame| using a
// PageFormAnalyserLogger constructed from |frame| that lives only for the
// duration of this call.
void PagePasswordsAnalyser::AnalyseDocumentDOM(WebLocalFrame* frame) {
  PageFormAnalyserLogger logger(frame);
  AnalyseDocumentDOM(frame, &logger);
}

}  // namespace autofill