File: title_validator.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (80 lines) | stat: -rw-r--r-- 2,289 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/continuous_search/common/title_validator.h"

#include <algorithm>
#include <string_view>

#include "base/containers/adapters.h"
#include "base/logging.h"
#include "base/strings/string_util.h"

namespace continuous_search {

namespace {

// Upper bound on the number of characters (code units) kept in a validated
// title; anything beyond this is truncated by ValidateTitleT().
// Based on frame.mojom `kMaxTitleChars`.
constexpr size_t kMaxLength = 4096;

// Returns true when `c` must be treated as a separator inside a title, i.e.
// when it is either
//   * a control character: any value below U+0020, or U+007F DELETE, or
//   * whitespace according to base::IsUnicodeWhitespace().
// U+000B LINE TABULATION is the single exception and is never classified as a
// separator, in keeping with the implementation of blink::Document's
// CanonicalizedTitle method.
//
// NOTE(review): callers pass narrow and UTF-16 characters that are implicitly
// converted to `wchar_t`. For a narrow `char` with the high bit set the
// converted value depends on the platform's `char`/`wchar_t` signedness;
// confirm that narrow callers only supply ASCII.
bool IsUnicodeWhitespaceOrControl(wchar_t c) {
  // Handle the permitted character first so none of the checks below can
  // claim it.
  if (c == 0x000B) {
    return false;
  }
  if (c < 0x0020 || c == 0x007F) {
    return true;
  }
  return base::IsUnicodeWhitespace(c);
}

// Returns the canonical form of `input` as an owning string:
//   * leading and trailing whitespace/control characters are removed,
//   * each interior run of whitespace/control characters becomes a single ' ',
//   * the result holds at most `kMaxLength` characters and never ends with the
//     ' ' separator, even when the length cap cuts the text right after one.
//
// `T` is a string-view-like type (this file instantiates it with
// std::string_view and std::u16string_view). `CharT` is its character type and
// selects the returned std::basic_string specialization.
//
// Returns an empty string when `input` is empty or contains nothing but
// whitespace/control characters.
template <typename T, typename CharT = typename T::value_type>
std::basic_string<CharT> ValidateTitleT(T input) {
  const auto begin_it =
      std::ranges::find_if_not(input, &IsUnicodeWhitespaceOrControl);
  if (begin_it == input.end()) {
    // Nothing to keep. This also covers an empty `input`.
    return std::basic_string<CharT>();
  }

  // At least one character survives, so the reverse scan is guaranteed to stop
  // on a valid position at or after `begin_it`.
  const auto end_it = std::ranges::find_if_not(base::Reversed(input),
                                               &IsUnicodeWhitespaceOrControl);

  const size_t first = begin_it - input.begin();
  const size_t last = std::distance(input.begin(), end_it.base());
  DCHECK_GT(last, first);  // Invariant based on the find_if algorithm.
  const size_t length = last - first;

  // Collapsing only ever shrinks the text, so min(length, kMaxLength) is an
  // upper bound on the number of characters written below.
  std::basic_string<CharT> output;
  output.resize(std::min(length, kMaxLength));

  constexpr CharT kSeparator = static_cast<CharT>(' ');
  size_t output_pos = 0;
  bool in_whitespace = false;
  for (const auto c : input.substr(first, length)) {
    if (IsUnicodeWhitespaceOrControl(c)) {
      // Only the first character of a run emits the separator.
      if (!in_whitespace) {
        in_whitespace = true;
        output[output_pos++] = kSeparator;
      }
    } else {
      in_whitespace = false;
      output[output_pos++] = c;
    }
    if (output_pos == kMaxLength) {
      break;
    }
  }

  // The trimmed range ends on a kept character, so the output can only end in
  // a separator when the length cap stopped the loop right after one was
  // written. Drop it so the result stays trimmed and re-validating the output
  // yields the same string.
  if (output_pos > 0 && output[output_pos - 1] == kSeparator) {
    --output_pos;
  }

  output.resize(output_pos);
  return output;
}

}  // namespace

std::string ValidateTitleAscii(std::string_view title) {
  return ValidateTitleT(title);
}

std::u16string ValidateTitle(std::u16string_view title) {
  return ValidateTitleT(title);
}

}  // namespace continuous_search