1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
|
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/continuous_search/common/title_validator.h"
#include <algorithm>
#include <string_view>
#include "base/containers/adapters.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
namespace continuous_search {
namespace {
// Upper bound on the number of characters ValidateTitleT() writes to its
// output; longer titles are cut off at this length.
// Based on frame.mojom `kMaxTitleChars`.
constexpr size_t kMaxLength = 4096;
// Returns true when `c` must be treated as a separator while canonicalizing a
// title. That is the case for:
//   * control characters, i.e. any value below 0x0020 or U+007F, and
//   * anything base::IsUnicodeWhitespace() accepts.
// U+000B LINE TABULATION is deliberately exempt and always yields false, in
// keeping with the implementation in blink::Document's CanonicalizedTitle
// method.
bool IsUnicodeWhitespaceOrControl(wchar_t c) {
  // The LINE TABULATION exemption overrides every other rule.
  if (c == 0x000B) {
    return false;
  }
  // Control characters.
  if (c < 0x0020 || c == 0x007F) {
    return true;
  }
  return base::IsUnicodeWhitespace(c);
}
// Produces a canonical copy of the title held by the string view `input`:
//   1. Leading and trailing whitespace/control characters (as classified by
//      IsUnicodeWhitespaceOrControl()) are dropped.
//   2. Every interior run of such characters is collapsed to a single space.
//   3. At most `kMaxLength` characters are emitted.
// Returns an empty string when `input` is empty or holds nothing but
// whitespace/control characters.
// NOTE: the length cap is applied while copying, so a truncated result can end
// with a collapsed space.
template <typename T, typename CharT = typename T::value_type>
std::basic_string<CharT> ValidateTitleT(T input) {
  // Find the first and last characters that survive trimming.
  const auto leading =
      std::ranges::find_if_not(input, &IsUnicodeWhitespaceOrControl);
  const auto trailing = std::ranges::find_if_not(
      base::Reversed(input), &IsUnicodeWhitespaceOrControl);

  // Nothing survives trimming.
  if (input.empty() || leading == input.end()) {
    return std::basic_string<CharT>();
  }

  // `trailing` is a reverse iterator; its base() points one past the last
  // character to keep, which makes [start, stop) a half-open range.
  const size_t start = std::distance(input.begin(), leading);
  const size_t stop = std::distance(input.begin(), trailing.base());
  DCHECK_GT(stop, start);  // Invariant based on the find_if algorithm.

  const auto trimmed = input.substr(start, stop - start);

  // Allocate the largest size the result can take up front, then shrink to
  // the number of characters actually written once copying is done.
  std::basic_string<CharT> result(std::min(trimmed.size(), kMaxLength),
                                  CharT());
  size_t written = 0;
  bool previous_was_separator = false;
  for (const CharT ch : trimmed) {
    if (!IsUnicodeWhitespaceOrControl(ch)) {
      previous_was_separator = false;
      result[written++] = ch;
    } else if (!previous_was_separator) {
      // First separator of a run: emit one space and swallow the rest.
      previous_was_separator = true;
      result[written++] = L' ';
    }
    if (written == kMaxLength) {
      break;
    }
  }
  result.resize(written);
  return result;
}
} // namespace
std::string ValidateTitleAscii(std::string_view title) {
return ValidateTitleT(title);
}
std::u16string ValidateTitle(std::u16string_view title) {
return ValidateTitleT(title);
}
} // namespace continuous_search
|