1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/omnibox_text_util.h"
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "url/url_constants.h"
namespace omnibox {
// Removes every leading "javascript:" scheme (matched ASCII
// case-insensitively) from `text`, together with any whitespace or control
// characters (code units below 0x20) that precede or separate those schemes.
//
// Returns the remainder of `text` after the last leading scheme, starting at
// the first character that is neither whitespace nor a control character. If
// `text` does not start with a scheme (ignoring leading whitespace/control
// characters), `text` is returned unmodified, leading whitespace included.
std::u16string StripJavascriptSchemas(const std::u16string& text) {
  const std::u16string scheme_with_colon(
      base::StrCat({url::kJavaScriptScheme16, u":"}));
  const size_t scheme_length = scheme_with_colon.length();

  bool stripped_any_scheme = false;
  size_t cursor = 0;
  while (cursor < text.size()) {
    const char16_t current = text[cursor];

    // Whitespace and control characters are skipped wherever they appear
    // before or between schemes.
    if (base::IsUnicodeWhitespace(current) || (current < 0x20)) {
      ++cursor;
      continue;
    }

    // The first "real" character that does not begin another scheme ends the
    // scan.
    if (!base::EqualsCaseInsensitiveASCII(
            text.substr(cursor, scheme_length), scheme_with_colon)) {
      break;
    }

    // Keep scanning after a match so that repeated prefixes such as
    // "javascript:javascript:alert()" are removed completely.
    stripped_any_scheme = true;
    cursor += scheme_length;
  }

  // Only trim when at least one scheme was seen; otherwise the input is
  // handed back untouched.
  return stripped_any_scheme ? text.substr(cursor) : text;
}
// Cleans up `text` before it is pasted.
//
// Rules, where "whitespace" means the characters in base::kWhitespaceUTF16:
//  * Empty input yields an empty string.
//  * Input consisting solely of whitespace yields a single space.
//  * Leading and trailing whitespace is dropped.
//  * Interior whitespace runs without CR/LF are copied verbatim.
//  * Interior whitespace runs containing CR/LF collapse to a single ' ' when
//    at least one CR/LF-free interior run was copied, and to nothing
//    otherwise.
//  * The result is finally passed through StripJavascriptSchemas().
std::u16string SanitizeTextForPaste(const std::u16string& text) {
  if (text.empty()) {
    return std::u16string();  // Nothing to sanitize.
  }

  // `cursor` always sits on the first character of the next non-whitespace
  // sequence. Starting it past the leading whitespace is what discards that
  // whitespace.
  size_t cursor = text.find_first_not_of(base::kWhitespaceUTF16);
  if (cursor == std::u16string::npos) {
    return u" ";  // All-whitespace input becomes one space.
  }

  std::u16string sanitized;
  sanitized.reserve(text.size());  // The result never outgrows the input.

  // Appends text[from, to) to `sanitized`; `to` == npos means "to the end".
  const auto append_slice = [&text, &sanitized](size_t from, size_t to) {
    const size_t count =
        (to == std::u16string::npos) ? std::u16string::npos : (to - from);
    sanitized.append(text, from, count);
  };

  // Runs containing CR/LF are first written as a lone LF placeholder; the
  // placeholders are rewritten once it is known whether any CR/LF-free run
  // was encountered.
  constexpr char16_t kLineFeedPlaceholder[] = u"\n";
  constexpr char16_t kSingleSpace[] = u" ";
  bool wrote_placeholder = false;
  bool copied_plain_whitespace = false;

  for (;;) {
    // Emit the non-whitespace sequence that begins at `cursor`.
    const size_t run_begin =
        text.find_first_of(base::kWhitespaceUTF16, cursor + 1);
    append_slice(cursor, run_begin);
    if (run_begin == std::u16string::npos) {
      break;  // The text ended with that sequence.
    }

    // A whitespace run starts at `run_begin`. Locate the first character in
    // it that is not CR/LF-free whitespace.
    const size_t run_stop =
        text.find_first_not_of(base::kWhitespaceNoCrLfUTF16, run_begin);
    if (run_stop == std::u16string::npos) {
      break;  // Trailing whitespace without CR/LF is dropped.
    }

    const bool stopped_on_line_break =
        (text[run_stop] == '\n') || (text[run_stop] == '\r');
    if (!stopped_on_line_break) {
      // Interior run with no CR/LF: keep it exactly as written.
      copied_plain_whitespace = true;
      append_slice(run_begin, run_stop);
      cursor = run_stop;
      continue;
    }

    // The run contains CR/LF. Skip whatever whitespace is left in it.
    cursor = text.find_first_not_of(base::kWhitespaceUTF16, run_stop + 1);
    if (cursor == std::u16string::npos) {
      break;  // Trailing whitespace is dropped.
    }

    wrote_placeholder = true;
    sanitized.push_back(u'\n');
  }

  // Resolve the placeholders: ' ' if the text had other interior whitespace,
  // otherwise remove them entirely.
  if (wrote_placeholder) {
    const std::u16string replacement = copied_plain_whitespace
                                           ? std::u16string(kSingleSpace)
                                           : std::u16string();
    base::ReplaceChars(sanitized, kLineFeedPlaceholder, replacement,
                       &sanitized);
  }

  return StripJavascriptSchemas(sanitized);
}
} // namespace omnibox
|