1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/common/autofill_regexes.h"
#include <tuple>
#include "base/check.h"
#include "base/containers/to_vector.h"
#include "base/i18n/unicodestring.h"
#include "base/memory/ptr_util.h"
#include "base/not_fatal_until.h"
namespace {
// Upper bound, in UTF-16 code units, on the length of an input string that
// MatchesRegex() will hand to ICU. Longer inputs are rejected (reported as
// "no match") to avoid causing an icu::RegexMatcher stack overflow.
// (crbug.com/1198219)
constexpr int kMaxStringLength = 5000;
} // namespace
namespace autofill {
std::unique_ptr<const icu::RegexPattern> CompileRegex(
std::u16string_view regex) {
const icu::UnicodeString icu_regex(false, regex.data(), regex.length());
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::RegexPattern> regex_pattern = base::WrapUnique(
icu::RegexPattern::compile(icu_regex, UREGEX_CASE_INSENSITIVE, status));
DCHECK(U_SUCCESS(status));
return regex_pattern;
}
// Returns true if `regex_pattern` matches somewhere in `input`.
//
// `input`: text to search. Inputs longer than `kMaxStringLength` UTF-16 code
//     units are never searched and are reported as "no match".
// `regex_pattern`: compiled pattern to search for.
// `groups`: optional out-parameter. On a successful match it is resized to
//     the pattern's capture-group count plus one and filled with group 0 (the
//     whole match) followed by each capture group. It is left untouched when
//     there is no match or when it is null.
bool MatchesRegex(std::u16string_view input,
                  const icu::RegexPattern& regex_pattern,
                  std::vector<std::u16string>* groups) {
  if (input.size() > kMaxStringLength) {
    return false;
  }

  UErrorCode status = U_ZERO_ERROR;
  // `regex_matcher` holds a reference to `icu_input`, so `icu_input` is
  // declared first and therefore outlives the matcher.
  icu::UnicodeString icu_input(false, input.data(), input.length());
  std::unique_ptr<icu::RegexMatcher> regex_matcher =
      base::WrapUnique(regex_pattern.matcher(icu_input, status));
  DCHECK(U_SUCCESS(status));
  // matcher() returns null when it fails. Treat that as "no match" instead of
  // dereferencing a null pointer in builds where the DCHECK is compiled out.
  if (!regex_matcher) {
    return false;
  }

  const UBool matched = regex_matcher->find(0, status);
  DCHECK(U_SUCCESS(status));

  if (matched && groups) {
    // Slot 0 is the whole match; slots 1..groupCount() are capture groups.
    const int32_t slot_count = regex_matcher->groupCount() + 1;
    groups->resize(slot_count);
    for (int32_t i = 0; i < slot_count; ++i) {
      icu::UnicodeString match_unicode = regex_matcher->group(i, status);
      DCHECK(U_SUCCESS(status));
      (*groups)[i] = base::i18n::UnicodeStringToString16(match_unicode);
    }
  }
  return matched;
}
// Splits `input` into fields, using matches of `regex_pattern` as the
// delimiters, and returns the fields as UTF-16 strings.
//
// `max_groups` is the capacity of the output array handed to ICU, i.e. the
// largest number of fields that can be returned. Returns std::nullopt when
// ICU reports an error or produces no fields.
//
// NOTE(review): unlike MatchesRegex(), no `kMaxStringLength` cap is applied
// to `input` here -- confirm that this is intentional.
std::optional<std::vector<std::u16string>> SplitByRegex(
    std::u16string_view input,
    const icu::RegexPattern& regex_pattern,
    size_t max_groups) {
  icu::UnicodeString icu_input(false, input.data(), input.length());

  // ICU writes the fields into a caller-provided array, so allocate all
  // `max_groups` slots up front and trim afterwards.
  std::vector<icu::UnicodeString> fields(max_groups);
  UErrorCode status = U_ZERO_ERROR;
  const int32_t field_count =
      regex_pattern.split(icu_input, fields.data(), fields.size(), status);

  const bool split_succeeded = U_SUCCESS(status) && field_count > 0;
  if (!split_succeeded) {
    return std::nullopt;
  }

  // Drop the slots ICU did not fill before converting.
  fields.resize(field_count);
  return base::ToVector(fields, &base::i18n::UnicodeStringToString16);
}
// Creates an empty cache and records whether it was requested to be
// thread-safe.
AutofillRegexCache::AutofillRegexCache(ThreadSafe thread_safe)
    : thread_safe_(thread_safe) {
  // Only the non-thread-safe flavour is checked against `sequence_checker_`.
  if (!thread_safe_) {
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  }
}
// Destroys the cache. A non-thread-safe cache DCHECKs that destruction
// happens on the sequence tracked by `sequence_checker_`.
AutofillRegexCache::~AutofillRegexCache() {
  if (!thread_safe_) {
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  }
}
// Returns the compiled pattern for `regex`, compiling it and adding it to
// `cache_` on first use. The returned pointer refers to the object stored in
// `cache_`; the caller does not own it.
//
// A thread-safe cache performs the lookup/insert while holding `lock_`; a
// non-thread-safe cache instead DCHECKs that it is used on the sequence
// tracked by `sequence_checker_`.
const icu::RegexPattern* AutofillRegexCache::GetRegexPattern(
    std::u16string_view regex) {
  // Looks `regex` up in `cache_`, compiling and inserting it on a miss.
  auto find_or_compile = [&] {
    auto entry = cache_.find(regex);
    if (entry == cache_.end()) {
      auto insertion =
          cache_.emplace(std::u16string(regex), CompileRegex(regex));
      // The key was just found to be absent, so the insert must take place.
      DCHECK(insertion.second);
      entry = insertion.first;
    }
    CHECK(entry != cache_.end());
    DCHECK(entry->second.get());
    return entry->second.get();
  };

  if (thread_safe_) {
    base::AutoLock lock(lock_);
    return find_or_compile();
  }

  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  return find_or_compile();
}
} // namespace autofill
|