1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/geo/address_rewriter.h"
#include <memory>
#include <string_view>
#include <unordered_map>
#include "base/i18n/case_conversion.h"
#include "base/no_destructor.h"
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/autofill/core/browser/geo/grit/autofill_address_rewriter_resources_map.h"
#include "third_party/re2/src/re2/re2.h"
#include "third_party/zlib/google/compression_utils.h"
#include "ui/base/resource/resource_bundle.h"
namespace autofill {
namespace {
// Aliases for the types used by the compiled rules cache.
//
// A CompiledRule pairs a compiled RE2 pattern with the replacement string
// that is substituted for the matches of that pattern.
using CompiledRule = std::pair<std::unique_ptr<re2::RE2>, std::string>;
// An ordered list of rules; rewriting applies them one after another.
using CompiledRuleVector = std::vector<CompiledRule>;
// Maps a cache key (a region code, or the raw rule text for custom rules) to
// the rules compiled for that key.
using CompiledRuleCache = std::unordered_map<std::string, CompiledRuleVector>;
// Builds the resource-map key under which the rewriting rules for |region|
// are registered, i.e. "IDR_ADDRESS_REWRITER_<region>_RULES".
std::string GetMapKey(const std::string& region) {
  static constexpr std::string_view kKeyPrefix = "IDR_ADDRESS_REWRITER_";
  static constexpr std::string_view kKeySuffix = "_RULES";
  return base::StrCat({kKeyPrefix, region, kKeySuffix});
}
// Returns the uncompressed rewriting rules bundled for |region|.
//
// The resource table is scanned for the entry whose path equals the map key
// of |region|; the matching raw resource is then gzip-uncompressed. An empty
// string is returned when no entry matches or when decompression fails, so
// callers can treat both cases as "no rules for this region".
std::string ExtractRegionRulesData(const std::string& region) {
  const std::string resource_key = GetMapKey(region);
  for (const webui::ResourcePath& resource :
       kAutofillAddressRewriterResources) {
    if (resource.path == resource_key) {
      const std::string_view raw_resource =
          ui::ResourceBundle::GetSharedInstance().GetRawDataResource(
              resource.id);
      std::string data;
      // Check the result instead of relying on whatever the decompressor
      // leaves in |data| on failure: a corrupt resource must never be parsed
      // as rules.
      if (!compression::GzipUncompress(raw_resource, &data)) {
        return std::string();
      }
      return data;
    }
  }
  return std::string();
}
// Parses |data_string| and appends the rules it describes to
// |compiled_rules|.
//
// Each rule is encoded as "<pattern>\t<replacement>\n". The pattern is
// compiled as a UTF-8 RE2 expression with the word-boundary option enabled;
// the replacement is stored verbatim. The final rule may omit its trailing
// newline. Parsing stops as soon as the remaining data contains no tab, since
// no further pattern/replacement pair can be formed from it.
void CompileRulesFromData(const std::string& data_string,
                          CompiledRuleVector* compiled_rules) {
  re2::RE2::Options options;
  options.set_encoding(RE2::Options::EncodingUTF8);
  options.set_word_boundary(true);

  std::string_view data = data_string;
  while (!data.empty()) {
    const size_t pattern_end = data.find('\t');
    if (pattern_end == std::string_view::npos) {
      // Without this guard |pattern_end + 1| wraps around to 0, nothing is
      // consumed, and unterminated input would spin in this loop forever.
      break;
    }
    auto pattern =
        std::make_unique<re2::RE2>(data.substr(0, pattern_end), options);
    data.remove_prefix(pattern_end + 1);

    // substr() clamps at the end of the view, so a missing newline simply
    // yields the rest of the data as the replacement.
    const size_t rewrite_end = data.find('\n');
    compiled_rules->emplace_back(std::move(pattern),
                                 std::string(data.substr(0, rewrite_end)));
    if (rewrite_end == std::string_view::npos) {
      // Last rule had no terminating newline; everything has been consumed.
      break;
    }
    data.remove_prefix(rewrite_end + 1);
  }
}
// The cache of compiled string replacement rules, keyed by region. This class
// is a singleton that compiles the rules for a given region the first time
// they are requested.
//
// Entries are never erased, and std::unordered_map keeps references to its
// elements valid across rehashing, so the pointers handed out below stay
// usable after later insertions.
class Cache {
 public:
  // Returns the singleton instance of the cache.
  static Cache* GetInstance() {
    static base::NoDestructor<Cache> instance;
    return instance.get();
  }

  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // If the rules for |region| have already been compiled and cached, returns
  // a pointer to them. Otherwise, finds the rules for |region| (returning
  // nullptr if no such rules exist), compiles them, caches them, and returns
  // a pointer to the cached rules.
  const CompiledRuleVector* GetRulesForRegion(const std::string& region) {
    // Take the lock so that we don't update the data cache concurrently. Note
    // that the returned data is const and can be concurrently accessed, just
    // not the data cache.
    base::AutoLock auto_lock(lock_);

    // If we find a cached set of rules, return a pointer to the data.
    auto cache_iter = data_.find(region);
    if (cache_iter != data_.end()) {
      return &cache_iter->second;
    }

    // Cache miss. Look for the raw rules. If none, then return nullptr.
    std::string region_rules = ExtractRegionRulesData(region);
    if (region_rules.empty()) {
      return nullptr;
    }

    // Add a new rule vector to the cache and populate it with compiled rules.
    CompiledRuleVector& compiled_rules = data_[region];
    CompileRulesFromData(region_rules, &compiled_rules);

    // Return a pointer to the data.
    return &compiled_rules;
  }

  // Compiles the rules described by |data| and returns a pointer to them.
  // Used for creating unit tests. The compiled rules are stored in the cache
  // (keyed by |data| itself) so that the returned pointer stays valid.
  const CompiledRuleVector* CreateRulesForData(const std::string& data) {
    // This mutates |data_| just like GetRulesForRegion(), so it needs the
    // same lock.
    base::AutoLock auto_lock(lock_);

    // Compile only when the entry is new. Re-compiling into an existing
    // vector would append duplicate rules and could reallocate storage that
    // earlier callers are still reading through their pointer.
    auto [cache_iter, inserted] = data_.try_emplace(data);
    if (inserted) {
      CompileRulesFromData(data, &cache_iter->second);
    }

    // Return a pointer to the data.
    return &cache_iter->second;
  }

 private:
  Cache() = default;

  // Synchronizes access to |data_|, ensuring that a given set of rules is
  // only compiled once.
  base::Lock lock_;

  // The cache of compiled rules, keyed by region (or by the raw rule text for
  // rules created through CreateRulesForData()).
  CompiledRuleCache data_;

  friend class base::NoDestructor<Cache>;
};
} // namespace
// static
// Convenience wrapper: obtains the rewriter for |country_code| and applies it
// to |normalized_text| in a single call.
std::u16string AddressRewriter::RewriteForCountryCode(
    const AddressCountryCode& country_code,
    const std::u16string& normalized_text) {
  return AddressRewriter::ForCountryCode(country_code)
      .Rewrite(normalized_text);
}
// static
// Returns a rewriter backed by the cached rules for |country_code|. The code
// is upper-cased before the lookup. When the cache has no rules for the
// region, the rewriter's |impl_| is left null.
AddressRewriter AddressRewriter::ForCountryCode(
    const AddressCountryCode& country_code) {
  const CompiledRuleVector* const region_rules =
      Cache::GetInstance()->GetRulesForRegion(
          base::ToUpperASCII(country_code.value()));

  AddressRewriter result;
  result.impl_ = region_rules;
  return result;
}
// static
// Returns a rewriter backed by rules compiled from the |custom_rules| text
// rather than from a bundled region resource.
AddressRewriter AddressRewriter::ForCustomRules(
    const std::string& custom_rules) {
  const CompiledRuleVector* const compiled =
      Cache::GetInstance()->CreateRulesForData(custom_rules);

  AddressRewriter result;
  result.impl_ = compiled;
  return result;
}
// Applies this rewriter's rules to |text| and returns the result with
// whitespace collapsed. A rewriter without rules only collapses whitespace.
std::u16string AddressRewriter::Rewrite(const std::u16string& text) const {
  if (impl_ != nullptr) {
    // RE2 works on UTF-8, so convert once, run every rule in order over the
    // same buffer, and convert back at the end. Whitespace is not normalized
    // up front; the rule patterns are expected to tolerate it.
    std::string rewritten = base::UTF16ToUTF8(text);
    for (const auto& [pattern, replacement] :
         *static_cast<const CompiledRuleVector*>(impl_)) {
      RE2::GlobalReplace(&rewritten, *pattern, replacement);
    }

    // NOTE(review): this path uses the ASCII whitespace collapser while the
    // no-rules path below uses the UTF-16 one — confirm the difference is
    // intended.
    return base::UTF8ToUTF16(base::CollapseWhitespaceASCII(rewritten, true));
  }

  // No rules for this rewriter: only normalize whitespace.
  return base::CollapseWhitespace(text, true);
}
} // namespace autofill
|