File: address_rewriter.cc

package info (click to toggle)
chromium 139.0.7258.138-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,120,676 kB
  • sloc: cpp: 35,100,869; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (190 lines) | stat: -rw-r--r-- 6,522 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/core/browser/geo/address_rewriter.h"

#include <memory>
#include <string_view>
#include <unordered_map>

#include "base/i18n/case_conversion.h"
#include "base/no_destructor.h"
#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/autofill/core/browser/geo/grit/autofill_address_rewriter_resources_map.h"
#include "third_party/re2/src/re2/re2.h"
#include "third_party/zlib/google/compression_utils.h"
#include "ui/base/resource/resource_bundle.h"

namespace autofill {
namespace {

// Types used by the compiled rules cache.
//
// A single rule: the compiled RE2 pattern to search for, paired with the
// replacement string substituted for every match.
using CompiledRule = std::pair<std::unique_ptr<re2::RE2>, std::string>;
// All rules of one rule set, stored in the order they appear in the rule data,
// which is also the order in which they are applied when rewriting.
using CompiledRuleVector = std::vector<CompiledRule>;
// Compiled rule sets keyed by a string: the upper-cased region code for the
// built-in rules, or the raw rule text for rules created from custom data.
using CompiledRuleCache = std::unordered_map<std::string, CompiledRuleVector>;

// Builds the resource-map key under which the rewrite rules for |region| are
// registered, i.e. "IDR_ADDRESS_REWRITER_<region>_RULES".
std::string GetMapKey(const std::string& region) {
  static constexpr std::string_view kKeyPrefix = "IDR_ADDRESS_REWRITER_";
  static constexpr std::string_view kKeySuffix = "_RULES";
  return base::StrCat({kKeyPrefix, region, kKeySuffix});
}

// Returns the uncompressed rewrite-rule data for |region|.
//
// The rules are looked up in |kAutofillAddressRewriterResources| by the key
// produced by GetMapKey() and are stored gzip-compressed in the resource
// bundle. Returns an empty string if no resource is registered for |region|
// or if the resource cannot be decompressed, so that callers never see
// partially decoded rule data.
std::string ExtractRegionRulesData(const std::string& region) {
  const std::string resource_key = GetMapKey(region);
  for (const webui::ResourcePath& resource :
       kAutofillAddressRewriterResources) {
    if (resource.path != resource_key) {
      continue;
    }

    const std::string_view raw_resource =
        ui::ResourceBundle::GetSharedInstance().GetRawDataResource(
            resource.id);
    std::string data;
    if (!compression::GzipUncompress(raw_resource, &data)) {
      // Treat a corrupt resource the same way as a missing one.
      return std::string();
    }
    return data;
  }

  return std::string();
}

// Parses |data_string| and appends one compiled rule per line to
// |compiled_rules|.
//
// Each line has the form "<pattern>\t<replacement>" and lines are separated by
// '\n'; the final line may or may not be newline-terminated. The pattern is
// everything before the first tab, the replacement is everything after it.
// Lines that contain no tab (including blank lines) are not valid rules and
// are skipped.
//
// Patterns are compiled as UTF-8 with word-boundary (\b) support enabled.
void CompileRulesFromData(const std::string& data_string,
                          CompiledRuleVector* compiled_rules) {
  re2::RE2::Options options;
  options.set_encoding(RE2::Options::EncodingUTF8);
  options.set_word_boundary(true);

  std::string_view data = data_string;
  while (!data.empty()) {
    // Split off the next line. When there is no further '\n' the rest of the
    // data is the last line; consume all of it so that the loop terminates
    // (npos + 1 would wrap around to 0 and never advance).
    const size_t line_end = data.find('\n');
    const std::string_view line = data.substr(0, line_end);
    data.remove_prefix(line_end == std::string_view::npos ? data.size()
                                                          : line_end + 1);

    const size_t separator = line.find('\t');
    if (separator == std::string_view::npos) {
      // Not a "<pattern>\t<replacement>" line; ignore it.
      continue;
    }

    auto pattern =
        std::make_unique<re2::RE2>(line.substr(0, separator), options);
    std::string rewrite_string(line.substr(separator + 1));
    compiled_rules->emplace_back(std::move(pattern), std::move(rewrite_string));
  }
}

// The cache of compiled string replacement rules. This class is a singleton
// that compiles a rule set the first time it is requested and keeps it for
// the lifetime of the process, so the pointers it hands out stay valid.
//
// Entries are keyed by region for the built-in rules and by the raw rule text
// for custom rules. Every access to the underlying map is serialized by
// |lock_|; the returned rule vectors are const and are never modified after
// they have been published.
class Cache {
 public:
  // Return the singleton instance of the cache.
  static Cache* GetInstance() {
    static base::NoDestructor<Cache> instance;
    return instance.get();
  }

  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // If the rules for |region| have already been compiled and cached, return a
  // pointer to them. Otherwise, find the rules for |region| (returning nullptr
  // if no such rules exist), compile them, cache them, and return a pointer to
  // the cached rules.
  const CompiledRuleVector* GetRulesForRegion(const std::string& region) {
    // Take the lock so that we don't update the data cache concurrently. Note
    // that the returned data is const and can be concurrently accessed, just
    // not the data cache.
    base::AutoLock auto_lock(lock_);

    // If we find a cached set of rules, return a pointer to the data.
    if (const CompiledRuleVector* cached = FindCached(region)) {
      return cached;
    }

    // Cache miss. Look for the raw rules. If none, then return nullptr.
    const std::string region_rules = ExtractRegionRulesData(region);
    if (region_rules.empty()) {
      return nullptr;
    }

    return CompileAndStore(region, region_rules);
  }

  // Uses a string of rule data to create and return a pointer to a
  // CompiledRuleVector. Used for creating unit tests.
  //
  // The compiled rules are kept in the cache (keyed by |data| itself) so that
  // the returned pointer remains usable elsewhere. Asking for the same |data|
  // again returns the already compiled rules rather than appending a second
  // copy of every rule to the existing vector.
  const CompiledRuleVector* CreateRulesForData(const std::string& data) {
    // |data_| is shared with GetRulesForRegion(), so it must only be touched
    // with the lock held.
    base::AutoLock auto_lock(lock_);

    if (const CompiledRuleVector* cached = FindCached(data)) {
      return cached;
    }

    return CompileAndStore(data, data);
  }

 private:
  Cache() = default;

  // Returns the cached rules stored under |key|, or nullptr if there are none.
  // Must be called with |lock_| held.
  const CompiledRuleVector* FindCached(const std::string& key) const {
    auto cache_iter = data_.find(key);
    return cache_iter != data_.end() ? &cache_iter->second : nullptr;
  }

  // Adds a new rule vector to the cache under |key|, populates it with the
  // rules compiled from |rules_data| and returns a pointer to it. Must be
  // called with |lock_| held and only when |key| is not cached yet.
  const CompiledRuleVector* CompileAndStore(const std::string& key,
                                            const std::string& rules_data) {
    CompiledRuleVector& compiled_rules = data_[key];
    CompileRulesFromData(rules_data, &compiled_rules);
    return &compiled_rules;
  }

  // Synchronizes access to |data_|, ensuring that a given set of rules is
  // only compiled once.
  base::Lock lock_;

  // The cache of compiled rules, keyed by region (or by the raw rule data for
  // custom rules).
  CompiledRuleCache data_;

  friend class base::NoDestructor<Cache>;
};

}  // namespace

// static
// Convenience wrapper: obtains the rewriter for |country_code| and applies it
// to |normalized_text| in one step.
std::u16string AddressRewriter::RewriteForCountryCode(
    const AddressCountryCode& country_code,
    const std::u16string& normalized_text) {
  return AddressRewriter::ForCountryCode(country_code)
      .Rewrite(normalized_text);
}

// static
// Returns a rewriter backed by the cached rules for |country_code|. The
// country code is upper-cased before the lookup. When the cache has no rules
// for the region the rewriter is left with a null rule set.
AddressRewriter AddressRewriter::ForCountryCode(
    const AddressCountryCode& country_code) {
  const CompiledRuleVector* const region_rules =
      Cache::GetInstance()->GetRulesForRegion(
          base::ToUpperASCII(country_code.value()));

  AddressRewriter result;
  result.impl_ = region_rules;
  return result;
}

// static
// Returns a rewriter backed by rules compiled from |custom_rules|. The
// compiled rules are owned by the process-wide cache, not by the rewriter.
AddressRewriter AddressRewriter::ForCustomRules(
    const std::string& custom_rules) {
  const CompiledRuleVector* const compiled =
      Cache::GetInstance()->CreateRulesForData(custom_rules);

  AddressRewriter result;
  result.impl_ = compiled;
  return result;
}

std::u16string AddressRewriter::Rewrite(const std::u16string& text) const {
  if (impl_ == nullptr) {
    return base::CollapseWhitespace(text, true);
  }

  // Apply all of the string replacement rules. We don't have to worry about
  // whitespace during these passes because the patterns are all whitespace
  // tolerant regular expressions.
  std::string utf8_text = base::UTF16ToUTF8(text);
  for (const auto& rule : *static_cast<const CompiledRuleVector*>(impl_)) {
    RE2::GlobalReplace(&utf8_text, *rule.first, rule.second);
  }

  // Collapse whitespace before returning the final value.
  return base::UTF8ToUTF16(base::CollapseWhitespaceASCII(utf8_text, true));
}

}  // namespace autofill