File: autofill_data_util_unittest.cc

package info (click to toggle)
chromium-browser 57.0.2987.98-1~deb8u1
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 2,637,852 kB
  • ctags: 2,544,394
  • sloc: cpp: 12,815,961; ansic: 3,676,222; python: 1,147,112; asm: 526,608; java: 523,212; xml: 286,794; perl: 92,654; sh: 86,408; objc: 73,271; makefile: 27,698; cs: 18,487; yacc: 13,031; tcl: 12,957; pascal: 4,875; ml: 4,716; lex: 3,904; sql: 3,862; ruby: 1,982; lisp: 1,508; php: 1,368; exp: 404; awk: 325; csh: 117; jsp: 39; sed: 37
file content (187 lines) | stat: -rw-r--r-- 7,216 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/core/browser/autofill_data_util.h"

#include "base/strings/utf_string_conversions.h"
#include "components/autofill/core/browser/autofill_test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace autofill {
namespace data_util {

// Runs IsCJKName() over a table of full names and checks the returned verdict
// against the expectation recorded for each entry.
TEST(AutofillDataUtilTest, IsCJKName) {
  struct TestCase {
    const char* full_name;  // UTF-8 encoded input.
    bool is_cjk;            // Expected return value of IsCJKName().
  };

  const TestCase test_cases[] = {
      // Non-CJK language with only ASCII characters.
      {"Homer Jay Simpson", false},
      // Non-CJK language with some ASCII characters.
      {"Éloïse Paré", false},
      // Non-CJK language with no ASCII characters.
      {"Σωκράτης", false},

      // (Simplified) Chinese name, Unihan.
      {"刘翔", true},
      // (Simplified) Chinese name, Unihan, with an ASCII space.
      {"成 龙", true},
      // Korean name, Hangul.
      {"송지효", true},
      // Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000). The
      // separator is spelled as its UTF-8 bytes (E3 80 80) so that it cannot be
      // silently replaced by an ASCII space by an editor or other tooling.
      {"김" "\xE3\x80\x80" "종국", true},
      // Japanese name, Unihan.
      {"山田貴洋", true},
      // Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB).
      {"ビル・ゲイツ", true},
      // Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a typo).
      {"ビル·ゲイツ", true},

      // CJK names don't have a middle name, so a 3-part name is bogus to us.
      {"반 기 문", false},
  };

  for (const TestCase& test_case : test_cases) {
    // The streamed message identifies which table entry failed.
    EXPECT_EQ(test_case.is_cjk,
              IsCJKName(base::UTF8ToUTF16(test_case.full_name)))
        << "Failed for: " << test_case.full_name;
  }
}

// Runs SplitName() over a table of full names and checks the given, middle and
// family parts of the result against the expectation recorded for each entry.
TEST(AutofillDataUtilTest, SplitName) {
  struct TestCase {
    std::string full_name;    // UTF-8 encoded input.
    std::string given_name;   // Expected NameParts::given.
    std::string middle_name;  // Expected NameParts::middle.
    std::string family_name;  // Expected NameParts::family.
  };

  const TestCase test_cases[] = {
      // Full name including given, middle and family names.
      {"Homer Jay Simpson", "Homer", "Jay", "Simpson"},
      // No middle name.
      {"Moe Szyslak", "Moe", "", "Szyslak"},
      // Common name prefixes removed.
      {"Reverend Timothy Lovejoy", "Timothy", "", "Lovejoy"},
      // Common name suffixes removed.
      {"John Frink Phd", "John", "", "Frink"},
      // Exception to the name suffix removal.
      {"John Ma", "John", "", "Ma"},
      // Common family name prefixes not considered a middle name.
      {"Milhouse Van Houten", "Milhouse", "", "Van Houten"},

      // CJK names have reverse order (surname goes first, given name goes
      // second).
      {"孫 德明", "德明", "", "孫"},  // Chinese name, Unihan
      // Chinese name, Unihan, 'IDEOGRAPHIC SPACE' (U+3000). The separator is
      // spelled as its UTF-8 bytes (E3 80 80) so that this entry stays distinct
      // from the ASCII-space entry above even if tooling normalises whitespace.
      {"孫" "\xE3\x80\x80" "德明", "德明", "", "孫"},
      {"홍 길동", "길동", "", "홍"},  // Korean name, Hangul
      {"山田 貴洋", "貴洋", "", "山田"},  // Japanese name, Unihan

      // In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a
      // separator. There is no consensus for the ordering. For now, we use the
      // same ordering as regular Japanese names ("last・first").
      {"ゲイツ・ビル", "ビル", "", "ゲイツ"},  // Foreign name in Japanese, Katakana
      // 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT' (U+00B7).
      {"ゲイツ·ビル", "ビル", "", "ゲイツ"},  // Foreign name in Japanese, Katakana

      // CJK names don't usually have a space in the middle, but most of the
      // time, the surname is only one character (in Chinese & Korean).
      {"최성훈", "성훈", "", "최"},  // Korean name, Hangul
      {"刘翔", "翔", "", "刘"},  // (Simplified) Chinese name, Unihan
      {"劉翔", "翔", "", "劉"},  // (Traditional) Chinese name, Unihan

      // There are a few exceptions. Occasionally, the surname has two
      // characters.
      {"남궁도", "도", "", "남궁"},  // Korean name, Hangul
      {"황보혜정", "혜정", "", "황보"},  // Korean name, Hangul
      {"歐陽靖", "靖", "", "歐陽"},  // (Traditional) Chinese name, Unihan

      // In Korean, some 2-character surnames are rare/ambiguous, like "강전":
      // "강" is a common surname, and "전" can be part of a given name. In
      // those cases, we assume it's 1/2 for 3-character names, or 2/2 for
      // 4-character names.
      {"강전희", "전희", "", "강"},  // Korean name, Hangul
      {"황목치승", "치승", "", "황목"},  // Korean name, Hangul

      // It occasionally happens that a full name is 2 characters, 1/1.
      {"이도", "도", "", "이"},  // Korean name, Hangul
      {"孫文", "文", "", "孫"},  // Chinese name, Unihan
  };

  // Iterate by const reference: each entry holds four std::string members, so
  // iterating by value would copy all of them on every pass.
  for (const TestCase& test_case : test_cases) {
    const NameParts name_parts =
        SplitName(base::UTF8ToUTF16(test_case.full_name));

    // Each expectation names the input, because the compared values alone do
    // not reveal which table entry produced them.
    EXPECT_EQ(base::UTF8ToUTF16(test_case.given_name), name_parts.given)
        << "Failed for: " << test_case.full_name;
    EXPECT_EQ(base::UTF8ToUTF16(test_case.middle_name), name_parts.middle)
        << "Failed for: " << test_case.full_name;
    EXPECT_EQ(base::UTF8ToUTF16(test_case.family_name), name_parts.family)
        << "Failed for: " << test_case.full_name;
  }
}

// Feeds given/middle/family triples to JoinNameParts() and compares the joined
// string with the full name recorded for each entry.
TEST(AutofillDataUtilTest, JoinNameParts) {
  struct NameFixture {
    std::string given_name;
    std::string middle_name;
    std::string family_name;
    std::string full_name;  // Expected result of joining the three parts.
  };

  const NameFixture fixtures[] = {
      // All three parts present.
      {"Homer", "Jay", "Simpson", "Homer Jay Simpson"},
      // Middle name left empty.
      {"Moe", "", "Szyslak", "Moe Szyslak"},

      // CJK parts are expected back family-first and without a separator.
      {"德明", "", "孫", "孫德明"},  // Chinese name, Unihan
      {"길동", "", "홍", "홍길동"},  // Korean name, Hangul
      {"貴洋", "", "山田", "山田貴洋"},  // Japanese name, Unihan

      // Mixed-script parts, or CJK parts that include a middle name, are
      // expected back in given/middle/family order separated by spaces.
      {"Homer", "", "シンプソン", "Homer シンプソン"},
      {"ホーマー", "", "Simpson", "ホーマー Simpson"},
      {"반", "기", "문", "반 기 문"},  // Has a middle name, too unusual
  };

  for (const NameFixture& fixture : fixtures) {
    const base::string16 given = base::UTF8ToUTF16(fixture.given_name);
    const base::string16 middle = base::UTF8ToUTF16(fixture.middle_name);
    const base::string16 family = base::UTF8ToUTF16(fixture.family_name);
    const base::string16 expected_full_name =
        base::UTF8ToUTF16(fixture.full_name);

    EXPECT_EQ(expected_full_name, JoinNameParts(given, middle, family));
  }
}

// Builds one profile whose name parts are "First", "Middle" and "Last", then
// checks which spellings of a full name ProfileMatchesFullName() accepts for
// it.
TEST(AutofillDataUtilTest, ProfileMatchesFullName) {
  autofill::AutofillProfile profile;
  autofill::test::SetProfileInfo(
      &profile, "First", "Middle", "Last", "fml@example.com", "Acme inc",
      "123 Main", "Apt 2", "Laredo", "TX", "77300", "US", "832-555-1000");

  // Converts a UTF-8 candidate name and runs it against the profile above.
  const auto matches_profile = [&profile](const char* candidate) {
    return ProfileMatchesFullName(base::UTF8ToUTF16(candidate), profile);
  };

  // Given and family name only.
  EXPECT_TRUE(matches_profile("First Last"));
  // All three parts spelled out.
  EXPECT_TRUE(matches_profile("First Middle Last"));
  // Middle name reduced to an initial, without and with a trailing period.
  EXPECT_TRUE(matches_profile("First M Last"));
  EXPECT_TRUE(matches_profile("First M. Last"));
  // Family name first, with and without a separating space.
  EXPECT_TRUE(matches_profile("Last First"));
  EXPECT_TRUE(matches_profile("LastFirst"));

  // A name that shares no part with the profile.
  EXPECT_FALSE(matches_profile("Kirby Puckett"));
}

}  // namespace data_util
}  // namespace autofill