File: tailored_word_break_iterator_unittest.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (156 lines) | stat: -rw-r--r-- 6,710 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stddef.h>

#include <string>
#include <vector>

#include "base/i18n/break_iterator.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "components/omnibox/browser/tailored_word_break_iterator.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace {
// One expected segment produced by the break iterator under test.
struct Piece {
  // The exact text of the segment.
  std::u16string string;
  // The value the iterator's IsWord() is expected to return for this segment.
  // Defaults to false so a default-initialized Piece never holds an
  // indeterminate flag; aggregate initialization ({u"foo", true}) still works.
  bool is_word = false;
};

void VerifyBreaks(std::u16string str, std::vector<Piece> expected_pieces) {
  TailoredWordBreakIterator iter(str);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < expected_pieces.size(); ++i) {
    ASSERT_TRUE(iter.Advance()) << base::StringPrintf(
        "Expected %zu pieces; found %zu pieces.\n", expected_pieces.size(), i);
    EXPECT_TRUE(iter.IsWord() == expected_pieces[i].is_word &&
                iter.GetString() == expected_pieces[i].string)
        << base::StringPrintf(
               "Expected {%s, %d}; found {%s, %d}.\n",
               base::UTF16ToUTF8(expected_pieces[i].string).c_str(),
               expected_pieces[i].is_word,
               base::UTF16ToUTF8(iter.GetString()).c_str(), iter.IsWord());
  }

  iter.Advance();
  ASSERT_EQ(iter.pos(), base::i18n::BreakIterator::npos) << base::StringPrintf(
      "Expected %zu pieces; found more pieces; found {%s, %d}.\n",
      expected_pieces.size(), base::UTF16ToUTF8(iter.GetString()).c_str(),
      iter.IsWord());
  EXPECT_FALSE(iter.IsWord());
  EXPECT_FALSE(iter.Advance());
  EXPECT_FALSE(iter.IsWord());
}
}  // namespace

// Expects underscores, '!' and the newline to each come out as their own
// non-word piece, with the runs of letters between them reported as words.
TEST(TailoredWordBreakIterator, BreakWord) {
  const std::u16string input = u"_foo_bar!_\npouet_boom";
  const std::vector<Piece> expected = {
      {u"_", false},      //
      {u"foo", true},     //
      {u"_", false},      //
      {u"bar", true},     //
      {u"!", false},      //
      {u"_", false},      //
      {u"\n", false},     //
      {u"pouet", true},   //
      {u"_", false},      //
      {u"boom", true},    //
  };

  VerifyBreaks(input, expected);
}

// Expects an underscore at the very end of the input to be reported as a
// final non-word piece, just like the leading and interior ones.
TEST(TailoredWordBreakIterator, TrailingUnderscore) {
  const std::u16string input = u"_foo_bar_";
  const std::vector<Piece> expected = {
      {u"_", false},    //
      {u"foo", true},   //
      {u"_", false},    //
      {u"bar", true},   //
      {u"_", false},    //
  };

  VerifyBreaks(input, expected);
}

// Expects every character in a run of separators ('.', '_', '/', or a mix of
// them) to be reported as its own single-character non-word piece.
TEST(TailoredWordBreakIterator, RepeatingUnderscore) {
  struct Case {
    std::u16string input;
    std::vector<Piece> pieces;
  };

  const Case kCases[] = {
      // Repeated dots.
      {u"Viktor...Ambartsumian",
       {
           {u"Viktor", true},
           {u".", false},
           {u".", false},
           {u".", false},
           {u"Ambartsumian", true},
       }},
      // Repeated underscores.
      {u"Viktor___Ambartsumian",
       {
           {u"Viktor", true},
           {u"_", false},
           {u"_", false},
           {u"_", false},
           {u"Ambartsumian", true},
       }},
      // A mixed run of underscores, dots and slashes.
      {u"Viktor_..///.__Ambartsumian",
       {
           {u"Viktor", true},
           {u"_", false},
           {u".", false},
           {u".", false},
           {u"/", false},
           {u"/", false},
           {u"/", false},
           {u".", false},
           {u"_", false},
           {u"_", false},
           {u"Ambartsumian", true},
       }},
  };

  for (const Case& test_case : kCases) {
    VerifyBreaks(test_case.input, test_case.pieces);
  }
}

// Expects adjacent runs of letters and runs of digits to be split into
// separate word pieces, with each space reported as a non-word piece.
TEST(TailoredWordBreakIterator, Numerics) {
  const std::u16string input = u"chr0m3 15 aw350m3";
  const std::vector<Piece> expected = {
      // "chr0m3"
      {u"chr", true},
      {u"0", true},
      {u"m", true},
      {u"3", true},
      {u" ", false},
      // "15"
      {u"15", true},
      {u" ", false},
      // "aw350m3"
      {u"aw", true},
      {u"350", true},
      {u"m", true},
      {u"3", true},
  };

  VerifyBreaks(input, expected);
}

// Combines the letter/digit splitting and the underscore splitting
// expectations: letter runs and digit runs are separate word pieces, while
// each underscore, space and '!' is its own non-word piece.
TEST(TailoredWordBreakIterator, NumericsAndUnderscores) {
  struct Case {
    std::u16string input;
    std::vector<Piece> pieces;
  };

  const Case kCases[] = {
      {u"chr0m3__15__aw350m3",
       {
           {u"chr", true},
           {u"0", true},
           {u"m", true},
           {u"3", true},
           {u"_", false},
           {u"_", false},
           {u"15", true},
           {u"_", false},
           {u"_", false},
           {u"aw", true},
           {u"350", true},
           {u"m", true},
           {u"3", true},
       }},
      {u"Viktor Ambartsumian_is__anAwesome99_99Astrophysicist!!",
       {
           {u"Viktor", true},
           {u" ", false},
           {u"Ambartsumian", true},
           {u"_", false},
           {u"is", true},
           {u"_", false},
           {u"_", false},
           {u"anAwesome", true},
           {u"99", true},
           {u"_", false},
           {u"99", true},
           {u"Astrophysicist", true},
           {u"!", false},
           {u"!", false},
       }},
  };

  for (const Case& test_case : kCases) {
    VerifyBreaks(test_case.input, test_case.pieces);
  }
}