File: tokenized_string_unittest.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (124 lines) | stat: -rw-r--r-- 4,213 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chromeos/ash/components/string_matching/tokenized_string.h"

#include <stddef.h>

#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace ash::string_matching {

namespace {

std::u16string GetContent(const TokenizedString& tokenized) {
  const TokenizedString::Tokens& tokens = tokenized.tokens();
  const TokenizedString::Mappings& mappings = tokenized.mappings();

  std::u16string str;
  for (size_t i = 0; i < tokens.size(); ++i) {
    if (i > 0)
      str += ' ';
    str += tokens[i];
    str += base::UTF8ToUTF16(mappings[i].ToString());
  }
  return str;
}

}  // namespace

// Tokenizing an empty string is expected to produce no tokens, both with the
// default mode and with Mode::kWords.
TEST(TokenizedStringTest, Empty) {
  const std::u16string no_text;

  const TokenizedString by_default(no_text);
  EXPECT_EQ(std::u16string(), GetContent(by_default));

  const TokenizedString by_words(no_text, TokenizedString::Mode::kWords);
  EXPECT_EQ(std::u16string(), GetContent(by_words));
}

// Runs a series of sample inputs through both the default tokenization mode
// and Mode::kWords. Expected values use the GetContent() rendering, i.e.
// "<token><mapping>" entries separated by spaces. Each sample lives in its own
// scope so the locals can reuse the same names.
TEST(TokenizedStringTest, Basic) {
  // Single character: one token in either mode.
  {
    const std::u16string input = u"a";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"a{0,1}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"a{0,1}", GetContent(by_words));
  }
  // Mixed-case word: the default mode is expected to split at the inner
  // capital, while kWords keeps it whole. Tokens come back lower-cased.
  {
    const std::u16string input = u"ScratchPad";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"scratch{0,7} pad{7,10}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"scratchpad{0,10}", GetContent(by_words));
  }
  // Letters followed by a version number: split in the default mode, one
  // token in kWords.
  {
    const std::u16string input = u"Chess2.0";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"chess{0,5} 2.0{5,8}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"chess2.0{0,8}", GetContent(by_words));
  }
  // Plain space-separated words: both modes are expected to agree.
  {
    const std::u16string input = u"Cut the rope";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"cut{0,3} the{4,7} rope{8,12}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"cut{0,3} the{4,7} rope{8,12}", GetContent(by_words));
  }
  // A run of capitals inside the first word plus a second word.
  {
    const std::u16string input = u"AutoCAD WS";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"auto{0,4} cad{4,7} ws{8,10}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"autocad{0,7} ws{8,10}", GetContent(by_words));
  }
  // A plain word followed by a mixed-case word.
  {
    const std::u16string input = u"Great TweetDeck";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"great{0,5} tweet{6,11} deck{11,15}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"great{0,5} tweetdeck{6,15}", GetContent(by_words));
  }
  // Hyphenated word with trailing punctuation: the default mode is expected
  // to split at the hyphen; kWords keeps the hyphen inside a single token.
  // Neither expectation contains the trailing '!'.
  {
    const std::u16string input = u"Draw-It!";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"draw{0,4} it{5,7}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"draw-it{0,7}", GetContent(by_words));
  }
  // A standalone '&' between two words yields no token in either mode.
  {
    const std::u16string input = u"Faxing & Signing";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"faxing{0,6} signing{9,16}", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"faxing{0,6} signing{9,16}", GetContent(by_words));
  }
  // Input made up solely of symbols: no tokens at all in either mode.
  {
    const std::u16string input = u"!@#$%^&*()<<<**>>>";
    const TokenizedString by_default(input);
    EXPECT_EQ(u"", GetContent(by_default));
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"", GetContent(by_words));
  }
}

// Exercises Mode::kWords only, using inputs with punctuation and whitespace
// placed around and inside the words. Expected values use the GetContent()
// rendering of "<token><mapping>" entries separated by spaces.
TEST(TokenizedStringTest, TokenizeWords) {
  // Leading and trailing "?!" are absent from the expected tokens, while the
  // '-', '@' and '.' inside the words are kept.
  {
    const std::u16string input = u"?! wi-fi abc@gmail.com?!";
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"wi-fi{3,8} abc@gmail.com{9,22}", GetContent(by_words));
  }
  // A gap of mixed spaces, a tab and a backspace character between the two
  // words is expected to contribute no tokens.
  {
    const std::u16string input = u"Hello?! \t \b   World! ";
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"hello{0,5} world{14,19}", GetContent(by_words));
  }
  // Only punctuation and spaces: the result is expected to be empty.
  {
    const std::u16string input = u" ?|! *&";
    const TokenizedString by_words(input, TokenizedString::Mode::kWords);
    EXPECT_EQ(u"", GetContent(by_words));
  }
}

}  // namespace ash::string_matching