File: acronym_matcher_unittest.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (123 lines) | stat: -rw-r--r-- 4,163 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chromeos/ash/components/string_matching/acronym_matcher.h"

#include "base/containers/adapters.h"
#include "chromeos/ash/components/string_matching/tokenized_string.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace ash::string_matching {

namespace {

using acronym_matcher_constants::kIsFrontOfTokenCharScore;
using acronym_matcher_constants::kIsPrefixCharScore;
using acronym_matcher_constants::kNoMatchScore;

constexpr double kAbsError = 1e-5;

// Produces a copy of |text| in which every range reported by |match.hits()|
// is wrapped in square brackets. For example, with text = "Text" and
// match.hits() = [{0,1}] the result is "[T]ext".
//
// TODO(crbug.com/1336160): Consider defining it as a |test_util| function as it
// has been used for several unit tests.
std::u16string MatchHit(const std::u16string& text,
                        const AcronymMatcher& match) {
  std::u16string annotated(text);

  // Visit the hits last-to-first: brackets inserted for a later hit then never
  // move the offsets of the hits that are still to be processed.
  const AcronymMatcher::Hits& ranges = match.hits();
  for (const gfx::Range& range : base::Reversed(ranges)) {
    const auto open_at = range.start();
    const auto close_at = range.end();
    // Close before opening, so the closing offset is still valid when used.
    annotated.insert(close_at, 1, u']');
    annotated.insert(open_at, 1, u'[');
  }

  return annotated;
}

}  // namespace

// Fixture used by the TEST_F cases in this file. It declares no members and no
// set-up/tear-down of its own on top of testing::Test.
class AcronymMatcherTest : public testing::Test {};

// Note on expected score calculations:
//
// When a query successfully matches to a text, each letter of the query
// contributes some amount towards a final total. The expected score in
// each test is then the sum over all of the contributions of the individual
// query letters. This is described in more detail in acronym_matcher.cc.
//
// When a query does not successfully match to a text, the overall expected
// score is `kNoMatchScore`.

// The query letters line up with the leading characters of the first three
// tokens of the text, so the acronym starts at the very first token.
TEST_F(AcronymMatcherTest, ConsecutiveTokensWithFirstTokenMatch) {
  const TokenizedString query(u"abc");
  const TokenizedString text(u"axx bxx cxx dxx exx");

  // Three query letters: one prefix-char contribution plus two
  // front-of-token contributions.
  const double expected = kIsPrefixCharScore + 2 * kIsFrontOfTokenCharScore;

  AcronymMatcher matcher(query, text);
  EXPECT_NEAR(matcher.CalculateRelevance(), expected, kAbsError);
}

// The query letters line up with the leading characters of the second, third
// and fourth tokens of the text; the first token is not part of the acronym.
TEST_F(AcronymMatcherTest, ConsecutiveTokensWithNonFirstTokenMatch) {
  const TokenizedString query(u"bcd");
  const TokenizedString text(u"axx bxx cxx dxx exx");

  // Three query letters, each worth one front-of-token contribution.
  const double expected = 3 * kIsFrontOfTokenCharScore;

  AcronymMatcher matcher(query, text);
  EXPECT_NEAR(matcher.CalculateRelevance(), expected, kAbsError);
}

// Upper- and lower-case letters are mixed in both the query and the text. The
// expected score is the same as in ConsecutiveTokensWithNonFirstTokenMatch,
// which uses the all-lower-case versions of the same strings.
TEST_F(AcronymMatcherTest, CaseInsensitive) {
  const TokenizedString query(u"bCd");
  const TokenizedString text(u"axx Bxx cxx Dxx exx");

  // Three query letters, each worth one front-of-token contribution.
  const double expected = 3 * kIsFrontOfTokenCharScore;

  AcronymMatcher matcher(query, text);
  EXPECT_NEAR(matcher.CalculateRelevance(), expected, kAbsError);
}

// Background: a prefix matcher treats the query as a prefix of tokens in the
// text. E.g., the query "abc" prefix-matches both "abc dxx" and "zxx abcx".
// This test checks that AcronymMatcher does not score such a query as a match:
// a query made of whole tokens of the text yields `kNoMatchScore`.
TEST_F(AcronymMatcherTest, PrefixMatchingNotAllowed) {
  const TokenizedString query(u"abc def");
  const TokenizedString text(u"abc def ghi");

  AcronymMatcher matcher(query, text);
  EXPECT_NEAR(matcher.CalculateRelevance(), kNoMatchScore, kAbsError);
}

// The query begins with the first letter of the text's first token ("abc"),
// then spells out the whole second token ("def") followed by the first letter
// of the third ("ghi"). Such a mix of acronym-style and prefix-style matching
// is expected to yield `kNoMatchScore`.
TEST_F(AcronymMatcherTest, MixedAcronymAndPrefixMatchingNotAllowed) {
  const TokenizedString query(u"adefg");
  const TokenizedString text(u"abc def ghi");

  AcronymMatcher matcher(query, text);
  EXPECT_NEAR(matcher.CalculateRelevance(), kNoMatchScore, kAbsError);
}

// Table-driven check of the hit ranges reported by AcronymMatcher. Each case
// runs the matcher and compares the text, with its hits wrapped in brackets by
// the MatchHit() helper, against the expected rendering.
TEST_F(AcronymMatcherTest, MatchHit) {
  struct MatchHitCase {
    const std::u16string text;
    const std::u16string query;
    const std::u16string expect;
  };

  const MatchHitCase kTestCases[] = {
      {u"Crash of Crowns", u"coc", u"[C]rash [o]f [C]rowns"},
      // The expected rendering contains no brackets, i.e. no hits.
      {u"Crash of Crowns", u"cra", u"Crash of Crowns"},
      {u"abcxxx bxxx cxxx", u"abc", u"[a]bcxxx [b]xxx [c]xxx"},
      {u"xxx abcxxx bxxx cxxx", u"abc", u"xxx [a]bcxxx [b]xxx [c]xxx"},
  };

  for (const MatchHitCase& entry : kTestCases) {
    const TokenizedString tokenized_query(entry.query);
    const TokenizedString tokenized_text(entry.text);

    AcronymMatcher matcher(tokenized_query, tokenized_text);
    // The score itself is not checked here; the call is made before the hits
    // are read, as in every case of this table.
    matcher.CalculateRelevance();

    const std::u16string rendered = MatchHit(entry.text, matcher);
    EXPECT_EQ(entry.expect, rendered);
  }
}

}  // namespace ash::string_matching