File: spellcheck_multilingual_unittest.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (275 lines) | stat: -rw-r--r-- 11,268 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include <stddef.h>

#include <algorithm>
#include <array>
#include <memory>
#include <string_view>
#include <utility>
#include <vector>

#include "base/memory/raw_ptr.h"
#include "base/path_service.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/task_environment.h"
#include "components/spellcheck/common/spellcheck_common.h"
#include "components/spellcheck/common/spellcheck_result.h"
#include "components/spellcheck/renderer/empty_local_interface_provider.h"
#include "components/spellcheck/renderer/spellcheck.h"
#include "components/spellcheck/renderer/spellcheck_provider_test.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/platform/web_string.h"
#include "third_party/blink/public/web/web_text_checking_result.h"

namespace {

// One word-level spellcheck expectation: the text to check and the range of
// the first misspelling that should be reported for it.
//
// Every member has a default so that a default-constructed instance is fully
// initialized; brace initialization with explicit values still works.
struct SpellcheckTestCase {
  // A string of text for checking.
  const wchar_t* input = nullptr;
  // The position and the length of the first misspelled word, if any. The
  // test tables in this file use 0 for both when nothing is misspelled.
  size_t expected_misspelling_start = 0;
  size_t expected_misspelling_length = 0;
};

// Returns <DIR_SRC_TEST_DATA_ROOT>/third_party/hunspell_dictionaries, or an
// empty path when the test data root cannot be resolved.
base::FilePath GetHunspellDirectory() {
  base::FilePath test_data_root;
  const bool root_found =
      base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &test_data_root);
  if (!root_found) {
    return base::FilePath();
  }

  return test_data_root.AppendASCII("third_party")
      .AppendASCII("hunspell_dictionaries");
}

}  // namespace

class MultilingualSpellCheckTest : public testing::Test {
 public:
  MultilingualSpellCheckTest() = default;

  void ReinitializeSpellCheck(const std::string& unsplit_languages) {
    spellcheck_ = new SpellCheck(&embedder_provider_);
    provider_ = std::make_unique<TestingSpellCheckProvider>(
        spellcheck_, &embedder_provider_);
    InitializeSpellCheck(unsplit_languages);
  }

  void InitializeSpellCheck(const std::string& unsplit_languages) {
    base::FilePath hunspell_directory = GetHunspellDirectory();
    EXPECT_FALSE(hunspell_directory.empty());
    std::vector<std::string> languages = base::SplitString(
        unsplit_languages, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

    for (const auto& language : languages) {
      base::File file(
          spellcheck::GetVersionedFileName(language, hunspell_directory),
          base::File::FLAG_OPEN | base::File::FLAG_READ);
      spellcheck_->AddSpellcheckLanguage(std::move(file), language);
    }
  }

  ~MultilingualSpellCheckTest() override = default;
  TestingSpellCheckProvider* provider() { return provider_.get(); }

 protected:
  void ExpectSpellCheckWordResults(const std::string& languages,
                                   const SpellcheckTestCase* test_cases,
                                   size_t num_test_cases) {
    ReinitializeSpellCheck(languages);

    for (size_t i = 0; i < num_test_cases; ++i) {
      size_t misspelling_start = 0;
      size_t misspelling_length = 0;
      static_cast<blink::WebTextCheckClient*>(provider())
          ->CheckSpelling(blink::WebString::FromUTF16(
                              base::WideToUTF16(test_cases[i].input)),
                          misspelling_start, misspelling_length, nullptr);

      EXPECT_EQ(test_cases[i].expected_misspelling_start, misspelling_start)
          << "Improper misspelling location found with the languages "
          << languages << " when checking \"" << test_cases[i].input << "\".";
      EXPECT_EQ(test_cases[i].expected_misspelling_length, misspelling_length)
          << "Improper misspelling length found with the languages "
          << languages << " when checking \"" << test_cases[i].input << "\".";
    }
  }

  void ExpectSpellCheckParagraphResults(
      const std::u16string& input,
      const std::vector<SpellCheckResult>& expected) {
    std::vector<blink::WebTextCheckingResult> results;
    spellcheck_->SpellCheckParagraph(input, provider_->GetSpellCheckHost(),
                                     &results);

    EXPECT_EQ(expected.size(), results.size());
    size_t size = std::min(results.size(), expected.size());
    for (size_t i = 0; i < size; ++i) {
      EXPECT_EQ(blink::kWebTextDecorationTypeSpelling, results[i].decoration);
      EXPECT_EQ(expected[i].location, results[i].location);
      EXPECT_EQ(expected[i].length, results[i].length);
    }
  }

 private:
  base::test::TaskEnvironment task_environment_;
  spellcheck::EmptyLocalInterfaceProvider embedder_provider_;

  // Owned by |provider_|.
  raw_ptr<SpellCheck, DanglingUntriaged> spellcheck_;
  std::unique_ptr<TestingSpellCheckProvider> provider_;
};

// Verifies that text mixing words from several languages is spellchecked
// correctly for every ordering of the loaded languages.
TEST_F(MultilingualSpellCheckTest, MultilingualSpellCheckWord) {
  static const SpellcheckTestCase kTestCases[] = {
      // An English, Spanish, Russian, and Greek word, all spelled correctly.
      {L"rocket destruyan \x0432\x0441\x0435\x0445 \x03C4\x03B9\x03C2", 0, 0},
      // A misspelled English word.
      {L"rocktt destruyan \x0432\x0441\x0435\x0445 \x03C4\x03B9\x03C2", 0, 6},
      // A misspelled Spanish word.
      {L"rocket destruynn \x0432\x0441\x0435\x0445 \x03C4\x03B9\x03C2", 7, 9},
      // A misspelled Russian word.
      {L"rocket destruyan \x0430\x0430\x0430\x0430 \x03C4\x03B9\x03C2", 17, 4},
      // A misspelled Greek word.
      {L"rocket destruyan \x0432\x0441\x0435\x0445 \x03B1\x03B1\x03B1\x03B1",
       22, 4},
      // An English word, then Russian, and then a misspelled English word.
      {L"rocket \x0432\x0441\x0435\x0445 rocktt", 12, 6},
  };

  // std::next_permutation() only visits every ordering when it starts from
  // the sorted sequence, so this list must be given in sorted order.
  const std::string sorted_languages = "el-GR,en-US,es-ES,ru-RU";
  std::vector<std::string_view> language_order = base::SplitStringPiece(
      sorted_languages, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

  // Run the whole table once per ordering, starting with the sorted one.
  bool more_orderings = true;
  while (more_orderings) {
    ExpectSpellCheckWordResults(base::JoinString(language_order, ","),
                                kTestCases, std::size(kTestCases));
    more_orderings =
        std::next_permutation(language_order.begin(), language_order.end());
  }
}

// Checks word-level results with the en-US and es-ES dictionaries loaded
// together. Each entry lists the input followed by the expected start and
// length of the first reported misspelling (0, 0 when none is expected).
TEST_F(MultilingualSpellCheckTest, MultilingualSpellCheckWordEnglishSpanish) {
  static constexpr char kLanguages[] = "en-US,es-ES";

  static const SpellcheckTestCase kTestCases[] = {
      // Empty input and plain word lists; only the first misspelling counts.
      {L"", 0, 0},
      {L"head hand foot legs arms", 0, 0},
      {L"head hand foot legs arms zzzz", 25, 4},
      {L"head hand zzzz foot legs arms", 10, 4},
      {L"zzzz head hand foot legs arms", 0, 4},
      {L"zzzz head zzzz foot zzzz arms", 0, 4},
      {L"head hand foot arms zzzz zzzz", 20, 4},
      // A sentence with a misspelling at different positions.
      {L"I do not want a monstrous snake near me.", 0, 0},
      {L"zz do not want a monstrous snake near me.", 0, 2},
      {L"I do not want zz monstrous snake near me.", 14, 2},
      {L"I do not want a monstrous zz near me.", 26, 2},
      {L"I do not want a monstrou snake near me.", 16, 8},
      {L"I do not want a monstrous snake near zz.", 37, 2},
      // Text mixing words of both languages; nothing is expected to be
      // flagged.
      {L"Partially Spanish is very bueno.", 0, 0},
      {L"Sleeping in the biblioteca is good.", 0, 0},
      {L"Hermano is my favorite name.", 0, 0},
      {L"hola hola hola hola hola hola", 0, 0},
      {L"sand hola hola hola hola hola", 0, 0},
      {L"hola sand sand sand sand sand", 0, 0},
      {L"sand sand sand sand sand hola", 0, 0},
      {L"sand hola sand hola sand hola", 0, 0},
      {L"hola sand hola sand hola sand", 0, 0},
      // Two words joined by ':' are expected to be reported as one
      // misspelled 9-character span.
      {L"hola:legs", 0, 9},
      {L"legs:hola", 0, 9},
  };

  ExpectSpellCheckWordResults(kLanguages, kTestCases, std::size(kTestCases));
}

// With no spellcheck languages loaded, no part of the text should be marked
// as misspelled.
TEST_F(MultilingualSpellCheckTest, MultilingualSpellCheckParagraphBlank) {
  ReinitializeSpellCheck(std::string());

  const std::vector<SpellCheckResult> no_misspellings;

  // English, German, Spanish, and a misspelled word.
  ExpectSpellCheckParagraphResults(u"rocket Schwarzkommando destruyan pcnyhon",
                                   no_misspellings);
}

// A word counts as correctly spelled as soon as at least one of the selected
// languages accepts it, so nothing should be reported here.
TEST_F(MultilingualSpellCheckTest, MultilingualSpellCheckParagraphCorrect) {
  ReinitializeSpellCheck("en-US,es-ES,de-DE");

  const std::vector<SpellCheckResult> no_misspellings;

  // English, German, and Spanish words, all spelled correctly.
  ExpectSpellCheckParagraphResults(u"rocket Schwarzkommando destruyan",
                                   no_misspellings);
}

// Every misspelling in the paragraph must be reported. Only en-US and es-ES
// are loaded, so the German word and the nonsense word are both expected to
// be flagged.
TEST_F(MultilingualSpellCheckTest, MultilingualSpellCheckParagraph) {
  ReinitializeSpellCheck("en-US,es-ES");

  std::vector<SpellCheckResult> expected;
  // "Schwarzkommando": offset 7, 15 characters.
  expected.emplace_back(SpellCheckResult::SPELLING, 7, 15);
  // "pcnyhon": offset 33, 7 characters.
  expected.emplace_back(SpellCheckResult::SPELLING, 33, 7);

  // English, German, Spanish, and a misspelled word.
  ExpectSpellCheckParagraphResults(u"rocket Schwarzkommando destruyan pcnyhon",
                                   expected);
}

// Ensure that suggestions are handled properly for multiple languages.
//
// With en-US and es-ES loaded, each input is checked through
// WebTextCheckClient::CheckSpelling() and the reported misspelling range and
// suggestion list are compared against the table below.
TEST_F(MultilingualSpellCheckTest, MultilingualSpellCheckSuggestions) {
  ReinitializeSpellCheck("en-US,es-ES");
  struct TestCases {
    // A string of text for checking.
    const wchar_t* input;
    // The position and the length of the first invalid word.
    size_t expected_misspelling_start;
    size_t expected_misspelling_length;
    // A comma separated string of suggested words that should occur, in their
    // expected order. Null (the default) means no suggestions are expected.
    const wchar_t* expected_suggestions = nullptr;
  };
  static const auto kTestCases = std::to_array<TestCases>({
      {L"rocket", 0, 0},
      {L"destruyan", 0, 0},
      {L"rocet", 0, 5, L"rocket,roce,crochet,troce,rocen"},
      {L"jum", 0, 3, L"hum,jun,ju,um,juma"},
      {L"asdne", 0, 5, L"sadness,desasne"},
  });

  for (const TestCases& test_case : kTestCases) {
    std::vector<blink::WebString> suggestions;
    // Zero-initialized, as in ExpectSpellCheckWordResults(), so the
    // comparisons below never read indeterminate values. CheckSpelling() is
    // not visible from this file, so it is not assumed to write both
    // out-parameters for every input.
    size_t misspelling_start = 0;
    size_t misspelling_length = 0;
    static_cast<blink::WebTextCheckClient*>(provider())
        ->CheckSpelling(
            blink::WebString::FromUTF16(base::WideToUTF16(test_case.input)),
            misspelling_start, misspelling_length, &suggestions);

    EXPECT_EQ(test_case.expected_misspelling_start, misspelling_start)
        << "Improper misspelling location found when checking \""
        << test_case.input << "\".";
    EXPECT_EQ(test_case.expected_misspelling_length, misspelling_length)
        << "Improper misspelling length found when checking \""
        << test_case.input << "\".";

    if (!test_case.expected_suggestions) {
      EXPECT_EQ(0u, suggestions.size())
          << "Unexpected suggestions found when checking \""
          << test_case.input << "\".";
      continue;
    }

    const std::vector<std::u16string> expected_suggestions = base::SplitString(
        base::WideToUTF16(test_case.expected_suggestions),
        std::u16string(1, ','), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

    EXPECT_EQ(expected_suggestions.size(), suggestions.size())
        << "Improper number of suggestions found when checking \""
        << test_case.input << "\".";

    // Compare only the entries both lists have, so a count mismatch reported
    // above does not lead to out-of-range indexing.
    const size_t comparable =
        std::min(expected_suggestions.size(), suggestions.size());
    for (size_t j = 0; j < comparable; ++j) {
      EXPECT_EQ(expected_suggestions[j], suggestions[j].Utf16());
    }
  }
}