File: language_util.cc

package info (click to toggle)
chromium 139.0.7258.127-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,122,156 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (170 lines) | stat: -rw-r--r-- 5,034 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/language/core/common/language_util.h"

#include <stddef.h>

#include <algorithm>
#include <string_view>

#include "base/feature_list.h"
#include "base/strings/strcat.h"
#include "components/country_codes/country_codes.h"
#include "components/language/core/common/language_experiments.h"
#include "components/language/core/common/locale_util.h"

namespace language {

namespace {

// One row of a language-code mapping table. Rows are written as
// {translate_language, chrome_language}.
struct LanguageCodePair {
  const char* const translate_language;  // Code in Translate's supported list.
  const char* const chrome_language;     // Code used internally by Chrome.
};

// Languages that Translate treats as a single language even though, strictly
// speaking, the two codes are distinct. This table is consulted only when
// converting a Chrome code to a Translate code.
//
// Each row is {translate_language, chrome_language}.
//
// If this table is updated, please sync this with the synonym table in
// chrome/browser/resources/settings/languages_page/languages.ts.
constexpr LanguageCodePair kTranslateOnlySynonyms[] = {
    {"no", "nb"},
    {"id", "in"},
};

// Language codes that have changed over the years. Older codes still show up
// in practice, so both spellings must be recognized as the same language.
// This table is used for conversions in both directions.
//
// Each row is {translate_language, chrome_language}.
//
// If this table is updated, please sync this with the synonym table in
// chrome/browser/resources/settings/languages_page/languages.ts.
constexpr LanguageCodePair kLanguageCodeSynonyms[] = {
    {"gom", "kok"},
    {"iw", "he"},
    {"jw", "jv"},
    {"tl", "fil"},
};

// Chinese locale codes that, as far as Translate is concerned, are compatible
// with either zh-TW or zh-CN.
//
// Each row is {translate_language, chrome_language}; the Chrome side is
// matched against the full code (including the region), not just the base
// language.
//
// If this table is updated, please sync this with the synonym table in
// chrome/browser/resources/settings/languages_page/languages.ts.
constexpr LanguageCodePair kLanguageCodeChineseCompatiblePairs[] = {
    {"zh-TW", "zh-HK"},
    {"zh-TW", "zh-MO"},
    {"zh-CN", "zh-SG"},
};

}  // namespace

// Returns true when the Translate trigger should be overridden for India.
//
// This can only be true on Android builds, and only when the
// kDisableGeoLanguageModel feature is off and the current country code is
// "IN". On every other platform the function always returns false.
bool OverrideTranslateTriggerInIndia() {
#if BUILDFLAG(IS_ANDROID)
  // The feature check comes first so the country lookup is skipped entirely
  // when the geo language model is disabled.
  return !base::FeatureList::IsEnabled(language::kDisableGeoLanguageModel) &&
         country_codes::GetCurrentCountryID().CountryCode() == "IN";
#else
  return false;
#endif
}

// Picks the language model override that is currently in effect: GEO when the
// India Translate-trigger override applies, DEFAULT otherwise.
OverrideLanguageModel GetOverrideLanguageModel() {
  // Note: when there are multiple possible override models, the order in which
  // they are checked matters. It lets concurrent override experiments coexist
  // without having to partition them explicitly.
  return OverrideTranslateTriggerInIndia() ? OverrideLanguageModel::GEO
                                           : OverrideLanguageModel::DEFAULT;
}

// Rewrites |*language| in place from a Chrome language code to the code that
// Translate uses for the same language.
//
// Rules, in order of precedence:
//   * Empty main part (as reported by SplitIntoMainAndTail): left untouched.
//   * "mni": left untouched, tail included.
//   * "zh": replaced through kLanguageCodeChineseCompatiblePairs when the full
//     code matches a Chrome-side entry; otherwise left untouched.
//   * "cmn": becomes "zh-TW" or "zh-CN" when the tail starts with a "-Hant" or
//     "-Hans" script subtag (ASCII case-insensitive), otherwise "zh".
//   * Main part listed in kTranslateOnlySynonyms or kLanguageCodeSynonyms:
//     replaced with the Translate-side code; the tail is dropped.
//   * Anything else: reduced to the main part; the tail is dropped.
void ToTranslateLanguageSynonym(std::string* language) {
  // Get the base language (e.g. "es" for "es-MX")
  auto [main_part, tail_part] = language::SplitIntoMainAndTail(*language);

  if (main_part.empty()) {
    return;
  }

  if (main_part == "mni") {
    // "mni-Mtei" does not have any mapping and as such we leave it as is.
    return;
  }

  if (main_part == "zh") {
    // Chinese is a special case, there can be two base languages: traditional
    // and simplified. The kLanguageCodeChineseCompatiblePairs list contains the
    // relation between various Chinese locales. We need to return the code from
    // that mapping - if it exists.
    for (const auto& language_pair : kLanguageCodeChineseCompatiblePairs) {
      if (*language == language_pair.chrome_language) {
        *language = language_pair.translate_language;
        return;
      }
    }
    // Note that "zh" does not have any mapping and as such we leave it as is.
    // See https://crbug/798512 for more info.
    return;
  }

  if (main_part == "cmn") {
    // The Speech On-Device API (SODA) uses the Mandarin Chinese (cmn) language
    // codes.
    //
    // Language tags are case-insensitive and the conventional spelling of a
    // script subtag is title-case ("cmn-Hant-TW"), so the script check must
    // not depend on the input being lowercase. |lowercase_prefix| is expected
    // to already be lowercase ASCII.
    const auto starts_with_ignoring_ascii_case =
        [](std::string_view text, std::string_view lowercase_prefix) {
          return text.size() >= lowercase_prefix.size() &&
                 std::equal(lowercase_prefix.begin(), lowercase_prefix.end(),
                            text.begin(), [](char expected, char actual) {
                              const char lowered =
                                  (actual >= 'A' && actual <= 'Z')
                                      ? static_cast<char>(actual - 'A' + 'a')
                                      : actual;
                              return expected == lowered;
                            });
        };

    if (starts_with_ignoring_ascii_case(tail_part, "-hant")) {
      *language = "zh-TW";
      return;
    }

    if (starts_with_ignoring_ascii_case(tail_part, "-hans")) {
      *language = "zh-CN";
      return;
    }

    // If there is no matching script tag for cmn return zh.
    *language = "zh";
    return;
  }

  // Both synonym tables hold only a handful of rows, so a linear scan is
  // sufficient.
  for (const auto& language_pair : kTranslateOnlySynonyms) {
    if (main_part == language_pair.chrome_language) {
      *language = language_pair.translate_language;
      return;
    }
  }

  for (const auto& language_pair : kLanguageCodeSynonyms) {
    if (main_part == language_pair.chrome_language) {
      *language = language_pair.translate_language;
      return;
    }
  }

  // By default use the base language as the translate synonym. A fresh
  // std::string is built before assigning because |main_part| presumably
  // refers to the contents of |*language| itself.
  *language = std::string(main_part);
}

void ToChromeLanguageSynonym(std::string* language) {
  auto [main_part, tail_part] = language::SplitIntoMainAndTail(*language);
  if (main_part.empty()) {
    return;
  }

  // Apply linear search here because number of items in the list is just three.
  for (const auto& language_pair : kLanguageCodeSynonyms) {
    if (main_part == language_pair.translate_language) {
      main_part = language_pair.chrome_language;
      break;
    }
  }

  *language = base::StrCat({main_part, tail_part});
}

}  // namespace language