File: language_usage_metrics.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (103 lines) | stat: -rw-r--r-- 3,306 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/language/core/browser/language_usage_metrics.h"

#include <stddef.h>

#include <set>
#include <string_view>

#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/string_tokenizer.h"
#include "components/language/core/browser/url_language_histogram.h"

namespace language {

// static
// Reports |accept_languages| to UMA: first the number of distinct language
// hashes produced by ParseAcceptLanguages(), then one sample per hash, and
// finally the hash of the first language in the list.
void LanguageUsageMetrics::RecordAcceptLanguages(
    std::string_view accept_languages) {
  const std::vector<int> language_hashes =
      ParseAcceptLanguages(accept_languages);

  UMA_HISTOGRAM_COUNTS_100("LanguageUsage.AcceptLanguage.Count",
                           language_hashes.size());

  // With no usable entries only the count (zero) is reported.
  if (language_hashes.empty()) {
    return;
  }

  for (const int hash : language_hashes) {
    base::UmaHistogramSparse("LanguageUsage.AcceptLanguage", hash);
  }
  base::UmaHistogramSparse("LanguageUsage.AcceptLanguage.FirstAcceptLanguage",
                           language_hashes.front());
}

// static
// Emits one "LanguageUsage.MostFrequentPageLanguages" sample for each entry
// of |language_counts|.GetTopLanguages() whose frequency is not below the
// minimum threshold and whose language code hashes to a non-zero value.
void LanguageUsageMetrics::RecordPageLanguages(
    const language::UrlLanguageHistogram& language_counts) {
  using LanguageInfo = language::UrlLanguageHistogram::LanguageInfo;

  // Entries with a frequency below this value are not reported.
  constexpr float kMinLanguageFrequency = 0.05f;

  const std::vector<LanguageInfo> candidates =
      language_counts.GetTopLanguages();
  for (const LanguageInfo& candidate : candidates) {
    const bool too_rare = candidate.frequency < kMinLanguageFrequency;
    if (too_rare) {
      continue;
    }

    // A hash of 0 means the language code could not be encoded; skip it.
    if (const int hash = ToLanguageCodeHash(candidate.language_code);
        hash != 0) {
      base::UmaHistogramSparse("LanguageUsage.MostFrequentPageLanguages",
                               hash);
    }
  }
}

// static
// Packs the language part of |locale| (everything before the first '-' or
// '_') into an int: each letter is lowercased and occupies one byte, with
// earlier letters in more significant bytes. For example "en-US" yields
// ('e' << 8) + 'n'.
//
// Returns 0 when the language part is empty, is longer than sizeof(int)
// characters, or contains a character outside 'A'-'Z' / 'a'-'z'.
int LanguageUsageMetrics::ToLanguageCodeHash(std::string_view locale) {
  const std::string_view language_part =
      locale.substr(0U, locale.find_first_of("-_"));

  // Left-shifting a signed int past its sign bit is undefined behavior in
  // C++. Every accepted letter is at most 'z' (0x7A), so its top bit is
  // clear, and packing up to sizeof(int) of them ("zzzz" -> 0x7A7A7A7A for
  // 4-byte ints) never reaches the sign bit. Anything longer is rejected
  // before any shifting happens.
  if (language_part.size() > sizeof(int)) {
    return 0;
  }

  int hash = 0;
  for (const char raw : language_part) {
    char letter = raw;
    if (letter >= 'A' && letter <= 'Z') {
      // Fold upper case onto lower case.
      letter += ('a' - 'A');
    } else if (letter < 'a' || letter > 'z') {
      return 0;
    }

    hash = (hash << 8) + letter;
  }

  return hash;
}

// static
std::vector<int> LanguageUsageMetrics::ParseAcceptLanguages(
    std::string_view accept_languages) {
  std::set<int> visited_languages;
  std::vector<int> languages;

  base::StringViewTokenizer locales(accept_languages, ",");
  while (locales.GetNext()) {
    const int language_hash = ToLanguageCodeHash(locales.token_piece());
    if (language_hash != 0 && visited_languages.insert(language_hash).second) {
      languages.push_back(language_hash);
    }
  }
  return languages;
}

}  // namespace language