File: url_language_histogram.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (79 lines) | stat: -rw-r--r-- 3,136 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_LANGUAGE_CORE_BROWSER_URL_LANGUAGE_HISTOGRAM_H_
#define COMPONENTS_LANGUAGE_CORE_BROWSER_URL_LANGUAGE_HISTOGRAM_H_

#include <string>
#include <vector>

#include "base/memory/raw_ptr.h"
#include "base/time/time.h"
#include "components/keyed_service/core/keyed_service.h"

// Forward declarations: this header only names these types through pointers
// (PrefRegistrySimple*, PrefService*, raw_ptr<PrefService>), so it does not
// include their definitions.
class PrefRegistrySimple;
class PrefService;

namespace language {

// Collects data about the languages in which the user reads the web and
// provides access to the current estimated language preferences. Past
// behaviour is discounted so that the histogram reflects changes in browsing
// habits. The histogram does not have to contain every language that ever
// appeared in the user's browsing; languages with insignificant frequency are
// eventually removed.
//
// Operates as a "wrapper" around profile preferences: the state of the
// histogram is read from/written to the PrefService in each method call. This
// allows multiple instances of the histogram to be used in a (non-overlapping)
// sequence without any instance-specific state going "out of sync". Clients of
// the histogram rely on this behaviour.
class UrlLanguageHistogram : public KeyedService {
 public:
  // A single histogram entry: a language paired with its estimated frequency.
  struct LanguageInfo {
    LanguageInfo() = default;
    LanguageInfo(const std::string& language_code, float frequency)
        : language_code{language_code}, frequency{frequency} {}

    // ISO 639 code identifying the language.
    std::string language_code;

    // Current estimate of this language's share, a value between 0 and 1. It
    // can be read as the probability that the next page the user opens is in
    // this language. Summed over every LanguageInfo returned by
    // GetTopLanguages(), the frequencies equal 1 (except while there are no
    // top languages yet).
    float frequency{0.0f};
  };

  // |pref_service| is the PrefService that the histogram state is read from
  // and written to.
  explicit UrlLanguageHistogram(PrefService* pref_service);

  // Not copyable and not copy-assignable.
  UrlLanguageHistogram(const UrlLanguageHistogram&) = delete;
  UrlLanguageHistogram& operator=(const UrlLanguageHistogram&) = delete;

  ~UrlLanguageHistogram() override;

  // Registers the profile prefs used by the histogram with |registry|.
  static void RegisterProfilePrefs(PrefRegistrySimple* registry);

  // Returns the languages the histogram currently tracks, ordered from the
  // highest frequency to the lowest. The result is empty when the histogram
  // does not yet have enough data points.
  std::vector<LanguageInfo> GetTopLanguages() const;

  // Returns the estimated frequency of |language_code|, or 0 when that
  // language is not one of the top languages kept in the histogram.
  float GetLanguageFrequency(const std::string& language_code) const;

  // Tells the histogram that a page in the language |language_code| has been
  // visited.
  void OnPageVisited(const std::string& language_code);

  // Updates the histogram to reflect that the history between |begin| and
  // |end| is being cleared.
  void ClearHistory(base::Time begin, base::Time end);

 private:
  // Held through raw_ptr, i.e. not owned by this object. Presumably the
  // PrefService passed to the constructor; the constructor's definition is
  // not part of this header.
  raw_ptr<PrefService> pref_service_;
};

}  // namespace language

#endif  // COMPONENTS_LANGUAGE_CORE_BROWSER_URL_LANGUAGE_HISTOGRAM_H_