File: label_cluster_finalizer.cc

package info (click to toggle)
chromium 138.0.7204.157-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,864 kB
  • sloc: cpp: 34,936,859; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,967; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (80 lines) | stat: -rw-r--r-- 3,248 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/history_clusters/core/label_cluster_finalizer.h"

#include <optional>
#include <string>
#include <utility>

#include "base/containers/flat_map.h"
#include "base/strings/utf_string_conversions.h"
#include "components/history/core/browser/history_types.h"
#include "components/history_clusters/core/config.h"
#include "components/history_clusters/core/history_clusters_util.h"
#include "components/history_clusters/core/on_device_clustering_features.h"
#include "components/history_clusters/core/on_device_clustering_util.h"
#include "components/strings/grit/components_strings.h"
#include "components/url_formatter/url_formatter.h"
#include "ui/base/l10n/l10n_util.h"

// Shorthand for the enum stored in `history::Cluster::label_source`; this file
// uses its kUnknown, kSearch and kHostname values.
using LabelSource = history::Cluster::LabelSource;

namespace history_clusters {

// The constructor and destructor are defined out of line here and use the
// compiler-generated implementations.
LabelClusterFinalizer::LabelClusterFinalizer() = default;
LabelClusterFinalizer::~LabelClusterFinalizer() = default;

// Chooses a label for `cluster` and stores it in `cluster.label`,
// `cluster.raw_label` and `cluster.label_source`.
//
// Candidates are tried in this order:
//  1. The search terms of the highest-scoring visit that has non-empty search
//     terms. `raw_label` receives the bare terms and `label` receives them
//     formatted through IDS_HISTORY_CLUSTERS_CLUSTER_LABEL_SEARCH_TERMS. The
//     label source is kSearch.
//  2. Otherwise, the display hostname whose accumulated visit score is the
//     largest. `raw_label` receives the bare hostname; `label` receives the
//     hostname, formatted through
//     IDS_HISTORY_CLUSTERS_CLUSTER_LABEL_MULTIPLE_HOSTNAMES when the cluster
//     spans more than one hostname. The label source is kHostname.
//
// Comparisons are strict, so on a tie the earliest candidate wins. When no
// candidate scores above -1 (for example, when the cluster has no visits) the
// three cluster fields are left untouched.
void LabelClusterFinalizer::FinalizeCluster(history::Cluster& cluster) {
  // Score a candidate has to beat. The hostname pass below only runs when the
  // search pass found nothing, i.e. when this is still -1.
  float max_label_score = -1;

  // First try finding search terms to use as the cluster label. Only remember
  // which visit wins; the localized label is built once, after the loop,
  // rather than on every improvement.
  const std::u16string* best_search_terms = nullptr;
  for (const auto& visit : cluster.visits) {
    const std::u16string& search_terms =
        visit.annotated_visit.content_annotations.search_terms;
    if (!search_terms.empty() && visit.score > max_label_score) {
      best_search_terms = &search_terms;
      max_label_score = visit.score;
    }
  }
  if (best_search_terms) {
    // `best_search_terms` points into `cluster.visits`, which is not modified
    // by the assignments below.
    cluster.label = l10n_util::GetStringFUTF16(
        IDS_HISTORY_CLUSTERS_CLUSTER_LABEL_SEARCH_TERMS, *best_search_terms);
    cluster.raw_label = *best_search_terms;
    cluster.label_source = LabelSource::kSearch;
    return;
  }

  // No search terms were usable, so fall back to hostnames. Scores are summed
  // per hostname and the leader is tracked while summing, which means each
  // comparison sees the running total for that hostname.
  // NOTE(review): this matches "largest final total" only if visit scores are
  // non-negative - confirm against the code that assigns `visit.score`.
  base::flat_map<std::u16string, float> hostname_to_score;
  std::optional<std::u16string> best_hostname;
  for (const auto& visit : cluster.visits) {
    std::u16string host =
        ComputeURLForDisplay(visit.normalized_url, /*trim_after_host=*/true);
    float& hostname_score = hostname_to_score[host];
    hostname_score += visit.score;
    if (hostname_score > max_label_score) {
      max_label_score = hostname_score;
      // The map keeps its own copy of the key, so `host` can be moved from.
      best_hostname = std::move(host);
    }
  }
  if (!best_hostname) {
    return;
  }

  // If we assigned a hostname label, yet there is more than one hostname
  // available, append " and more" to the label. `label` must be computed
  // before `*best_hostname` is moved into `raw_label`.
  cluster.label =
      hostname_to_score.size() > 1
          ? l10n_util::GetStringFUTF16(
                IDS_HISTORY_CLUSTERS_CLUSTER_LABEL_MULTIPLE_HOSTNAMES,
                *best_hostname)
          : *best_hostname;
  cluster.raw_label = std::move(*best_hostname);
  cluster.label_source = LabelSource::kHostname;
}

}  // namespace history_clusters