File: url_deduplication_helper_unittest.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (125 lines) | stat: -rw-r--r-- 4,705 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_deduplication/url_deduplication_helper.h"

#include <memory>
#include <vector>

#include "base/strings/strcat.h"
#include "components/url_deduplication/deduplication_strategy.h"
#include "components/url_deduplication/url_strip_handler.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"

namespace url_deduplication {

class MockURLStripHandler : public URLStripHandler {
 public:
  MockURLStripHandler() = default;
  MockURLStripHandler(const MockURLStripHandler&) = delete;
  MockURLStripHandler& operator=(const MockURLStripHandler&) = delete;
  ~MockURLStripHandler() override = default;

  MOCK_METHOD1(StripExtraParams, GURL(GURL url));
};

// Test fixture that owns the URLDeduplicationHelper under test. Each test
// builds the helper it needs through InitHelper() and then reaches it via
// Helper().
class URLDeduplicationHelperTest : public ::testing::Test {
 public:
  URLDeduplicationHelperTest() = default;

  // Replaces the current helper with a new one constructed from `handlers`
  // (ownership is transferred) and `strategy`.
  void InitHelper(std::vector<std::unique_ptr<URLStripHandler>> handlers,
                  DeduplicationStrategy strategy) {
    auto fresh_helper = std::make_unique<URLDeduplicationHelper>(
        std::move(handlers), strategy);
    helper_ = std::move(fresh_helper);
  }

  // Non-owning access to the helper; null until InitHelper() has been called.
  URLDeduplicationHelper* Helper() { return helper_.get(); }

 private:
  // The object under test; created by InitHelper().
  std::unique_ptr<URLDeduplicationHelper> helper_;
};

// Page title passed to ComputeURLDeduplicationKey() by tests that do not care
// about the title. Declared as a constexpr character array so that no
// std::string with a dynamic initializer and non-trivial destructor lives at
// namespace scope; call sites convert it to a string on demand.
constexpr char kSamplePageTitle[] = "Sample page title";

// With no strip handlers and every stripping option below enabled, the
// deduplication key for a URL carrying a "www." prefix, a port, a query and a
// fragment is expected to collapse to the bare "http://foopayment.com/".
TEST_F(URLDeduplicationHelperTest, StripURL) {
  DeduplicationStrategy strip_everything;
  strip_everything.excluded_prefixes = {"www."};
  strip_everything.update_scheme = true;
  // NOTE(review): the URL below has no "user:pass@" userinfo section, so these
  // two options do not appear to be exercised by this test - confirm.
  strip_everything.clear_username = true;
  strip_everything.clear_password = true;
  strip_everything.clear_query = true;
  strip_everything.clear_ref = true;
  strip_everything.clear_port = true;
  InitHelper({}, strip_everything);

  const GURL decorated_url(
      "https://www.foopayment.com:123?ref=foo"
      "#heading=h.xaresuk9ir9a&password=test1&username=test2?q=test");
  const std::string dedup_key =
      Helper()->ComputeURLDeduplicationKey(decorated_url, kSamplePageTitle);

  ASSERT_EQ("http://foopayment.com/", dedup_key);
}

// Registers two mock strip handlers with a default strategy and checks that
// the key equals the URL produced by the first handler while the second
// handler is never invoked.
TEST_F(URLDeduplicationHelperTest, StripURLWithHandlers) {
  GURL full_url =
      GURL("https://www.google.com/search#heading=h.xaresuk9ir9a?q=test");
  DeduplicationStrategy strategy;
  auto handler1 = std::make_unique<MockURLStripHandler>();
  auto handler2 = std::make_unique<MockURLStripHandler>();
  // A lone WillOnce() already implies "called exactly once", and the canned
  // result does not depend on the argument, so Return() expresses it directly.
  EXPECT_CALL(*handler1, StripExtraParams(testing::_))
      .WillOnce(testing::Return(GURL("http://google.com/search")));
  // Presumably skipped because the first handler already changed the URL -
  // verify against the helper implementation.
  EXPECT_CALL(*handler2, StripExtraParams(testing::_)).Times(0);
  std::vector<std::unique_ptr<URLStripHandler>> handlers;
  handlers.push_back(std::move(handler1));
  handlers.push_back(std::move(handler2));
  InitHelper(std::move(handlers), strategy);
  std::string stripped_url =
      Helper()->ComputeURLDeduplicationKey(full_url, kSamplePageTitle);
  ASSERT_EQ("http://google.com/search", stripped_url);
}

// With the path cleared and the title included, two different calendar paths
// that share the same page title are expected to produce the same key:
// the base URL, a '#', then the title.
TEST_F(URLDeduplicationHelperTest, DeduplicateByDomainAndTitle) {
  constexpr char kSampleBaseCalendarUrl[] = "https://calendar.google.com/";
  constexpr char kSampleCalendarPageTitle[] =
      "Google.com - Calendar - Week of Januaray 5, 2024";

  DeduplicationStrategy domain_and_title;
  domain_and_title.clear_path = true;
  domain_and_title.include_title = true;
  InitHelper({}, domain_and_title);

  const std::string expected_dedup_url_key =
      base::StrCat({kSampleBaseCalendarUrl, "#", kSampleCalendarPageTitle});

  // Shallow path under the calendar host.
  const GURL root_view(base::StrCat({kSampleBaseCalendarUrl, "calendar/u/0/r"}));
  EXPECT_EQ(expected_dedup_url_key,
            Helper()->ComputeURLDeduplicationKey(root_view,
                                                 kSampleCalendarPageTitle));

  // Deeper path under the same host; must map to the same key.
  const GURL week_view(base::StrCat(
      {kSampleBaseCalendarUrl, "calendar/u/0/r/week/2024/1/05"}));
  EXPECT_EQ(expected_dedup_url_key,
            Helper()->ComputeURLDeduplicationKey(week_view,
                                                 kSampleCalendarPageTitle));
}

// Three google.com hosts that differ only by a prefix listed in
// `excluded_prefixes` are expected to yield the same deduplication key.
TEST_F(URLDeduplicationHelperTest, StripPrefix) {
  GURL url_1 = GURL("https://accounts.google.com");
  GURL url_2 = GURL("https://myaccount.google.com");
  GURL url_3 = GURL("https://login.corp.google.com");
  DeduplicationStrategy strategy;
  strategy.excluded_prefixes = {"www.", "accounts.", "myaccount.",
                                "login.corp."};
  InitHelper({}, strategy);
  std::string stripped_url_1 =
      Helper()->ComputeURLDeduplicationKey(url_1, kSamplePageTitle);
  std::string stripped_url_2 =
      Helper()->ComputeURLDeduplicationKey(url_2, kSamplePageTitle);
  std::string stripped_url_3 =
      Helper()->ComputeURLDeduplicationKey(url_3, kSamplePageTitle);
  // The two comparisons are independent, so use non-fatal EXPECT_EQ: a
  // mismatch on the first prefix must not hide a mismatch on the second.
  EXPECT_EQ(stripped_url_1, stripped_url_2);
  EXPECT_EQ(stripped_url_1, stripped_url_3);
}

}  // namespace url_deduplication