1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/url_deduplication/url_deduplication_helper.h"
#include <memory>
#include <vector>
#include "base/strings/strcat.h"
#include "components/url_deduplication/deduplication_strategy.h"
#include "components/url_deduplication/url_strip_handler.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
namespace url_deduplication {
class MockURLStripHandler : public URLStripHandler {
public:
MockURLStripHandler() = default;
MockURLStripHandler(const MockURLStripHandler&) = delete;
MockURLStripHandler& operator=(const MockURLStripHandler&) = delete;
~MockURLStripHandler() override = default;
MOCK_METHOD1(StripExtraParams, GURL(GURL url));
};
class URLDeduplicationHelperTest : public ::testing::Test {
public:
URLDeduplicationHelperTest() = default;
void InitHelper(std::vector<std::unique_ptr<URLStripHandler>> handlers,
DeduplicationStrategy strategy) {
helper_ =
std::make_unique<URLDeduplicationHelper>(std::move(handlers), strategy);
}
URLDeduplicationHelper* Helper() { return helper_.get(); }
private:
std::unique_ptr<URLDeduplicationHelper> helper_;
};
// Placeholder page title handed to ComputeURLDeduplicationKey() by the tests
// that do not enable `include_title`. Kept as a constexpr character array
// instead of a std::string so that no static initializer or destructor runs
// for a namespace-scope object; it converts implicitly at each call site.
constexpr char kSamplePageTitle[] = "Sample page title";
// With no strip handlers and every clearing option used below switched on,
// the key computed for a URL carrying a "www." prefix, an explicit port, a
// query and a fragment is expected to be "http://foopayment.com/".
TEST_F(URLDeduplicationHelperTest, StripURL) {
  DeduplicationStrategy strip_everything;
  strip_everything.excluded_prefixes = {"www."};
  strip_everything.update_scheme = true;
  strip_everything.clear_username = true;
  strip_everything.clear_password = true;
  strip_everything.clear_query = true;
  strip_everything.clear_ref = true;
  strip_everything.clear_port = true;
  InitHelper({}, strip_everything);

  // NOTE(review): this URL has no userinfo ("user:pass@") component; the
  // "password=" / "username=" text lives inside the fragment, so the
  // clear_username / clear_password options are set but not visibly
  // exercised by this input.
  const GURL input_url(
      "https://www.foopayment.com:123?ref=foo"
      "#heading=h.xaresuk9ir9a&password=test1&username=test2?q=test");

  const std::string dedup_key =
      Helper()->ComputeURLDeduplicationKey(input_url, kSamplePageTitle);
  ASSERT_EQ("http://foopayment.com/", dedup_key);
}
// Registers two mock handlers with a default strategy. The first handler is
// expected to be called exactly once and supplies the stripped URL; the
// second must never be called. The resulting key is expected to equal the
// URL returned by the first handler.
TEST_F(URLDeduplicationHelperTest, StripURLWithHandlers) {
  auto responding_handler = std::make_unique<MockURLStripHandler>();
  auto skipped_handler = std::make_unique<MockURLStripHandler>();

  // A single WillOnce() with no explicit Times() means "exactly one call".
  EXPECT_CALL(*responding_handler, StripExtraParams(testing::_))
      .WillOnce(testing::Return(GURL("http://google.com/search")));
  EXPECT_CALL(*skipped_handler, StripExtraParams(testing::_)).Times(0);

  // Order matters: the responding handler is registered ahead of the one
  // that must stay untouched.
  std::vector<std::unique_ptr<URLStripHandler>> handler_list;
  handler_list.push_back(std::move(responding_handler));
  handler_list.push_back(std::move(skipped_handler));

  DeduplicationStrategy default_strategy;
  InitHelper(std::move(handler_list), default_strategy);

  const GURL input_url(
      "https://www.google.com/search#heading=h.xaresuk9ir9a?q=test");
  const std::string dedup_key =
      Helper()->ComputeURLDeduplicationKey(input_url, kSamplePageTitle);
  ASSERT_EQ("http://google.com/search", dedup_key);
}
// With `clear_path` and `include_title` enabled, two calendar URLs that
// differ only in their path but share a page title are expected to produce
// the same key: "<base url>#<title>".
TEST_F(URLDeduplicationHelperTest, DeduplicateByDomainAndTitle) {
  constexpr char kSampleBaseCalendarUrl[] = "https://calendar.google.com/";
  // NOTE(review): the literal spells "Januaray"; it feeds both the input
  // title and the expected key, so the comparison is unaffected.
  constexpr char kSampleCalendarPageTitle[] =
      "Google.com - Calendar - Week of Januaray 5, 2024";

  DeduplicationStrategy path_and_title_strategy;
  path_and_title_strategy.clear_path = true;
  path_and_title_strategy.include_title = true;
  InitHelper({}, path_and_title_strategy);

  const std::string expected_key =
      base::StrCat({kSampleBaseCalendarUrl, "#", kSampleCalendarPageTitle});

  // Two different pages underneath the same base URL.
  const GURL calendar_root_url(
      base::StrCat({kSampleBaseCalendarUrl, "calendar/u/0/r"}));
  const GURL calendar_week_url(
      base::StrCat({kSampleBaseCalendarUrl, "calendar/u/0/r/week/2024/1/05"}));

  EXPECT_EQ(expected_key, Helper()->ComputeURLDeduplicationKey(
                              calendar_root_url, kSampleCalendarPageTitle));
  EXPECT_EQ(expected_key, Helper()->ComputeURLDeduplicationKey(
                              calendar_week_url, kSampleCalendarPageTitle));
}
// Three google.com hosts that differ only by a prefix listed in
// `excluded_prefixes` are expected to yield one and the same key.
TEST_F(URLDeduplicationHelperTest, StripPrefix) {
  DeduplicationStrategy prefix_strategy;
  prefix_strategy.excluded_prefixes = {"www.", "accounts.", "myaccount.",
                                       "login.corp."};
  InitHelper({}, prefix_strategy);

  const GURL accounts_url("https://accounts.google.com");
  const GURL myaccount_url("https://myaccount.google.com");
  const GURL login_corp_url("https://login.corp.google.com");

  const std::string accounts_key =
      Helper()->ComputeURLDeduplicationKey(accounts_url, kSamplePageTitle);
  const std::string myaccount_key =
      Helper()->ComputeURLDeduplicationKey(myaccount_url, kSamplePageTitle);
  const std::string login_corp_key =
      Helper()->ComputeURLDeduplicationKey(login_corp_url, kSamplePageTitle);

  // The first key is the reference the other two are compared against.
  ASSERT_EQ(accounts_key, myaccount_key);
  ASSERT_EQ(accounts_key, login_corp_key);
}
} // namespace url_deduplication
|