File: url_util.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (153 lines) | stat: -rw-r--r-- 6,389 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_MATCHER_URL_UTIL_H_
#define COMPONENTS_URL_MATCHER_URL_UTIL_H_

#include <stddef.h>
#include <stdint.h>

#include <map>
#include <string>
#include <vector>

#include "base/memory/scoped_refptr.h"
#include "base/values.h"
#include "components/url_matcher/url_matcher.h"
#include "components/url_matcher/url_matcher_export.h"

class GURL;

namespace url_matcher {
namespace util {

// Upper bound on the number of filters that are accepted; any filter past this
// limit is ignored. Serves as the default value of the `max_filters`
// parameters declared further down in this header.
inline constexpr size_t kMaxFiltersAllowed{1000};

// Converts `value`, which is expected to hold a list of strings, into the
// string vector `out`. Returns true if the conversion was successful.
// NOTE(review): the contents of `out` after a failed conversion are not
// specified here -- check the implementation before relying on them.
bool GetAsStringVector(const base::Value* value, std::vector<std::string>* out);

// Normalizes a URL for matching purposes. `url` is taken by const reference
// and the normalized URL is returned by value.
// NOTE(review): which parts of the URL are altered is defined by the
// implementation and is not visible from this header.
URL_MATCHER_EXPORT GURL Normalize(const GURL& url);

// Extracts the underlying URL that is wrapped by services such as Google AMP
// or Google Translate.
//
// Returns the embedded URL, or an empty GURL if `url` doesn't match a known
// wrapper format.
URL_MATCHER_EXPORT GURL GetEmbeddedURL(const GURL& url);

// Extracts the underlying URL that is wrapped by the Google AMP viewer.
//
// Returns the embedded URL, or an empty GURL if `url` doesn't match a known
// format.
URL_MATCHER_EXPORT GURL GetGoogleAmpViewerEmbeddedURL(const GURL& url);

// Utility struct that represents a URL filter broken down into its
// components, plus some matching metadata. The type is move-only: the copy
// constructor and the copy assignment operator are deleted.
struct URL_MATCHER_EXPORT FilterComponents {
  FilterComponents();
  FilterComponents(const FilterComponents&) = delete;
  FilterComponents(FilterComponents&&);
  FilterComponents& operator=(const FilterComponents&) = delete;
  FilterComponents& operator=(FilterComponents&&) = default;

  ~FilterComponents();

  // Returns true if `this` represents the "*" filter.
  bool IsWildcard() const;
  // Individual pieces of the filter.
  // NOTE(review): these presumably carry the same meaning as the identically
  // named output parameters of FilterToComponents() -- confirm against the
  // implementation.
  std::string scheme;
  std::string host;
  // Defaults to 0.
  uint16_t port = 0;
  std::string path;
  std::string query;
  // Number of conditions that a URL needs to match in order to be considered
  // a match for this filter.
  int number_of_url_matching_conditions = 0;
  // Defaults to true.
  // NOTE(review): presumably whether subdomains of `host` also match --
  // confirm.
  bool match_subdomains = true;
  // Defaults to true.
  // NOTE(review): presumably true for an allow-list filter and false for a
  // block-list filter, as for CreateConditionSet() -- confirm.
  bool allow = true;
};

// Creates a condition set that can be used with the `url_matcher`.
//
// `id` needs to be a unique number; it is what the `url_matcher` returns when
// a URL matches the created condition set. `allow` indicates whether this is
// an allow-list (true) or a block-list (false) filter.
//
// NOTE(review): `scheme`, `host`, `match_subdomains`, `port`, `path` and
// `query` presumably take the values produced by FilterToComponents() --
// confirm against the callers.
URL_MATCHER_EXPORT scoped_refptr<url_matcher::URLMatcherConditionSet>
CreateConditionSet(url_matcher::URLMatcher* url_matcher,
                   base::MatcherStringPattern::ID id,
                   const std::string& scheme,
                   const std::string& host,
                   bool match_subdomains,
                   uint16_t port,
                   const std::string& path,
                   const std::string& query,
                   bool allow);

// Splits a URL filter into its components. A GURL isn't used because filters
// can be invalid URLs, e.g. "google.com".
//
// Returns false if the filter couldn't be parsed; in that case the values of
// the output parameters are undefined.
//
// The `filter` should have the format described at
// http://www.chromium.org/administrators/url-blocklist-filter-format and
// accepts wildcards. The optional username and password are ignored.
//
// Output parameters (all of them are mandatory):
//   `scheme`           - the scheme component of the filter.
//   `host`             - preprocessed so it can be passed to URLMatcher for
//                        the appropriate condition.
//   `match_subdomains` - whether the filter should include subdomains of the
//                        hostname (if it is one).
//   `port`             - 0 if none is explicitly defined.
//   `path`             - does not include query parameters.
//   `query`            - the query parameters ('?' not included).
URL_MATCHER_EXPORT bool FilterToComponents(const std::string& filter,
                                           std::string* scheme,
                                           std::string* host,
                                           bool* match_subdomains,
                                           uint16_t* port,
                                           std::string* path,
                                           std::string* query);

// Adds a limited number of URL filters `patterns` to a URLMatcher `matcher`.
// This overload takes the patterns as a `base::Value::List`. The
// `max_filters` parameter specifies the maximum number of filters added and
// defaults to `kMaxFiltersAllowed`.
//
// If `allow` is true, the filters will allow matching URLs; otherwise, they
// block them. The `id` parameter provides a pointer to the ID assigned to the
// filters, incremented for each filter added.
//
// `patterns` should be a list of URL patterns (see format description at
// http://www.chromium.org/administrators/url-blocklist-filter-format).
//
// An optional map to store the generated FilterComponents can be provided
// via `filters`; it defaults to nullptr.
URL_MATCHER_EXPORT void AddFiltersWithLimit(
    url_matcher::URLMatcher* matcher,
    bool allow,
    base::MatcherStringPattern::ID* id,
    const base::Value::List& patterns,
    std::map<base::MatcherStringPattern::ID, FilterComponents>* filters =
        nullptr,
    size_t max_filters = kMaxFiltersAllowed);

// Adds a limited number of URL filters `patterns` to a URLMatcher `matcher`.
// This overload takes the patterns as a `std::vector<std::string>`. The
// `max_filters` parameter specifies the maximum number of filters added and
// defaults to `kMaxFiltersAllowed`.
//
// If `allow` is true, the filters will allow matching URLs; otherwise, they
// block them. The `id` parameter provides a pointer to the ID assigned to the
// filters, incremented for each filter added.
//
// `patterns` should be a list of URL patterns (see format description at
// http://www.chromium.org/administrators/url-blocklist-filter-format).
//
// An optional map to store the generated FilterComponents can be provided
// via `filters`; it defaults to nullptr.
URL_MATCHER_EXPORT void AddFiltersWithLimit(
    url_matcher::URLMatcher* matcher,
    bool allow,
    base::MatcherStringPattern::ID* id,
    const std::vector<std::string>& patterns,
    std::map<base::MatcherStringPattern::ID, FilterComponents>* filters =
        nullptr,
    size_t max_filters = kMaxFiltersAllowed);

// Adds URL filters from the `base::Value::List` `patterns` to `matcher`.
// `max_filters` defaults to `kMaxFiltersAllowed`.
// NOTE(review): judging by the name and the missing `allow`/`id` parameters,
// this presumably adds at most `max_filters` filters as allow filters with
// internally chosen IDs -- confirm against the implementation.
URL_MATCHER_EXPORT void AddAllowFiltersWithLimit(
    url_matcher::URLMatcher* matcher,
    const base::Value::List& patterns,
    size_t max_filters = kMaxFiltersAllowed);

// Adds URL filters from the string vector `patterns` to `matcher`.
// `max_filters` defaults to `kMaxFiltersAllowed`.
// NOTE(review): judging by the name and the missing `allow`/`id` parameters,
// this presumably adds at most `max_filters` filters as allow filters with
// internally chosen IDs -- confirm against the implementation.
URL_MATCHER_EXPORT void AddAllowFiltersWithLimit(
    url_matcher::URLMatcher* matcher,
    const std::vector<std::string>& patterns,
    size_t max_filters = kMaxFiltersAllowed);

}  // namespace util
}  // namespace url_matcher

#endif  // COMPONENTS_URL_MATCHER_URL_UTIL_H_