File: url_pattern_helpers.h

package info (click to toggle)
ada-url 3.4.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,320 kB
  • sloc: cpp: 24,281; ansic: 4,553; python: 573; sh: 193; makefile: 17
file content (349 lines) | stat: -rw-r--r-- 13,385 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
/**
 * @file url_pattern_helpers.h
 * @brief Declaration for the URLPattern helpers.
 */
#ifndef ADA_URL_PATTERN_HELPERS_H
#define ADA_URL_PATTERN_HELPERS_H

#include "ada/expected.h"
#include "ada/common_defs.h"
#include "ada/url_pattern.h"

#include <string>
#include <tuple>
#include <vector>

#if ADA_INCLUDE_URL_PATTERN
namespace ada {
// Opaque declaration of the `errors` enumeration (underlying type uint8_t).
// It lets the signatures in this header name `ada::errors` without listing
// its enumerators here.
enum class errors : uint8_t;
}

namespace ada::url_pattern_helpers {

// Type tag carried by every `token`. Stored in a single byte; the numeric
// value of each enumerator is spelled out explicitly.
// @see https://urlpattern.spec.whatwg.org/#token
enum class token_type : uint8_t {
  INVALID_CHAR = 0,
  OPEN = 1,
  CLOSE = 2,
  REGEXP = 3,
  NAME = 4,
  CHAR = 5,
  ESCAPED_CHAR = 6,
  OTHER_MODIFIER = 7,
  ASTERISK = 8,
  END = 9,
};

#ifdef ADA_TESTING
// Returns a string for the given token type. Only declared when ADA_TESTING
// is defined.
std::string to_string(token_type type);
#endif  // ADA_TESTING

// Policy value handed to `Tokenizer` and `tokenize`.
// @see https://urlpattern.spec.whatwg.org/#tokenize-policy
enum class token_policy {
  strict = 0,
  lenient = 1,
};

// @see https://urlpattern.spec.whatwg.org/#tokens
class token {
 public:
  token(token_type _type, size_t _index, std::string_view _value)
      : type(_type), index(_index), value(_value) {}

  // A token has an associated type, a string, initially "invalid-char".
  token_type type = token_type::INVALID_CHAR;

  // A token has an associated index, a number, initially 0. It is the position
  // of the first code point in the pattern string represented by the token.
  size_t index = 0;

  // A token has an associated value, a string, initially the empty string. It
  // contains the code points from the pattern string represented by the token.
  std::string_view value{};
};

// Parser state for turning a token list into a list of `url_pattern_part`s.
// `F` is the encoding-callback type, constrained by
// `url_pattern_encoding_callback`.
// @see https://urlpattern.spec.whatwg.org/#pattern-parser
template <url_pattern_encoding_callback F>
class url_pattern_parser {
 public:
  // Binds the parser to `encoding_callback_` and copies
  // `segment_wildcard_regexp_` into an owned string. The callback is kept by
  // reference, so it must outlive the parser.
  url_pattern_parser(F& encoding_callback_,
                     std::string_view segment_wildcard_regexp_)
      : encoding_callback(encoding_callback_),
        segment_wildcard_regexp(segment_wildcard_regexp_) {}

  // True while `index` is still a valid position in `tokens`.
  bool can_continue() const { return index < tokens.size(); }

  // The try_consume_* members return a pointer to a token.
  // NOTE(review): presumably nullptr when nothing was consumed — confirm
  // against the definitions.

  // @see https://urlpattern.spec.whatwg.org/#try-to-consume-a-token
  token* try_consume_token(token_type type);
  // @see https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token
  token* try_consume_modifier_token();
  // @see
  // https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token
  token* try_consume_regexp_or_wildcard_token(const token* name_token);
  // Returns the consumed text as an owned string.
  // @see https://urlpattern.spec.whatwg.org/#consume-text
  std::string consume_text();
  // @see https://urlpattern.spec.whatwg.org/#consume-a-required-token
  bool consume_required_token(token_type type);
  // May return an `errors` value; the result is marked `ada_warn_unused`.
  // @see
  // https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value
  std::optional<errors> maybe_add_part_from_the_pending_fixed_value()
      ada_warn_unused;
  // May return an `errors` value; the result is marked `ada_warn_unused`.
  // @see https://urlpattern.spec.whatwg.org/#add-a-part
  std::optional<errors> add_part(std::string_view prefix, token* name_token,
                                 token* regexp_or_wildcard_token,
                                 std::string_view suffix,
                                 token* modifier_token) ada_warn_unused;

  // Token list being parsed; empty until filled by the user of the parser.
  std::vector<token> tokens{};
  // Caller-supplied encoding callback (non-owning reference).
  F& encoding_callback;
  // Owned copy of the segment wildcard regexp given to the constructor.
  std::string segment_wildcard_regexp;
  // Parts produced so far.
  std::vector<url_pattern_part> parts{};
  // Pending fixed value, initially empty.
  std::string pending_fixed_value{};
  // Current position in `tokens`.
  size_t index = 0;
  // NOTE(review): presumably the next automatically assigned numeric group
  // name, as in the spec — confirm against the definitions.
  size_t next_numeric_name = 0;
};

// Tokenizer state for one pattern string. The free function `tokenize` is a
// friend and therefore has access to the private state below.
// @see https://urlpattern.spec.whatwg.org/#tokenizer
class Tokenizer {
 public:
  // Starts a tokenizer over `new_input` with the given policy. The input is
  // kept as a non-owning view, so the characters must outlive the tokenizer.
  explicit Tokenizer(std::string_view new_input, token_policy new_policy)
      : input{new_input}, policy{new_policy} {}

  // @see https://urlpattern.spec.whatwg.org/#get-the-next-code-point
  constexpr void get_next_code_point();

  // @see https://urlpattern.spec.whatwg.org/#seek-and-get-the-next-code-point
  constexpr void seek_and_get_next_code_point(size_t index);

  // @see https://urlpattern.spec.whatwg.org/#add-a-token
  void add_token(token_type type, size_t next_position, size_t value_position,
                 size_t value_length);

  // @see https://urlpattern.spec.whatwg.org/#add-a-token-with-default-length
  void add_token_with_default_length(token_type type, size_t next_position,
                                     size_t value_position);

  // @see
  // https://urlpattern.spec.whatwg.org/#add-a-token-with-default-position-and-length
  void add_token_with_defaults(token_type type);

  // May return an `errors` value; the result is marked `ada_warn_unused`.
  // @see https://urlpattern.spec.whatwg.org/#process-a-tokenizing-error
  std::optional<errors> process_tokenizing_error(
      size_t next_position, size_t value_position) ada_warn_unused;

  friend tl::expected<std::vector<token>, errors> tokenize(
      std::string_view input, token_policy policy);

 private:
  // Input: the pattern string being tokenized (set by the constructor).
  std::string_view input;
  // Policy: the tokenize policy (set by the constructor).
  token_policy policy;
  // Token list: starts out empty.
  std::vector<token> token_list{};
  // Index: starts at 0.
  size_t index = 0;
  // Next index: starts at 0.
  size_t next_index = 0;
  // Code point: value-initialized, i.e. U+0000 (the spec's "null").
  char32_t code_point{};
};

// Parser state for turning a URLPattern constructor string into a
// `url_pattern_init`; `parse` is the static entry point. The template
// parameter is constrained by `url_pattern_regex::regex_concept`.
// @see https://urlpattern.spec.whatwg.org/#constructor-string-parser
template <url_pattern_regex::regex_concept regex_provider>
struct constructor_string_parser {
  // Keeps a non-owning view of `new_input` (the characters must outlive the
  // parser) and takes over `new_token_list` by move.
  explicit constructor_string_parser(std::string_view new_input,
                                     std::vector<token>&& new_token_list)
      : input(new_input), token_list(std::move(new_token_list)) {}
  // Yields either a `url_pattern_init` or an `errors` value.
  // @see https://urlpattern.spec.whatwg.org/#parse-a-constructor-string
  static tl::expected<url_pattern_init, errors> parse(std::string_view input);

  // The states the parser can be in; `state` below starts at INIT.
  // @see https://urlpattern.spec.whatwg.org/#constructor-string-parser-state
  enum class State {
    INIT,
    PROTOCOL,
    AUTHORITY,
    USERNAME,
    PASSWORD,
    HOSTNAME,
    PORT,
    PATHNAME,
    SEARCH,
    HASH,
    DONE,
  };

  // May return an `errors` value.
  // NOTE(review): presumably updates `protocol_matches_a_special_scheme_flag`
  // — confirm against the definition.
  // @see
  // https://urlpattern.spec.whatwg.org/#compute-protocol-matches-a-special-scheme-flag
  std::optional<errors> compute_protocol_matches_special_scheme_flag();

 private:
  // @see https://urlpattern.spec.whatwg.org/#rewind
  constexpr void rewind();

  // Note: unlike the predicates further down, the next two are declared
  // non-const.

  // @see https://urlpattern.spec.whatwg.org/#is-a-hash-prefix
  constexpr bool is_hash_prefix();

  // @see https://urlpattern.spec.whatwg.org/#is-a-search-prefix
  constexpr bool is_search_prefix();

  // @see https://urlpattern.spec.whatwg.org/#change-state
  void change_state(State state, size_t skip);

  // @see https://urlpattern.spec.whatwg.org/#is-a-group-open
  constexpr bool is_group_open() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-group-close
  constexpr bool is_group_close() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-protocol-suffix
  constexpr bool is_protocol_suffix() const;

  // @see https://urlpattern.spec.whatwg.org/#next-is-authority-slashes
  constexpr bool next_is_authority_slashes() const;

  // @see https://urlpattern.spec.whatwg.org/#is-an-identity-terminator
  constexpr bool is_an_identity_terminator() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-pathname-start
  constexpr bool is_pathname_start() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-password-prefix
  constexpr bool is_password_prefix() const;

  // @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-open
  constexpr bool is_an_ipv6_open() const;

  // @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-close
  constexpr bool is_an_ipv6_close() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-port-prefix
  constexpr bool is_port_prefix() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-non-special-pattern-char
  constexpr bool is_non_special_pattern_char(size_t index,
                                             uint32_t value) const;

  // Returns a pointer to a const token for the given index.
  // @see https://urlpattern.spec.whatwg.org/#get-a-safe-token
  constexpr const token* get_safe_token(size_t index) const;

  // Returns the component string as an owned std::string.
  // @see https://urlpattern.spec.whatwg.org/#make-a-component-string
  std::string make_component_string();
  // Input: the constructor string; set by the constructor (non-owning view).
  std::string_view input;
  // Token list: set by the constructor, which moves the caller's vector in.
  std::vector<token> token_list;
  // Result: the URLPatternInit being built; starts value-initialized.
  url_pattern_init result{};
  // Component start: starts at 0.
  size_t component_start = 0;
  // Token index: starts at 0.
  size_t token_index = 0;
  // Token increment: starts at 1.
  size_t token_increment = 1;
  // Group depth: starts at 0.
  size_t group_depth = 0;
  // Hostname IPv6 bracket depth: starts at 0.
  size_t hostname_ipv6_bracket_depth = 0;
  // "Protocol matches a special scheme" flag: starts false.
  bool protocol_matches_a_special_scheme_flag = false;
  // Current state (the spec's "init" string is State::INIT here).
  State state = State::INIT;
};

// Each canonicalize_* function below takes the raw component text and yields
// either the resulting string or an `errors` value.

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol
tl::expected<std::string, errors> canonicalize_protocol(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-username
tl::expected<std::string, errors> canonicalize_username(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-password
tl::expected<std::string, errors> canonicalize_password(std::string_view input);

// Yields either the resulting hostname string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-a-hostname
tl::expected<std::string, errors> canonicalize_hostname(std::string_view input);

// Yields either the resulting IPv6 hostname string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname
tl::expected<std::string, errors> canonicalize_ipv6_hostname(
    std::string_view input);

// Yields either the resulting port string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-a-port
tl::expected<std::string, errors> canonicalize_port(std::string_view input);

// Same spec algorithm as `canonicalize_port`, with the protocol supplied as an
// additional argument.
// @see https://wicg.github.io/urlpattern/#canonicalize-a-port
tl::expected<std::string, errors> canonicalize_port_with_protocol(
    std::string_view input, std::string_view protocol);

// Yields either the resulting pathname string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-a-pathname
tl::expected<std::string, errors> canonicalize_pathname(std::string_view input);

// Yields either the resulting opaque pathname string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname
tl::expected<std::string, errors> canonicalize_opaque_pathname(
    std::string_view input);

// Yields either the resulting search string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-a-search
tl::expected<std::string, errors> canonicalize_search(std::string_view input);

// Yields either the resulting hash string or an `errors` value.
// @see https://wicg.github.io/urlpattern/#canonicalize-a-hash
tl::expected<std::string, errors> canonicalize_hash(std::string_view input);

// Tokenizes `input` under the given policy and yields either the token list or
// an `errors` value. The tokens hold `std::string_view` values.
// NOTE(review): those views presumably point into `input`, which would then
// have to outlive the returned tokens — confirm against the definition.
// This function is a friend of `Tokenizer`.
// @see https://urlpattern.spec.whatwg.org/#tokenize
tl::expected<std::vector<token>, errors> tokenize(std::string_view input,
                                                  token_policy policy);

// Returns an owned string computed from `input` and the given process type.
// @see https://urlpattern.spec.whatwg.org/#process-a-base-url-string
std::string process_base_url_string(std::string_view input,
                                    url_pattern_init::process_type type);

// Returns an owned, escaped copy of `input` per the pattern-string rules.
// @see https://urlpattern.spec.whatwg.org/#escape-a-pattern-string
std::string escape_pattern_string(std::string_view input);

// Returns an owned, escaped copy of `input` per the regexp-string rules.
// @see https://urlpattern.spec.whatwg.org/#escape-a-regexp-string
std::string escape_regexp_string(std::string_view input);

// Reports whether `input` is an absolute pathname for the given process type.
// Declared constexpr and noexcept.
// NOTE(review): being constexpr, its definition has to be visible wherever it
// is called; presumably it lives in the matching inline header — confirm.
// @see https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname
constexpr bool is_absolute_pathname(
    std::string_view input, url_pattern_init::process_type type) noexcept;

// Parses `input` and yields either the list of parts or an `errors` value.
// `options` and `encoding_callback` are both taken by non-const reference;
// `F` is constrained by `url_pattern_encoding_callback`.
// @see https://urlpattern.spec.whatwg.org/#parse-a-pattern-string
template <url_pattern_encoding_callback F>
tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
    std::string_view input, url_pattern_compile_component_options& options,
    F& encoding_callback);

// Returns the pattern string generated from `part_list` and `options`.
// Note that both arguments are taken by non-const reference.
// @see https://urlpattern.spec.whatwg.org/#generate-a-pattern-string
std::string generate_pattern_string(
    std::vector<url_pattern_part>& part_list,
    url_pattern_compile_component_options& options);

// Returns a tuple of (regular expression string, list of names) built from
// `part_list`. `options` is taken by value.
// @see
// https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list
std::tuple<std::string, std::vector<std::string>>
generate_regular_expression_and_name_list(
    const std::vector<url_pattern_part>& part_list,
    url_pattern_compile_component_options options);

// Reports whether the hostname pattern `input` is an IPv6 address, per the
// spec algorithm linked below. Declared noexcept.
// @see https://urlpattern.spec.whatwg.org/#hostname-pattern-is-an-ipv6-address
bool is_ipv6_address(std::string_view input) noexcept;

// Reports whether the given protocol component matches a special scheme.
// The component is taken by non-const reference; `regex_provider` is
// constrained by `url_pattern_regex::regex_concept`.
// @see
// https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme
template <url_pattern_regex::regex_concept regex_provider>
bool protocol_component_matches_special_scheme(
    ada::url_pattern_component<regex_provider>& input);

// Returns the string form of `modifier` as a non-owning view.
// NOTE(review): presumably a view of a string literal with static lifetime —
// confirm against the definition.
// @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
std::string_view convert_modifier_to_string(url_pattern_part_modifier modifier);

// Returns the segment wildcard regexp for `options` as an owned string.
// `options` is taken by value.
// @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
std::string generate_segment_wildcard_regexp(
    url_pattern_compile_component_options options);

}  // namespace ada::url_pattern_helpers
#endif  // ADA_INCLUDE_URL_PATTERN
#endif