1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
|
/**
* @file url_pattern_helpers.h
* @brief Declaration for the URLPattern helpers.
*/
#ifndef ADA_URL_PATTERN_HELPERS_H
#define ADA_URL_PATTERN_HELPERS_H
#include "ada/expected.h"
#include "ada/common_defs.h"
#include "ada/url_pattern.h"
#include <string>
#include <tuple>
#include <vector>
#if ADA_INCLUDE_URL_PATTERN
namespace ada {
// Opaque declaration of the error enumeration (underlying type uint8_t). It
// lets the declarations below use `errors` in their return types without
// needing the enumerator list here.
enum class errors : uint8_t;
}  // namespace ada
namespace ada::url_pattern_helpers {
// Kinds of token used by the URL pattern helpers. Every enumerator carries an
// explicit numeric value, all of which fit the uint8_t underlying type.
// @see https://urlpattern.spec.whatwg.org/#token
enum class token_type : uint8_t {
  INVALID_CHAR = 0,
  OPEN = 1,
  CLOSE = 2,
  REGEXP = 3,
  NAME = 4,
  CHAR = 5,
  ESCAPED_CHAR = 6,
  OTHER_MODIFIER = 7,
  ASTERISK = 8,
  END = 9,
};
#ifdef ADA_TESTING
// Produces a std::string for the given token type. This declaration is only
// visible when ADA_TESTING is defined.
std::string to_string(token_type type);
#endif // ADA_TESTING
// Policy selector handed to the tokenizer (see Tokenizer and tokenize()).
// @see https://urlpattern.spec.whatwg.org/#tokenize-policy
enum class token_policy {
  strict = 0,
  lenient = 1,
};
// @see https://urlpattern.spec.whatwg.org/#tokens
class token {
public:
token(token_type _type, size_t _index, std::string_view _value)
: type(_type), index(_index), value(_value) {}
// A token has an associated type, a string, initially "invalid-char".
token_type type = token_type::INVALID_CHAR;
// A token has an associated index, a number, initially 0. It is the position
// of the first code point in the pattern string represented by the token.
size_t index = 0;
// A token has an associated value, a string, initially the empty string. It
// contains the code points from the pattern string represented by the token.
std::string_view value{};
};
// State and operations of the pattern parser, parameterised on the encoding
// callback type F.
// @see https://urlpattern.spec.whatwg.org/#pattern-parser
template <url_pattern_encoding_callback F>
class url_pattern_parser {
 public:
  // Stores a reference to `encoding_callback_` (the callback object must
  // therefore outlive this parser) and copies `segment_wildcard_regexp_` into
  // an owned std::string.
  url_pattern_parser(F& encoding_callback_,
                     std::string_view segment_wildcard_regexp_)
      : encoding_callback(encoding_callback_),
        segment_wildcard_regexp(segment_wildcard_regexp_) {}

  // True while `index` is still a valid position inside `tokens`.
  bool can_continue() const { return index < tokens.size(); }

  // Returns a pointer to a token.
  // NOTE(review): presumably nullptr when no token of `type` is consumed —
  // confirm against the definition.
  // @see https://urlpattern.spec.whatwg.org/#try-to-consume-a-token
  token* try_consume_token(token_type type);

  // @see https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token
  token* try_consume_modifier_token();

  // @see
  // https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token
  token* try_consume_regexp_or_wildcard_token(const token* name_token);

  // Returns the consumed text as an owned string.
  // @see https://urlpattern.spec.whatwg.org/#consume-text
  std::string consume_text();

  // @see https://urlpattern.spec.whatwg.org/#consume-a-required-token
  bool consume_required_token(token_type type);

  // Returns an error value on failure, std::nullopt otherwise; the result is
  // marked ada_warn_unused.
  // @see
  // https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value
  std::optional<errors> maybe_add_part_from_the_pending_fixed_value()
      ada_warn_unused;

  // Returns an error value on failure, std::nullopt otherwise; the result is
  // marked ada_warn_unused.
  // @see https://urlpattern.spec.whatwg.org/#add-a-part
  std::optional<errors> add_part(std::string_view prefix, token* name_token,
                                 token* regexp_or_wildcard_token,
                                 std::string_view suffix,
                                 token* modifier_token) ada_warn_unused;

  // Token list the parser walks over.
  std::vector<token> tokens{};
  // Encoding callback, held by reference (not owned).
  F& encoding_callback;
  // Owned copy of the segment wildcard regexp given at construction.
  std::string segment_wildcard_regexp;
  // Parts produced so far.
  std::vector<url_pattern_part> parts{};
  // Fixed text accumulated but not yet turned into a part.
  std::string pending_fixed_value{};
  // Current position in `tokens`.
  size_t index = 0;
  // Counter starting at 0.
  // NOTE(review): presumably used to name unnamed groups — confirm against
  // the definition of add_part.
  size_t next_numeric_name = 0;
};
// Tokenizer state for a pattern string. The free function tokenize() is a
// friend and therefore has access to the private state below.
// @see https://urlpattern.spec.whatwg.org/#tokenizer
class Tokenizer {
 public:
  // Records the pattern string (as a non-owning view) and the policy to use.
  explicit Tokenizer(std::string_view new_input, token_policy new_policy)
      : input{new_input}, policy{new_policy} {}

  // @see https://urlpattern.spec.whatwg.org/#get-the-next-code-point
  constexpr void get_next_code_point();

  // @see https://urlpattern.spec.whatwg.org/#seek-and-get-the-next-code-point
  constexpr void seek_and_get_next_code_point(size_t index);

  // @see https://urlpattern.spec.whatwg.org/#add-a-token
  void add_token(token_type type, size_t next_position, size_t value_position,
                 size_t value_length);

  // @see https://urlpattern.spec.whatwg.org/#add-a-token-with-default-length
  void add_token_with_default_length(token_type type, size_t next_position,
                                     size_t value_position);

  // @see
  // https://urlpattern.spec.whatwg.org/#add-a-token-with-default-position-and-length
  void add_token_with_defaults(token_type type);

  // Yields an error value or std::nullopt; the result is marked
  // ada_warn_unused.
  // @see https://urlpattern.spec.whatwg.org/#process-a-tokenizing-error
  std::optional<errors> process_tokenizing_error(
      size_t next_position, size_t value_position) ada_warn_unused;

  friend tl::expected<std::vector<token>, errors> tokenize(
      std::string_view input, token_policy policy);

 private:
  // The pattern string being tokenized, supplied at construction.
  std::string_view input;
  // The tokenize policy, supplied at construction.
  token_policy policy;
  // The token list built so far; starts empty.
  std::vector<token> token_list{};
  // The associated index, a number; starts at 0.
  size_t index{0};
  // The associated next index, a number; starts at 0.
  size_t next_index{0};
  // The associated code point; value-initialized (the spec's "null").
  char32_t code_point{};
};
// Constructor string parser state, parameterised on the regex provider.
// @see https://urlpattern.spec.whatwg.org/#constructor-string-parser
template <url_pattern_regex::regex_concept regex_provider>
struct constructor_string_parser {
  // Keeps a non-owning view of `new_input` and takes ownership of
  // `new_token_list` by moving it into the parser.
  explicit constructor_string_parser(std::string_view new_input,
                                     std::vector<token>&& new_token_list)
      : input{new_input}, token_list(std::move(new_token_list)) {}

  // Static entry point: yields a url_pattern_init or an error.
  // @see https://urlpattern.spec.whatwg.org/#parse-a-constructor-string
  static tl::expected<url_pattern_init, errors> parse(std::string_view input);

  // The states the parser can be in.
  // @see https://urlpattern.spec.whatwg.org/#constructor-string-parser-state
  enum class State {
    INIT = 0,
    PROTOCOL = 1,
    AUTHORITY = 2,
    USERNAME = 3,
    PASSWORD = 4,
    HOSTNAME = 5,
    PORT = 6,
    PATHNAME = 7,
    SEARCH = 8,
    HASH = 9,
    DONE = 10,
  };

  // Yields an error value or std::nullopt.
  // @see
  // https://urlpattern.spec.whatwg.org/#compute-protocol-matches-a-special-scheme-flag
  std::optional<errors> compute_protocol_matches_special_scheme_flag();

 private:
  // @see https://urlpattern.spec.whatwg.org/#rewind
  constexpr void rewind();

  // @see https://urlpattern.spec.whatwg.org/#is-a-hash-prefix
  constexpr bool is_hash_prefix();

  // @see https://urlpattern.spec.whatwg.org/#is-a-search-prefix
  constexpr bool is_search_prefix();

  // @see https://urlpattern.spec.whatwg.org/#change-state
  void change_state(State state, size_t skip);

  // @see https://urlpattern.spec.whatwg.org/#is-a-group-open
  constexpr bool is_group_open() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-group-close
  constexpr bool is_group_close() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-protocol-suffix
  constexpr bool is_protocol_suffix() const;

  // @see https://urlpattern.spec.whatwg.org/#next-is-authority-slashes
  constexpr bool next_is_authority_slashes() const;

  // @see https://urlpattern.spec.whatwg.org/#is-an-identity-terminator
  constexpr bool is_an_identity_terminator() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-pathname-start
  constexpr bool is_pathname_start() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-password-prefix
  constexpr bool is_password_prefix() const;

  // @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-open
  constexpr bool is_an_ipv6_open() const;

  // @see https://urlpattern.spec.whatwg.org/#is-an-ipv6-close
  constexpr bool is_an_ipv6_close() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-port-prefix
  constexpr bool is_port_prefix() const;

  // @see https://urlpattern.spec.whatwg.org/#is-a-non-special-pattern-char
  constexpr bool is_non_special_pattern_char(size_t index,
                                             uint32_t value) const;

  // @see https://urlpattern.spec.whatwg.org/#get-a-safe-token
  constexpr const token* get_safe_token(size_t index) const;

  // @see https://urlpattern.spec.whatwg.org/#make-a-component-string
  std::string make_component_string();

  // The input string; must be provided on creation. Non-owning view.
  std::string_view input;
  // The token list; must be provided on creation. Owned by the parser.
  std::vector<token> token_list;
  // The result being built; starts as a fresh URLPatternInit.
  url_pattern_init result{};
  // Component start, a number; starts at 0.
  size_t component_start{0};
  // Token index, a number; starts at 0.
  size_t token_index{0};
  // Token increment, a number; starts at 1.
  size_t token_increment{1};
  // Group depth, a number; starts at 0.
  size_t group_depth{0};
  // Hostname IPv6 bracket depth, a number; starts at 0.
  size_t hostname_ipv6_bracket_depth{0};
  // "Protocol matches a special scheme" flag; starts false.
  bool protocol_matches_a_special_scheme_flag{false};
  // Current state; starts at "init".
  State state{State::INIT};
};
// Component canonicalization helpers. Each one takes the component text as a
// string view and yields either the resulting std::string or an `errors`
// value.

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol
tl::expected<std::string, errors> canonicalize_protocol(std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-username
tl::expected<std::string, errors> canonicalize_username(std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-password
tl::expected<std::string, errors> canonicalize_password(std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-hostname
tl::expected<std::string, errors> canonicalize_hostname(std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-an-ipv6-hostname
tl::expected<std::string, errors> canonicalize_ipv6_hostname(
std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-port
tl::expected<std::string, errors> canonicalize_port(std::string_view input);
// Port canonicalization overload that additionally receives the protocol
// string.
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-port
tl::expected<std::string, errors> canonicalize_port_with_protocol(
std::string_view input, std::string_view protocol);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-pathname
tl::expected<std::string, errors> canonicalize_pathname(std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-an-opaque-pathname
tl::expected<std::string, errors> canonicalize_opaque_pathname(
std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-search
tl::expected<std::string, errors> canonicalize_search(std::string_view input);
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-hash
tl::expected<std::string, errors> canonicalize_hash(std::string_view input);
// Tokenizes `input` under `policy`; yields the token list or an `errors`
// value.
// NOTE(review): each token stores a std::string_view — presumably a view into
// `input`, in which case `input` must outlive the returned tokens; confirm
// against the definition.
// @see https://urlpattern.spec.whatwg.org/#tokenize
tl::expected<std::vector<token>, errors> tokenize(std::string_view input,
token_policy policy);
// Returns the processed base URL string as an owned std::string.
// @see https://urlpattern.spec.whatwg.org/#process-a-base-url-string
std::string process_base_url_string(std::string_view input,
url_pattern_init::process_type type);
// Returns the escaped pattern string as an owned std::string.
// @see https://urlpattern.spec.whatwg.org/#escape-a-pattern-string
std::string escape_pattern_string(std::string_view input);
// Returns the escaped regexp string as an owned std::string.
// @see https://urlpattern.spec.whatwg.org/#escape-a-regexp-string
std::string escape_regexp_string(std::string_view input);
// Declared constexpr and noexcept.
// @see https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname
constexpr bool is_absolute_pathname(
std::string_view input, url_pattern_init::process_type type) noexcept;
// Parses `input` with the given options and encoding callback; yields the
// part list or an `errors` value. Both `options` and `encoding_callback` are
// taken by non-const reference.
// @see https://urlpattern.spec.whatwg.org/#parse-a-pattern-string
template <url_pattern_encoding_callback F>
tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
std::string_view input, url_pattern_compile_component_options& options,
F& encoding_callback);
// Returns the generated pattern string. Both arguments are taken by
// non-const reference.
// @see https://urlpattern.spec.whatwg.org/#generate-a-pattern-string
std::string generate_pattern_string(
std::vector<url_pattern_part>& part_list,
url_pattern_compile_component_options& options);
// Returns a tuple of (regular expression string, list of names) for
// `part_list`. `options` is taken by value.
// @see
// https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list
std::tuple<std::string, std::vector<std::string>>
generate_regular_expression_and_name_list(
const std::vector<url_pattern_part>& part_list,
url_pattern_compile_component_options options);
// Declared noexcept.
// @see https://urlpattern.spec.whatwg.org/#hostname-pattern-is-an-ipv6-address
bool is_ipv6_address(std::string_view input) noexcept;
// Takes the protocol component by non-const reference.
// @see
// https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme
template <url_pattern_regex::regex_concept regex_provider>
bool protocol_component_matches_special_scheme(
ada::url_pattern_component<regex_provider>& input);
// Returns the string form of `modifier` as a std::string_view.
// @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
std::string_view convert_modifier_to_string(url_pattern_part_modifier modifier);
// Returns the segment wildcard regexp for `options` (taken by value) as an
// owned std::string.
// @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
std::string generate_segment_wildcard_regexp(
url_pattern_compile_component_options options);
} // namespace ada::url_pattern_helpers
#endif // ADA_INCLUDE_URL_PATTERN
#endif
|