1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by an MIT-style license that can be
// found in the LICENSE file or at https://opensource.org/licenses/MIT.
#ifndef THIRD_PARTY_LIBURLPATTERN_CONSTRUCTOR_STRING_PARSER_H_
#define THIRD_PARTY_LIBURLPATTERN_CONSTRUCTOR_STRING_PARSER_H_
#include <functional>
#include <optional>
#include <string_view>
#include "base/component_export.h"
#include "base/types/expected.h"
#include "third_party/abseil-cpp/absl/status/status.h"
#include "third_party/liburlpattern/tokenize.h"
namespace liburlpattern {
// A helper class to parse the first string passed to the URLPattern
// constructor. In general the parser works by using the liburlpattern
// tokenizer to first split up the input into pattern tokens. It can
// then look through the tokens to find non-special characters that match
// the different URL component separators. Each component is then split
// off and stored in a `Result` object that can be accessed via `GetResult()`.
// The intent is that this object should then be processed as if it was passed
// into the constructor itself.
class COMPONENT_EXPORT(LIBURLPATTERN) ConstructorStringParser {
public:
struct Result {
std::optional<std::string_view> protocol;
std::optional<std::string_view> username;
std::optional<std::string_view> password;
std::optional<std::string_view> hostname;
std::optional<std::string_view> port;
std::optional<std::string_view> pathname;
std::optional<std::string_view> search;
std::optional<std::string_view> hash;
};
using ProtocolCheckCallback =
std::function<base::expected<bool, absl::Status>(std::string_view)>;
explicit ConstructorStringParser(std::string_view constructor_string);
// Attempt to parse the input string used to construct the Parser object.
// This method may only be called once. Retrieve the parse result by calling
// `GetResult()`.
// `protocol_matches_special_scheme` is called with a protocol string. It must
// return whether the protocol component which is compiled from the protocol
// string matches a special scheme. It is not called for relative pattern
// string. The protocol component created inside the callback can be reused
// when creating a URLPattern object.
absl::Status Parse(ProtocolCheckCallback protocol_matches_special_scheme);
// Return the parse result. Should only be called after `Parse()` succeeds.
const Result& GetResult() const { return result_; }
private:
enum class StringParseState {
kInit,
kProtocol,
kAuthority,
kUsername,
kPassword,
kHostname,
kPort,
kPathname,
kSearch,
kHash,
kDone,
};
using Skip = int;
// A utility function to move from the current `state_` to `new_state`. This
// method will populate the component string in `result_` corresponding to the
// current `state_` automatically. It will also set `component_start_` and
// `token_index_` to point to the first token of the next section based on how
// many tokens the `skip` argument indicates should be ignored.
void ChangeState(StringParseState new_state, Skip skip);
// A utility function to move to `new_state`. This is like `ChangeState()`,
// but does not automatically set the component string for the current state.
void ChangeStateWithoutSettingComponent(StringParseState new_state,
Skip skip);
// Rewind the `token_index_` back to the current `component_start_`.
void Rewind();
// Like `Rewind()`, but also sets the state. This is used for cases where
// the parser needs to "look ahead" to determine what parse state to enter.
void RewindAndSetState(StringParseState new_state);
// Attempt to access the Token at the given `index`. If the `index` is out
// of bounds for the `token_list_`, then the last Token in the list is
// returned. This will always be a `TokenType::kEnd` token.
const Token& SafeToken(size_t index) const;
// Returns true if the token at the given `index` is not a special pattern
// character and if it matches the given `value`. This simply checks that the
// token type is kChar, kEscapedChar, or kInvalidChar.
bool IsNonSpecialPatternChar(size_t index, const char* value) const;
// Returns true if the token at the given `index` is the protocol component
// suffix; e.g. ':'.
bool IsProtocolSuffix() const;
// Returns true if the next two tokens are slashes; e.g. `//`.
bool NextIsAuthoritySlashes() const;
// Returns true if the tokan at the given `index` is the `@` character used
// to separate username and password from the hostname.
bool IsIdentityTerminator() const;
// Returns true if the current token is the password prefix; e.g. `:`.
bool IsPasswordPrefix() const;
// Returns true if the current token is the port prefix; e.g. `:`.
bool IsPortPrefix() const;
// Returns true if the current token is the start of the pathname; e.g. `/`.
bool IsPathnameStart() const;
// Returns true if the current token is the search component prefix; e.g. `?`.
// This also takes into account if this could be a valid pattern modifier by
// looking at the preceding tokens.
bool IsSearchPrefix() const;
// Returns true if the current token is the hsah component prefix; e.g. `#`.
bool IsHashPrefix() const;
// These methods indicate if the current token is opening or closing a pattern
// grouping; e.g. `{` or `}`.
bool IsGroupOpen() const;
bool IsGroupClose() const;
// These methods indicate if the current token is an opening or closing
// bracket for an ipv6 hostname; e.g. '[' or ']'.
bool IsIPv6Open() const;
bool IsIPv6Close() const;
// This method returns a std::string_view consisting of the tokens between
// `component_start_` and the current `token_index_`.
std::string_view MakeComponentString() const;
// The input UTF-8 string to the parser.
const std::string_view input_;
// The list of Tokens produced by calling `Tokenize()` on `input_`.
std::vector<Token> token_list_;
// As we parse the input string we populate a `URLPatternInit` dictionary
// with each component pattern. This is then the final result of the parse.
Result result_;
// The index of the first Token to include in the component string.
size_t component_start_ = 0;
// The index of the current Token being considered.
size_t token_index_ = 0;
// The value to add to `token_index_` on each turn the through the parse
// loop. While typically this is `1`, it is also set to `0` at times for
// things like state transitions, etc. It is automatically reset back to
// `1` at the top of the parse loop.
size_t token_increment_ = 1;
// The current nesting depth of `{ }` pattern groupings.
int group_depth_ = 0;
// The current netsting depth of `[ ]` in hostname patterns.
int hostname_ipv6_bracket_depth_ = 0;
// The current parse state. This should only be changed via `ChangeState()`
// or `RewindAndSetState()`.
StringParseState state_ = StringParseState::kInit;
// True if we should apply parse rules as if this is a "standard" URL. If
// false then this is treated as a "not a base URL" or "path" URL.
bool should_treat_as_standard_url_ = false;
};
} // namespace liburlpattern
#endif // THIRD_PARTY_LIBURLPATTERN_CONSTRUCTOR_STRING_PARSER_H_
|