File: constructor_string_parser.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (184 lines) | stat: -rw-r--r-- 7,388 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by an MIT-style license that can be
// found in the LICENSE file or at https://opensource.org/licenses/MIT.

#ifndef THIRD_PARTY_LIBURLPATTERN_CONSTRUCTOR_STRING_PARSER_H_
#define THIRD_PARTY_LIBURLPATTERN_CONSTRUCTOR_STRING_PARSER_H_

#include <cstddef>
#include <functional>
#include <optional>
#include <string_view>
#include <vector>

#include "base/component_export.h"
#include "base/types/expected.h"
#include "third_party/abseil-cpp/absl/status/status.h"
#include "third_party/liburlpattern/tokenize.h"

namespace liburlpattern {

// A helper class to parse the first string passed to the URLPattern
// constructor.  In general the parser works by using the liburlpattern
// tokenizer to first split up the input into pattern tokens.  It can
// then look through the tokens to find non-special characters that match
// the different URL component separators.  Each component is then split
// off and stored in a `Result` object that can be accessed via `GetResult()`.
// The intent is that this object should then be processed as if it were
// passed into the constructor itself.
class COMPONENT_EXPORT(LIBURLPATTERN) ConstructorStringParser {
 public:
  // The per-component pattern strings produced by the parse.  Every field is
  // a non-owning `std::string_view` wrapped in `std::optional`, so each one
  // starts out as `std::nullopt`.
  // NOTE(review): presumably a component that does not appear in the
  // constructor string stays `std::nullopt`, and the views point into the
  // string given to the constructor -- confirm against the .cc file.
  struct Result {
    std::optional<std::string_view> protocol;
    std::optional<std::string_view> username;
    std::optional<std::string_view> password;
    std::optional<std::string_view> hostname;
    std::optional<std::string_view> port;
    std::optional<std::string_view> pathname;
    std::optional<std::string_view> search;
    std::optional<std::string_view> hash;
  };

  // The callback type accepted by `Parse()`.  It receives a protocol string
  // and returns either a `bool` or, on failure, an `absl::Status`.
  using ProtocolCheckCallback =
      std::function<base::expected<bool, absl::Status>(std::string_view)>;

  // Creates a parser for `constructor_string`.
  // NOTE(review): the parser keeps its input in a non-owning
  // `std::string_view` member (`input_`), so the underlying character data
  // presumably has to outlive this object and any `Result` read from it --
  // confirm against the .cc file.
  explicit ConstructorStringParser(std::string_view constructor_string);

  // Attempt to parse the input string used to construct the Parser object.
  // This method may only be called once.  Retrieve the parse result by calling
  // `GetResult()`.
  // `protocol_matches_special_scheme` is called with a protocol string. It must
  // return whether the protocol component which is compiled from the protocol
  // string matches a special scheme. It is not called for a relative pattern
  // string. The protocol component created inside the callback can be reused
  // when creating a URLPattern object.
  absl::Status Parse(ProtocolCheckCallback protocol_matches_special_scheme);

  // Return the parse result.  Should only be called after `Parse()` succeeds.
  const Result& GetResult() const { return result_; }

 private:
  // The states the parser can be in.  `state_` starts out as `kInit`.
  enum class StringParseState {
    kInit,
    kProtocol,
    kAuthority,
    kUsername,
    kPassword,
    kHostname,
    kPort,
    kPathname,
    kSearch,
    kHash,
    kDone,
  };

  // The number of tokens to ignore when changing state; see `ChangeState()`.
  using Skip = int;

  // A utility function to move from the current `state_` to `new_state`.  This
  // method will populate the component string in `result_` corresponding to the
  // current `state_` automatically.  It will also set `component_start_` and
  // `token_index_` to point to the first token of the next section based on how
  // many tokens the `skip` argument indicates should be ignored.
  void ChangeState(StringParseState new_state, Skip skip);

  // A utility function to move to `new_state`.  This is like `ChangeState()`,
  // but does not automatically set the component string for the current state.
  void ChangeStateWithoutSettingComponent(StringParseState new_state,
                                          Skip skip);

  // Rewind the `token_index_` back to the current `component_start_`.
  void Rewind();

  // Like `Rewind()`, but also sets the state.  This is used for cases where
  // the parser needs to "look ahead" to determine what parse state to enter.
  void RewindAndSetState(StringParseState new_state);

  // Attempt to access the Token at the given `index`.  If the `index` is out
  // of bounds for the `token_list_`, then the last Token in the list is
  // returned.  This will always be a `TokenType::kEnd` token.
  const Token& SafeToken(size_t index) const;

  // Returns true if the token at the given `index` is not a special pattern
  // character and if it matches the given `value`.  This simply checks that the
  // token type is kChar, kEscapedChar, or kInvalidChar.
  bool IsNonSpecialPatternChar(size_t index, const char* value) const;

  // Returns true if the current token is the protocol component suffix;
  // e.g. ':'.  (This method takes no index argument; presumably it examines
  // the token at `token_index_`, like the other predicates below.)
  bool IsProtocolSuffix() const;

  // Returns true if the next two tokens are slashes; e.g. `//`.
  bool NextIsAuthoritySlashes() const;

  // Returns true if the current token is the `@` character used to separate
  // username and password from the hostname.  (This method takes no index
  // argument; presumably it examines the token at `token_index_`.)
  bool IsIdentityTerminator() const;

  // Returns true if the current token is the password prefix; e.g. `:`.
  bool IsPasswordPrefix() const;

  // Returns true if the current token is the port prefix; e.g. `:`.
  bool IsPortPrefix() const;

  // Returns true if the current token is the start of the pathname; e.g. `/`.
  bool IsPathnameStart() const;

  // Returns true if the current token is the search component prefix; e.g. `?`.
  // This also takes into account if this could be a valid pattern modifier by
  // looking at the preceding tokens.
  bool IsSearchPrefix() const;

  // Returns true if the current token is the hash component prefix; e.g. `#`.
  bool IsHashPrefix() const;

  // These methods indicate if the current token is opening or closing a pattern
  // grouping; e.g. `{` or `}`.
  bool IsGroupOpen() const;
  bool IsGroupClose() const;

  // These methods indicate if the current token is an opening or closing
  // bracket for an ipv6 hostname; e.g. '[' or ']'.
  bool IsIPv6Open() const;
  bool IsIPv6Close() const;

  // This method returns a std::string_view consisting of the tokens between
  // `component_start_` and the current `token_index_`.
  std::string_view MakeComponentString() const;

  // The input UTF-8 string to the parser.  This is a non-owning view.
  const std::string_view input_;

  // The list of Tokens produced by calling `Tokenize()` on `input_`.
  std::vector<Token> token_list_;

  // As we parse the input string we populate this `Result` object, which plays
  // the role of a `URLPatternInit` dictionary, with each component pattern.
  // This is then the final result of the parse.
  Result result_;

  // The index of the first Token to include in the component string.
  size_t component_start_ = 0;

  // The index of the current Token being considered.
  size_t token_index_ = 0;

  // The value to add to `token_index_` on each turn through the parse loop.
  // While typically this is `1`, it is also set to `0` at times for things
  // like state transitions, etc.  It is automatically reset back to `1` at
  // the top of the parse loop.
  size_t token_increment_ = 1;

  // The current nesting depth of `{ }` pattern groupings.
  int group_depth_ = 0;

  // The current nesting depth of `[ ]` in hostname patterns.
  int hostname_ipv6_bracket_depth_ = 0;

  // The current parse state.  This should only be changed via `ChangeState()`
  // or `RewindAndSetState()`.
  StringParseState state_ = StringParseState::kInit;

  // True if we should apply parse rules as if this is a "standard" URL.  If
  // false then this is treated as a "not a base URL" or "path" URL.
  bool should_treat_as_standard_url_ = false;
};

}  // namespace liburlpattern

#endif  // THIRD_PARTY_LIBURLPATTERN_CONSTRUCTOR_STRING_PARSER_H_