File: json_parser.h

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (269 lines) | stat: -rw-r--r-- 9,941 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_JSON_JSON_PARSER_H_
#define BASE_JSON_JSON_PARSER_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility>

#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/json/json_common.h"
#include "base/third_party/icu/icu_utf.h"
#include "base/values.h"

namespace base {

class Value;

namespace internal {

class JSONParserTest;

// The implementation behind the JSONReader interface. This class is not meant
// to be used directly; it encapsulates logic that need not be exposed publicly.
//
// This parser guarantees O(n) time through the input string. Iteration happens
// on the byte level, with the functions ConsumeChars() and ConsumeChar(). The
// conversion from byte to JSON token happens without advancing the parser in
// GetNextToken/ParseToken, that is tokenization operates on the current parser
// position without advancing.
//
// Built on top of these are a family of Consume functions that iterate
// internally. Invariant: on entry of a Consume function, the parser is wound
// to the first byte of a valid JSON token. On exit, it is on the first byte
// after the token that was just consumed, which would likely be the first byte
// of the next token.
class BASE_EXPORT JSONParser {
 public:
  // Error codes during parsing.
  enum JsonParseError {
    JSON_NO_ERROR = base::ValueDeserializer::kErrorCodeNoError,
    JSON_SYNTAX_ERROR = base::ValueDeserializer::kErrorCodeInvalidFormat,
    JSON_INVALID_ESCAPE,
    JSON_UNEXPECTED_TOKEN,
    JSON_TRAILING_COMMA,
    JSON_TOO_MUCH_NESTING,
    JSON_UNEXPECTED_DATA_AFTER_ROOT,
    JSON_UNSUPPORTED_ENCODING,
    JSON_UNQUOTED_DICTIONARY_KEY,
    JSON_UNREPRESENTABLE_NUMBER,
    JSON_PARSE_ERROR_COUNT
  };

  // String versions of parse error codes.
  static const char kSyntaxError[];
  static const char kInvalidEscape[];
  static const char kUnexpectedToken[];
  static const char kTrailingComma[];
  static const char kTooMuchNesting[];
  static const char kUnexpectedDataAfterRoot[];
  static const char kUnsupportedEncoding[];
  static const char kUnquotedDictionaryKey[];
  static const char kUnrepresentableNumber[];

  explicit JSONParser(int options, size_t max_depth = kAbsoluteMaxDepth);

  JSONParser(const JSONParser&) = delete;
  JSONParser& operator=(const JSONParser&) = delete;

  ~JSONParser();

  // Parses the input string according to the set options and returns the
  // result as a Value.
  // Wrap this in base::FooValue::From() to check the Value is of type Foo and
  // convert to a FooValue at the same time.
  std::optional<Value> Parse(std::string_view input);

  // Returns the error code.
  JsonParseError error_code() const;

  // Returns the human-friendly error message.
  std::string GetErrorMessage() const;

  // Returns the error line number if parse error happened. Otherwise always
  // returns 0.
  int error_line() const;

  // Returns the error column number if parse error happened. Otherwise always
  // returns 0.
  int error_column() const;

 private:
  enum Token {
    T_OBJECT_BEGIN,  // {
    T_OBJECT_END,    // }
    T_ARRAY_BEGIN,   // [
    T_ARRAY_END,     // ]
    T_STRING,
    T_NUMBER,
    T_BOOL_TRUE,              // true
    T_BOOL_FALSE,             // false
    T_NULL,                   // null
    T_LIST_SEPARATOR,         // ,
    T_OBJECT_PAIR_SEPARATOR,  // :
    T_END_OF_INPUT,
    T_INVALID_TOKEN,
  };

  // Returns the next |count| bytes of the input stream, or nullopt if fewer
  // than |count| bytes remain.
  std::optional<std::string_view> PeekChars(size_t count);

  // Calls PeekChars() with a |count| of 1.
  std::optional<char> PeekChar();

  // Returns the next |count| bytes of the input stream, or nullopt if fewer
  // than |count| bytes remain, and advances the parser position by |count|.
  std::optional<std::string_view> ConsumeChars(size_t count);

  // Calls ConsumeChars() with a |count| of 1.
  std::optional<char> ConsumeChar();

  // Returns a pointer to the current character position.
  const char* pos();

  // Skips over whitespace and comments to find the next token in the stream.
  // This does not advance the parser for non-whitespace or comment chars.
  Token GetNextToken();

  // Consumes whitespace characters and comments until the next non-that is
  // encountered.
  void EatWhitespaceAndComments();
  // Helper function that consumes a comment, assuming that the parser is
  // currently wound to a '/'.
  bool EatComment();

  // Calls GetNextToken() and then ParseToken().
  std::optional<Value> ParseNextToken();

  // Takes a token that represents the start of a Value ("a structural token"
  // in RFC terms) and consumes it, returning the result as a Value.
  std::optional<Value> ParseToken(Token token);

  // Assuming that the parser is currently wound to '{', this parses a JSON
  // object into a Value.
  std::optional<Value> ConsumeDictionary();

  // Assuming that the parser is wound to '[', this parses a JSON list into a
  // Value.
  std::optional<Value> ConsumeList();

  // Calls through ConsumeStringRaw and wraps it in a value.
  std::optional<Value> ConsumeString();

  // Assuming that the parser is wound to a double quote, this parses a string,
  // decoding any escape sequences and validating UTF-8. Returns the string on
  // success or std::nullopt on error, with error information set.
  std::optional<std::string> ConsumeStringRaw();

  enum class StringResult {
    // Parsing stopped because of invalid input. Error information has been set.
    // The caller should return failure.
    kError,
    // Parsing stopped because the string is finished. The parser is wound to
    // just paste the closing quote. The caller should stop parsing the string.
    kDone,
    // Parsing stopped because of invalid Unicode which should be replaced with
    // a replacement character. The parser is wound to just past the input that
    // should be a replacement character. The caller should add a replacement
    // character and continue parsing.
    kReplacementCharacter,
    // Parsing stopped because of an escape sequence. The parser is wound to
    // just past the backslash. The caller should consume the escape sequence
    // and continue parsing.
    kEscape,
  };

  // Consumes the portion of a JavaScript string which may be copied to the
  // input with no conversions, stopping at one of the events above. Returns the
  // reason parsing stopped and the data that was consumed. This should be
  // called in a loop, handling all the cases above until reaching kDone.
  std::pair<StringResult, std::string_view> ConsumeStringPart();

  // Helper function for ConsumeStringRaw() that consumes the next four or 10
  // bytes (parser is wound to the first character of a HEX sequence, with the
  // potential for consuming another \uXXXX for a surrogate). Returns true on
  // success and places the code point |out_code_point|, and false on failure.
  bool DecodeUTF16(base_icu::UChar32* out_code_point);

  // Assuming that the parser is wound to the start of a valid JSON number,
  // this parses and converts it to either an int or double value.
  std::optional<Value> ConsumeNumber();
  // Helper that reads characters that are ints. Returns true if a number was
  // read and false on error.
  bool ReadInt(bool allow_leading_zeros);

  // Consumes the literal values of |true|, |false|, and |null|, assuming the
  // parser is wound to the first character of any of those.
  std::optional<Value> ConsumeLiteral();

  // Helper function that returns true if the byte squence |match| can be
  // consumed at the current parser position. Returns false if there are fewer
  // than |match|-length bytes or if the sequence does not match, and the
  // parser state is unchanged.
  bool ConsumeIfMatch(std::string_view match);

  // Sets the error information to |code| at the current column, based on
  // |index_| and |index_last_line_|, with an optional positive/negative
  // adjustment by |column_adjust|.
  void ReportError(JsonParseError code, int column_adjust);

  // Given the line and column number of an error, formats one of the error
  // message contants from json_reader.h for human display.
  static std::string FormatErrorMessage(int line,
                                        int column,
                                        const std::string& description);

  // base::JSONParserOptions that control parsing.
  const int options_;

  // Maximum depth to parse.
  const size_t max_depth_;

  // The input stream being parsed. Note: Not guaranteed to NUL-terminated.
  std::string_view input_;

  // The index in the input stream to which the parser is wound.
  size_t index_;

  // The number of times the parser has recursed (current stack depth).
  size_t stack_depth_;

  // The line number that the parser is at currently.
  int line_number_;

  // The last value of |index_| on the previous line.
  size_t index_last_line_;

  // Error information.
  JsonParseError error_code_;
  int error_line_;
  int error_column_;

  friend class JSONParserTest;
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
};

// Used when decoding and an invalid utf-8 sequence is encountered.
BASE_EXPORT extern const char kUnicodeReplacementString[];

}  // namespace internal
}  // namespace base

#endif  // BASE_JSON_JSON_PARSER_H_