File: duk_lexer.h

package info (click to toggle)
duktape 2.7.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 21,160 kB
  • sloc: ansic: 215,359; python: 5,961; javascript: 4,555; makefile: 477; cpp: 205
file content (435 lines) | stat: -rw-r--r-- 15,446 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
/*
 *  Lexer defines.
 */

#if !defined(DUK_LEXER_H_INCLUDED)
#define DUK_LEXER_H_INCLUDED

/* Callback type taken by duk_lexer_parse_re_ranges() as its 'gen_range'
 * argument.  The callback receives an opaque 'user' pointer, two codepoints
 * (r1, r2) and a boolean 'direct' flag, and returns nothing.
 *
 * NOTE(review): presumably called once per character class range [r1,r2]
 * with 'user' being the 'userdata' value given to
 * duk_lexer_parse_re_ranges(); the meaning of 'direct' is not visible in
 * this header -- confirm in duk_lexer.c.
 */
typedef void (*duk_re_range_callback)(void *user, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct);

/*
 *  A token is interpreted as any possible production of InputElementDiv
 *  and InputElementRegExp, see E5 Section 7 in its entirety.  Note that
 *  the E5 "Token" production does not cover all actual tokens of the
 *  language (which is explicitly stated in the specification, Section 7.5).
 *  Null and boolean literals are defined as part of both ReservedWord
 *  (E5 Section 7.6.1) and Literal (E5 Section 7.8) productions.  Here,
 *  null and boolean values have literal tokens, and are not reserved
 *  words.
 *
 *  Decimal literal negative/positive sign is -not- part of DUK_TOK_NUMBER.
 *  The number tokens always have a non-negative value.  The unary minus
 *  operator in "-1.0" is optimized during compilation to yield a single
 *  negative constant.
 *
 *  Token numbering is free except that reserved words are required to be
 *  in a continuous range and in a particular order.  See genstrings.py.
 */

/* Convenience macros; each one forwards its arguments unchanged to the
 * lexer function of the same (lowercase) name.
 */

/* Calls duk_lexer_initctx() on lexer context 'ctx'. */
#define DUK_LEXER_INITCTX(ctx) duk_lexer_initctx((ctx))

/* Calls duk_lexer_setpoint() with lexer context 'ctx' and lexer point 'pt'. */
#define DUK_LEXER_SETPOINT(ctx, pt) duk_lexer_setpoint((ctx), (pt))

/* Calls duk_lexer_getpoint() with lexer context 'ctx' and lexer point 'pt'. */
#define DUK_LEXER_GETPOINT(ctx, pt) duk_lexer_getpoint((ctx), (pt))

/* Currently 6 characters of lookahead are actually needed (duk_lexer.c). */
#define DUK_LEXER_WINDOW_SIZE 6
#if defined(DUK_USE_LEXER_SLIDING_WINDOW)
#define DUK_LEXER_BUFFER_SIZE 64
#endif

#define DUK_TOK_MINVAL 0

/* returned at and after EOF (may be returned an unlimited number of times) */
#define DUK_TOK_EOF 0

/* identifier names (E5 Section 7.6) */
#define DUK_TOK_IDENTIFIER 1

/* reserved words: keywords */
#define DUK_TOK_START_RESERVED 2
#define DUK_TOK_BREAK          2
#define DUK_TOK_CASE           3
#define DUK_TOK_CATCH          4
#define DUK_TOK_CONTINUE       5
#define DUK_TOK_DEBUGGER       6
#define DUK_TOK_DEFAULT        7
#define DUK_TOK_DELETE         8
#define DUK_TOK_DO             9
#define DUK_TOK_ELSE           10
#define DUK_TOK_FINALLY        11
#define DUK_TOK_FOR            12
#define DUK_TOK_FUNCTION       13
#define DUK_TOK_IF             14
#define DUK_TOK_IN             15
#define DUK_TOK_INSTANCEOF     16
#define DUK_TOK_NEW            17
#define DUK_TOK_RETURN         18
#define DUK_TOK_SWITCH         19
#define DUK_TOK_THIS           20
#define DUK_TOK_THROW          21
#define DUK_TOK_TRY            22
#define DUK_TOK_TYPEOF         23
#define DUK_TOK_VAR            24
#define DUK_TOK_CONST          25
#define DUK_TOK_VOID           26
#define DUK_TOK_WHILE          27
#define DUK_TOK_WITH           28

/* reserved words: future reserved words */
#define DUK_TOK_CLASS   29
#define DUK_TOK_ENUM    30
#define DUK_TOK_EXPORT  31
#define DUK_TOK_EXTENDS 32
#define DUK_TOK_IMPORT  33
#define DUK_TOK_SUPER   34

/* "null", "true", and "false" are always reserved words.
 * Note that "get" and "set" are not!
 */
#define DUK_TOK_NULL  35
#define DUK_TOK_TRUE  36
#define DUK_TOK_FALSE 37

/* reserved words: additional future reserved words in strict mode */
#define DUK_TOK_START_STRICT_RESERVED 38 /* inclusive */
#define DUK_TOK_IMPLEMENTS            38
#define DUK_TOK_INTERFACE             39
#define DUK_TOK_LET                   40
#define DUK_TOK_PACKAGE               41
#define DUK_TOK_PRIVATE               42
#define DUK_TOK_PROTECTED             43
#define DUK_TOK_PUBLIC                44
#define DUK_TOK_STATIC                45
#define DUK_TOK_YIELD                 46

#define DUK_TOK_END_RESERVED 47 /* exclusive */

/* "get" and "set" are tokens but NOT ReservedWords.  They are currently
 * parsed as identifiers and these defines are actually now unused.
 */
#define DUK_TOK_GET 47
#define DUK_TOK_SET 48

/* punctuators (unlike the spec, also includes "/" and "/=") */
#define DUK_TOK_LCURLY     49
#define DUK_TOK_RCURLY     50
#define DUK_TOK_LBRACKET   51
#define DUK_TOK_RBRACKET   52
#define DUK_TOK_LPAREN     53
#define DUK_TOK_RPAREN     54
#define DUK_TOK_PERIOD     55
#define DUK_TOK_SEMICOLON  56
#define DUK_TOK_COMMA      57
#define DUK_TOK_LT         58
#define DUK_TOK_GT         59
#define DUK_TOK_LE         60
#define DUK_TOK_GE         61
#define DUK_TOK_EQ         62
#define DUK_TOK_NEQ        63
#define DUK_TOK_SEQ        64
#define DUK_TOK_SNEQ       65
#define DUK_TOK_ADD        66
#define DUK_TOK_SUB        67
#define DUK_TOK_MUL        68
#define DUK_TOK_DIV        69
#define DUK_TOK_MOD        70
#define DUK_TOK_EXP        71
#define DUK_TOK_INCREMENT  72
#define DUK_TOK_DECREMENT  73
#define DUK_TOK_ALSHIFT    74 /* named "arithmetic" because result is signed */
#define DUK_TOK_ARSHIFT    75
#define DUK_TOK_RSHIFT     76
#define DUK_TOK_BAND       77
#define DUK_TOK_BOR        78
#define DUK_TOK_BXOR       79
#define DUK_TOK_LNOT       80
#define DUK_TOK_BNOT       81
#define DUK_TOK_LAND       82
#define DUK_TOK_LOR        83
#define DUK_TOK_QUESTION   84
#define DUK_TOK_COLON      85
#define DUK_TOK_EQUALSIGN  86
#define DUK_TOK_ADD_EQ     87
#define DUK_TOK_SUB_EQ     88
#define DUK_TOK_MUL_EQ     89
#define DUK_TOK_DIV_EQ     90
#define DUK_TOK_MOD_EQ     91
#define DUK_TOK_EXP_EQ     92
#define DUK_TOK_ALSHIFT_EQ 93
#define DUK_TOK_ARSHIFT_EQ 94
#define DUK_TOK_RSHIFT_EQ  95
#define DUK_TOK_BAND_EQ    96
#define DUK_TOK_BOR_EQ     97
#define DUK_TOK_BXOR_EQ    98

/* literals (E5 Section 7.8), except null, true, false, which are treated
 * like reserved words (above).
 */
#define DUK_TOK_NUMBER 99
#define DUK_TOK_STRING 100
#define DUK_TOK_REGEXP 101

#define DUK_TOK_MAXVAL 101 /* inclusive */

#define DUK_TOK_INVALID DUK_SMALL_UINT_MAX

/* Map the heap string index of a reserved word to its token number.  The
 * reserved word string indices and the reserved word tokens are laid out in
 * the same order, so the mapping is a rebase from one range start to the
 * other.
 */
#define DUK_STRIDX_TO_TOK(x) (DUK_TOK_START_RESERVED + ((x) - DUK_STRIDX_START_RESERVED))

/* Sanity check */
#if (DUK_TOK_MAXVAL > 255)
#error DUK_TOK_MAXVAL too large, code assumes it fits into 8 bits
#endif

/* Sanity checks for string and token defines */
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_BREAK) != DUK_TOK_BREAK)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CASE) != DUK_TOK_CASE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CATCH) != DUK_TOK_CATCH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONTINUE) != DUK_TOK_CONTINUE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEBUGGER) != DUK_TOK_DEBUGGER)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEFAULT) != DUK_TOK_DEFAULT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DELETE) != DUK_TOK_DELETE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DO) != DUK_TOK_DO)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ELSE) != DUK_TOK_ELSE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FINALLY) != DUK_TOK_FINALLY)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FOR) != DUK_TOK_FOR)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_FUNCTION) != DUK_TOK_FUNCTION)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IF) != DUK_TOK_IF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IN) != DUK_TOK_IN)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INSTANCEOF) != DUK_TOK_INSTANCEOF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_NEW) != DUK_TOK_NEW)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_RETURN) != DUK_TOK_RETURN)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SWITCH) != DUK_TOK_SWITCH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THIS) != DUK_TOK_THIS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THROW) != DUK_TOK_THROW)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRY) != DUK_TOK_TRY)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TYPEOF) != DUK_TOK_TYPEOF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VAR) != DUK_TOK_VAR)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VOID) != DUK_TOK_VOID)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WHILE) != DUK_TOK_WHILE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WITH) != DUK_TOK_WITH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CLASS) != DUK_TOK_CLASS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONST) != DUK_TOK_CONST)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ENUM) != DUK_TOK_ENUM)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXPORT) != DUK_TOK_EXPORT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXTENDS) != DUK_TOK_EXTENDS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPORT) != DUK_TOK_IMPORT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SUPER) != DUK_TOK_SUPER)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_NULL) != DUK_TOK_NULL)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRUE) != DUK_TOK_TRUE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FALSE) != DUK_TOK_FALSE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPLEMENTS) != DUK_TOK_IMPLEMENTS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INTERFACE) != DUK_TOK_INTERFACE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LET) != DUK_TOK_LET)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PACKAGE) != DUK_TOK_PACKAGE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PRIVATE) != DUK_TOK_PRIVATE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PROTECTED) != DUK_TOK_PROTECTED)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PUBLIC) != DUK_TOK_PUBLIC)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_STATIC) != DUK_TOK_STATIC)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_YIELD) != DUK_TOK_YIELD)
#error mismatch in token defines
#endif

/* Regexp tokens */
#define DUK_RETOK_EOF                           0
#define DUK_RETOK_DISJUNCTION                   1
#define DUK_RETOK_QUANTIFIER                    2
#define DUK_RETOK_ASSERT_START                  3
#define DUK_RETOK_ASSERT_END                    4
#define DUK_RETOK_ASSERT_WORD_BOUNDARY          5
#define DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY      6
#define DUK_RETOK_ASSERT_START_POS_LOOKAHEAD    7
#define DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD    8
#define DUK_RETOK_ATOM_PERIOD                   9
#define DUK_RETOK_ATOM_CHAR                     10
#define DUK_RETOK_ATOM_DIGIT                    11 /* assumptions in regexp compiler */
#define DUK_RETOK_ATOM_NOT_DIGIT                12 /* -""- */
#define DUK_RETOK_ATOM_WHITE                    13 /* -""- */
#define DUK_RETOK_ATOM_NOT_WHITE                14 /* -""- */
#define DUK_RETOK_ATOM_WORD_CHAR                15 /* -""- */
#define DUK_RETOK_ATOM_NOT_WORD_CHAR            16 /* -""- */
#define DUK_RETOK_ATOM_BACKREFERENCE            17
#define DUK_RETOK_ATOM_START_CAPTURE_GROUP      18
#define DUK_RETOK_ATOM_START_NONCAPTURE_GROUP   19
#define DUK_RETOK_ATOM_START_CHARCLASS          20
#define DUK_RETOK_ATOM_START_CHARCLASS_INVERTED 21
#define DUK_RETOK_ATOM_END_GROUP                22

/* Constants for duk_lexer_ctx.buf. */
#define DUK_LEXER_TEMP_BUF_LIMIT 256

/* A token value.  Can be memcpy()'d, but note that slot1/slot2 values are on the valstack.
 * Some fields (like num, str1, str2) are only valid for specific token types and may have
 * stale values otherwise.
 *
 * This is the type of the 'out_token' argument of duk_lexer_parse_js_input_element().
 */
struct duk_token {
	duk_small_uint_t t; /* token type (with reserved word identification) */
	duk_small_uint_t t_nores; /* token type (with reserved words as DUK_TOK_IDENTIFIER) */
	duk_double_t num; /* numeric value of token */
	duk_hstring *str1; /* string 1 of token (borrowed, stored to ctx->slot1_idx) */
	duk_hstring *str2; /* string 2 of token (borrowed, stored to ctx->slot2_idx) */
	duk_size_t start_offset; /* start byte offset of token in lexer input */
	duk_int_t start_line; /* start line of token (first char) */
	duk_int_t num_escapes; /* number of escapes and line continuations (for directive prologue) */
	duk_bool_t lineterm; /* token was preceded by a lineterm */
	duk_bool_t allow_auto_semi; /* token allows automatic semicolon insertion (eof or preceded by newline) */
};

/* Sentinel value 0xffffffff cast to duk_uint32_t.
 * NOTE(review): presumably stored in duk_re_token.qmax to mark a quantifier
 * without an upper bound -- confirm against the regexp lexer/compiler.
 */
#define DUK_RE_QUANTIFIER_INFINITE ((duk_uint32_t) 0xffffffffUL)

/* A regexp token value.  This is the type of the 'out_token' argument of
 * duk_lexer_parse_re_token().
 */
struct duk_re_token {
	duk_small_uint_t t; /* token type */
	duk_small_uint_t greedy; /* NOTE(review): presumably a flag telling whether a quantifier is greedy -- confirm */
	duk_uint32_t num; /* numeric value (character, count) */
	duk_uint32_t qmin; /* NOTE(review): presumably quantifier minimum count -- confirm */
	duk_uint32_t qmax; /* NOTE(review): presumably quantifier maximum count -- confirm */
};

/* A structure for 'snapshotting' a point for rewinding.  This is the type of
 * the 'pt' argument of duk_lexer_getpoint() and duk_lexer_setpoint().
 */
struct duk_lexer_point {
	duk_size_t offset; /* NOTE(review): presumably the input byte offset of the saved position -- confirm */
	duk_int_t line; /* NOTE(review): presumably the line number at the saved position -- confirm */
};

/* Lexer codepoint with additional info like offset/line number.  This is the
 * element type of the lookahead window in struct duk_lexer_ctx.
 */
struct duk_lexer_codepoint {
	duk_codepoint_t codepoint; /* the code point value */
	duk_size_t offset; /* offset for this code point (NOTE(review): presumably a byte offset into the lexer input -- confirm) */
	duk_int_t line; /* line number for this code point */
};

/* Lexer context.  Same context is used for ECMAScript and Regexp parsing.
 *
 * The lookahead window has two layouts selected at compile time: when
 * DUK_USE_LEXER_SLIDING_WINDOW is defined, 'window' is a pointer into the
 * embedded 'buffer' array (DUK_LEXER_BUFFER_SIZE entries); otherwise 'window'
 * is itself an array of DUK_LEXER_WINDOW_SIZE entries.
 */
struct duk_lexer_ctx {
#if defined(DUK_USE_LEXER_SLIDING_WINDOW)
	duk_lexer_codepoint *window; /* unicode code points, window[0] is always next, points to 'buffer' */
	duk_lexer_codepoint buffer[DUK_LEXER_BUFFER_SIZE]; /* storage that 'window' points into */
#else
	duk_lexer_codepoint window[DUK_LEXER_WINDOW_SIZE]; /* unicode code points, window[0] is always next */
#endif

	duk_hthread *thr; /* thread; minimizes argument passing */

	const duk_uint8_t *input; /* input string (may be a user pointer) */
	duk_size_t input_length; /* input byte length */
	duk_size_t input_offset; /* input offset for window leading edge (not window[0]) */
	duk_int_t input_line; /* input linenumber at input_offset (not window[0]), init to 1 */

	duk_idx_t slot1_idx; /* valstack slot for 1st token value */
	duk_idx_t slot2_idx; /* valstack slot for 2nd token value */
	duk_idx_t buf_idx; /* valstack slot for temp buffer */
	duk_hbuffer_dynamic *buf; /* temp accumulation buffer */
	duk_bufwriter_ctx bw; /* bufwriter for temp accumulation */

	duk_int_t token_count; /* number of tokens parsed */
	duk_int_t token_limit; /* maximum token count before error (sanity backstop) */

	duk_small_uint_t flags; /* lexer flags, use compiler flag defines for now */
};

/*
 *  Prototypes
 *
 *  The definitions are not part of this header; behavior notes below that
 *  go beyond the signatures are marked for review.
 */

/* Takes a lexer context; also reachable through DUK_LEXER_INITCTX().
 * NOTE(review): by its name initializes 'lex_ctx' -- which fields are set is
 * defined in duk_lexer.c.
 */
DUK_INTERNAL_DECL void duk_lexer_initctx(duk_lexer_ctx *lex_ctx);

/* Get/set a lexer point ('pt') for a lexer context; also reachable through
 * DUK_LEXER_GETPOINT() and DUK_LEXER_SETPOINT().
 * NOTE(review): presumably getpoint stores the current position into 'pt'
 * and setpoint rewinds the lexer to 'pt' -- confirm in duk_lexer.c.
 */
DUK_INTERNAL_DECL void duk_lexer_getpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt);
DUK_INTERNAL_DECL void duk_lexer_setpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt);

/* Takes a lexer context, a token to fill in ('out_token') and two boolean
 * flags ('strict_mode', 'regexp_mode').
 * NOTE(review): presumably parses the next ECMAScript input element into
 * 'out_token', with 'regexp_mode' selecting whether a RegExp literal is
 * allowed at this position -- confirm in duk_lexer.c.
 */
DUK_INTERNAL_DECL
void duk_lexer_parse_js_input_element(duk_lexer_ctx *lex_ctx, duk_token *out_token, duk_bool_t strict_mode, duk_bool_t regexp_mode);
#if defined(DUK_USE_REGEXP_SUPPORT)
/* Regexp lexing entry points; only declared when DUK_USE_REGEXP_SUPPORT is
 * defined.  duk_lexer_parse_re_ranges() takes a duk_re_range_callback
 * ('gen_range') and an opaque 'userdata' pointer.
 */
DUK_INTERNAL_DECL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token);
DUK_INTERNAL_DECL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata);
#endif /* DUK_USE_REGEXP_SUPPORT */

#endif /* DUK_LEXER_H_INCLUDED */