File: duk_hstring.h

package info (click to toggle)
duktape 2.7.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 21,160 kB
  • sloc: ansic: 215,359; python: 5,961; javascript: 4,555; makefile: 477; cpp: 205
file content (265 lines) | stat: -rw-r--r-- 11,347 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
/*
 *  Heap string representation.
 *
 *  Strings are byte sequences ordinarily stored in extended UTF-8 format,
 *  allowing values larger than the official UTF-8 range (used internally)
 *  and also allowing UTF-8 encoding of surrogate pairs (CESU-8 format).
 *  Strings may also be invalid UTF-8 altogether which is the case e.g. with
 *  strings used as internal property names and raw buffers converted to
 *  strings.  In such cases the 'clen' field contains an inaccurate value.
 *
 *  ECMAScript requires support for 32-bit long strings.  However, since each
 *  16-bit codepoint can take 3 bytes in CESU-8, this representation can only
 *  support about 1.4G codepoint long strings in extreme cases.  This is not
 *  really a practical issue.
 */

#if !defined(DUK_HSTRING_H_INCLUDED)
#define DUK_HSTRING_H_INCLUDED

/* Impose a maximum string length for now.  Restricted artificially to
 * ensure adding a heap header length won't overflow size_t.  The limit
 * should be synchronized with DUK_HBUFFER_MAX_BYTELEN.
 *
 * E5.1 makes provisions to support strings longer than 4G characters.
 * This limit should be eliminated on 64-bit platforms (and increased
 * closer to maximum support on 32-bit platforms).
 */

#if defined(DUK_USE_STRLEN16)
#define DUK_HSTRING_MAX_BYTELEN (0x0000ffffUL)
#else
#define DUK_HSTRING_MAX_BYTELEN (0x7fffffffUL)
#endif

/* XXX: could add flags for "is valid CESU-8" (ECMAScript compatible strings),
 * "is valid UTF-8", "is valid extended UTF-8" (internal strings are not,
 * regexp bytecode is), and "contains non-BMP characters".  These are not
 * needed right now.
 */

/* With lowmem builds the high 16 bits of duk_heaphdr are used for other
 * purposes, so this leaves 7 duk_heaphdr flags and 9 duk_hstring flags.
 */
#define DUK_HSTRING_FLAG_ASCII  DUK_HEAPHDR_USER_FLAG(0) /* string is ASCII, clen == blen */
#define DUK_HSTRING_FLAG_ARRIDX DUK_HEAPHDR_USER_FLAG(1) /* string is a valid array index */
#define DUK_HSTRING_FLAG_SYMBOL DUK_HEAPHDR_USER_FLAG(2) /* string is a symbol (invalid utf-8) */
#define DUK_HSTRING_FLAG_HIDDEN \
	DUK_HEAPHDR_USER_FLAG(3) /* string is a hidden symbol (implies symbol, Duktape 1.x internal string) */
#define DUK_HSTRING_FLAG_RESERVED_WORD        DUK_HEAPHDR_USER_FLAG(4) /* string is a reserved word (non-strict) */
#define DUK_HSTRING_FLAG_STRICT_RESERVED_WORD DUK_HEAPHDR_USER_FLAG(5) /* string is a reserved word (strict) */
#define DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS    DUK_HEAPHDR_USER_FLAG(6) /* string is 'eval' or 'arguments' */
#define DUK_HSTRING_FLAG_EXTDATA              DUK_HEAPHDR_USER_FLAG(7) /* string data is external (duk_hstring_external) */
#define DUK_HSTRING_FLAG_PINNED_LITERAL       DUK_HEAPHDR_USER_FLAG(8) /* string is a literal, and pinned */

#define DUK_HSTRING_HAS_ASCII(x)                DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII)
#define DUK_HSTRING_HAS_ARRIDX(x)               DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX)
#define DUK_HSTRING_HAS_SYMBOL(x)               DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_SYMBOL)
#define DUK_HSTRING_HAS_HIDDEN(x)               DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_HIDDEN)
#define DUK_HSTRING_HAS_RESERVED_WORD(x)        DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD)
#define DUK_HSTRING_HAS_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD)
#define DUK_HSTRING_HAS_EVAL_OR_ARGUMENTS(x)    DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS)
#define DUK_HSTRING_HAS_EXTDATA(x)              DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA)
#define DUK_HSTRING_HAS_PINNED_LITERAL(x)       DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_PINNED_LITERAL)

#define DUK_HSTRING_SET_ASCII(x)                DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII)
#define DUK_HSTRING_SET_ARRIDX(x)               DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX)
#define DUK_HSTRING_SET_SYMBOL(x)               DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_SYMBOL)
#define DUK_HSTRING_SET_HIDDEN(x)               DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_HIDDEN)
#define DUK_HSTRING_SET_RESERVED_WORD(x)        DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD)
#define DUK_HSTRING_SET_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD)
#define DUK_HSTRING_SET_EVAL_OR_ARGUMENTS(x)    DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS)
#define DUK_HSTRING_SET_EXTDATA(x)              DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA)
#define DUK_HSTRING_SET_PINNED_LITERAL(x)       DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_PINNED_LITERAL)

#define DUK_HSTRING_CLEAR_ASCII(x)                DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII)
#define DUK_HSTRING_CLEAR_ARRIDX(x)               DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX)
#define DUK_HSTRING_CLEAR_SYMBOL(x)               DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_SYMBOL)
#define DUK_HSTRING_CLEAR_HIDDEN(x)               DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_HIDDEN)
#define DUK_HSTRING_CLEAR_RESERVED_WORD(x)        DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD)
#define DUK_HSTRING_CLEAR_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD)
#define DUK_HSTRING_CLEAR_EVAL_OR_ARGUMENTS(x)    DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS)
#define DUK_HSTRING_CLEAR_EXTDATA(x)              DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA)
#define DUK_HSTRING_CLEAR_PINNED_LITERAL(x)       DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_PINNED_LITERAL)

#if 0 /* Slightly smaller code without explicit flag, but explicit flag \
       * is very useful when 'clen' is dropped. \
       */
#define DUK_HSTRING_IS_ASCII(x) (DUK_HSTRING_GET_BYTELEN((x)) == DUK_HSTRING_GET_CHARLEN((x)))
#endif
#define DUK_HSTRING_IS_ASCII(x) DUK_HSTRING_HAS_ASCII((x)) /* lazily set! */
#define DUK_HSTRING_IS_EMPTY(x) (DUK_HSTRING_GET_BYTELEN((x)) == 0)

#if defined(DUK_USE_STRHASH16)
#define DUK_HSTRING_GET_HASH(x) ((x)->hdr.h_flags >> 16)
#define DUK_HSTRING_SET_HASH(x, v) \
	do { \
		(x)->hdr.h_flags = ((x)->hdr.h_flags & 0x0000ffffUL) | ((v) << 16); \
	} while (0)
#else
#define DUK_HSTRING_GET_HASH(x) ((x)->hash)
#define DUK_HSTRING_SET_HASH(x, v) \
	do { \
		(x)->hash = (v); \
	} while (0)
#endif

#if defined(DUK_USE_STRLEN16)
#define DUK_HSTRING_GET_BYTELEN(x) ((x)->hdr.h_strextra16)
#define DUK_HSTRING_SET_BYTELEN(x, v) \
	do { \
		(x)->hdr.h_strextra16 = (v); \
	} while (0)
#if defined(DUK_USE_HSTRING_CLEN)
#define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x))
#define DUK_HSTRING_SET_CHARLEN(x, v) \
	do { \
		(x)->clen16 = (v); \
	} while (0)
#else
#define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x))
#define DUK_HSTRING_SET_CHARLEN(x, v) \
	do { \
		DUK_ASSERT(0); /* should never be called */ \
	} while (0)
#endif
#else
#define DUK_HSTRING_GET_BYTELEN(x) ((x)->blen)
#define DUK_HSTRING_SET_BYTELEN(x, v) \
	do { \
		(x)->blen = (v); \
	} while (0)
#define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x))
#define DUK_HSTRING_SET_CHARLEN(x, v) \
	do { \
		(x)->clen = (v); \
	} while (0)
#endif

#if defined(DUK_USE_HSTRING_EXTDATA)
#define DUK_HSTRING_GET_EXTDATA(x) ((x)->extdata)
#define DUK_HSTRING_GET_DATA(x) \
	(DUK_HSTRING_HAS_EXTDATA((x)) ? DUK_HSTRING_GET_EXTDATA((const duk_hstring_external *) (x)) : \
                                        ((const duk_uint8_t *) ((x) + 1)))
#else
#define DUK_HSTRING_GET_DATA(x) ((const duk_uint8_t *) ((x) + 1))
#endif

#define DUK_HSTRING_GET_DATA_END(x) (DUK_HSTRING_GET_DATA((x)) + (x)->blen)

/* Marker value; in E5 2^32-1 is not a valid array index (2^32-2 is highest
 * valid).
 */
#define DUK_HSTRING_NO_ARRAY_INDEX (0xffffffffUL)

#if defined(DUK_USE_HSTRING_ARRIDX)
#define DUK_HSTRING_GET_ARRIDX_FAST(h) ((h)->arridx)
#define DUK_HSTRING_GET_ARRIDX_SLOW(h) ((h)->arridx)
#else
/* Get array index related to string (or return DUK_HSTRING_NO_ARRAY_INDEX);
 * avoids helper call if string has no array index value.
 */
#define DUK_HSTRING_GET_ARRIDX_FAST(h) \
	(DUK_HSTRING_HAS_ARRIDX((h)) ? duk_js_to_arrayindex_hstring_fast_known((h)) : DUK_HSTRING_NO_ARRAY_INDEX)

/* Slower but more compact variant. */
#define DUK_HSTRING_GET_ARRIDX_SLOW(h) (duk_js_to_arrayindex_hstring_fast((h)))
#endif

/* XXX: these actually fit into duk_hstring */
#define DUK_SYMBOL_TYPE_HIDDEN    0
#define DUK_SYMBOL_TYPE_GLOBAL    1
#define DUK_SYMBOL_TYPE_LOCAL     2
#define DUK_SYMBOL_TYPE_WELLKNOWN 3

/* Assertion for duk_hstring validity. */
#if defined(DUK_USE_ASSERTIONS)
DUK_INTERNAL_DECL void duk_hstring_assert_valid(duk_hstring *h);
#define DUK_HSTRING_ASSERT_VALID(h) \
	do { \
		duk_hstring_assert_valid((h)); \
	} while (0)
#else
#define DUK_HSTRING_ASSERT_VALID(h) \
	do { \
	} while (0)
#endif

/*
 *  Misc
 */

struct duk_hstring {
	/* Smaller heaphdr than for other objects, because strings are held
	 * in string intern table which requires no link pointers.  Much of
	 * the 32-bit flags field is unused by flags, so we can stuff a 16-bit
	 * field in there.
	 */
	duk_heaphdr_string hdr;

	/* String hash. */
#if defined(DUK_USE_STRHASH16)
	/* If 16-bit hash is in use, stuff it into duk_heaphdr_string flags. */
#else
	duk_uint32_t hash;
#endif

	/* Precomputed array index (or DUK_HSTRING_NO_ARRAY_INDEX). */
#if defined(DUK_USE_HSTRING_ARRIDX)
	duk_uarridx_t arridx;
#endif

	/* Length in bytes (not counting NUL term). */
#if defined(DUK_USE_STRLEN16)
	/* placed in duk_heaphdr_string */
#else
	duk_uint32_t blen;
#endif

	/* Length in codepoints (must be E5 compatible). */
#if defined(DUK_USE_STRLEN16)
#if defined(DUK_USE_HSTRING_CLEN)
	duk_uint16_t clen16;
#else
	/* computed live */
#endif
#else
	duk_uint32_t clen;
#endif

	/*
	 *  String data of 'blen+1' bytes follows (+1 for NUL termination
	 *  convenience for C API).  No alignment needs to be guaranteed
	 *  for strings, but fields above should guarantee alignment-by-4
	 *  (but not alignment-by-8).
	 */
};

/* The external string struct is defined even when the feature is inactive. */
struct duk_hstring_external {
	duk_hstring str;

	/*
	 *  For an external string, the NUL-terminated string data is stored
	 *  externally.  The user must guarantee that data behind this pointer
	 *  doesn't change while it's used.
	 */

	const duk_uint8_t *extdata;
};

/*
 *  Prototypes
 */

DUK_INTERNAL_DECL duk_ucodepoint_t duk_hstring_char_code_at_raw(duk_hthread *thr,
                                                                duk_hstring *h,
                                                                duk_uint_t pos,
                                                                duk_bool_t surrogate_aware);
DUK_INTERNAL_DECL duk_bool_t duk_hstring_equals_ascii_cstring(duk_hstring *h, const char *cstr);
DUK_INTERNAL_DECL duk_size_t duk_hstring_get_charlen(duk_hstring *h);
#if !defined(DUK_USE_HSTRING_LAZY_CLEN)
DUK_INTERNAL_DECL void duk_hstring_init_charlen(duk_hstring *h);
#endif

#endif /* DUK_HSTRING_H_INCLUDED */