File: duk_hstring_misc.c

package info (click to toggle)
duktape 2.7.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 21,160 kB
  • sloc: ansic: 215,359; python: 5,961; javascript: 4,555; makefile: 477; cpp: 205
file content (196 lines) | stat: -rw-r--r-- 5,811 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/*
 *  Misc support functions
 */

#include "duk_internal.h"

/*
 *  duk_hstring charCodeAt, with and without surrogate awareness
 */

DUK_INTERNAL duk_ucodepoint_t duk_hstring_char_code_at_raw(duk_hthread *thr,
                                                           duk_hstring *h,
                                                           duk_uint_t pos,
                                                           duk_bool_t surrogate_aware) {
	duk_uint32_t boff;
	const duk_uint8_t *p, *p_start, *p_end;
	duk_ucodepoint_t cp1;
	duk_ucodepoint_t cp2;

	/* Caller must check character offset to be inside the string. */
	DUK_ASSERT(thr != NULL);
	DUK_ASSERT(h != NULL);
	DUK_ASSERT_DISABLE(pos >= 0); /* unsigned */
	DUK_ASSERT(pos < (duk_uint_t) DUK_HSTRING_GET_CHARLEN(h));

	boff = (duk_uint32_t) duk_heap_strcache_offset_char2byte(thr, h, (duk_uint32_t) pos);
	DUK_DDD(DUK_DDDPRINT("charCodeAt: pos=%ld -> boff=%ld, str=%!O", (long) pos, (long) boff, (duk_heaphdr *) h));
	DUK_ASSERT_DISABLE(boff >= 0);
	DUK_ASSERT(boff < DUK_HSTRING_GET_BYTELEN(h));

	p_start = DUK_HSTRING_GET_DATA(h);
	p_end = p_start + DUK_HSTRING_GET_BYTELEN(h);
	p = p_start + boff;
	DUK_DDD(DUK_DDDPRINT("p_start=%p, p_end=%p, p=%p", (const void *) p_start, (const void *) p_end, (const void *) p));

	/* For invalid UTF-8 (never happens for standard ECMAScript strings)
	 * return U+FFFD replacement character.
	 */
	if (duk_unicode_decode_xutf8(thr, &p, p_start, p_end, &cp1)) {
		if (surrogate_aware && cp1 >= 0xd800UL && cp1 <= 0xdbffUL) {
			/* The decode helper is memory safe even if 'cp1' was
			 * decoded at the end of the string and 'p' is no longer
			 * within string memory range.
			 */
			cp2 = 0; /* If call fails, this is left untouched and won't match cp2 check. */
			(void) duk_unicode_decode_xutf8(thr, &p, p_start, p_end, &cp2);
			if (cp2 >= 0xdc00UL && cp2 <= 0xdfffUL) {
				cp1 = (duk_ucodepoint_t) (((cp1 - 0xd800UL) << 10) + (cp2 - 0xdc00UL) + 0x10000UL);
			}
		}
	} else {
		cp1 = DUK_UNICODE_CP_REPLACEMENT_CHARACTER;
	}

	return cp1;
}

/*
 *  duk_hstring charlen, when lazy charlen disabled
 */

#if !defined(DUK_USE_HSTRING_LAZY_CLEN)
#if !defined(DUK_USE_HSTRING_CLEN)
#error non-lazy duk_hstring charlen but DUK_USE_HSTRING_CLEN not set
#endif
DUK_INTERNAL void duk_hstring_init_charlen(duk_hstring *h) {
	duk_uint32_t clen;

	DUK_ASSERT(h != NULL);
	DUK_ASSERT(!DUK_HSTRING_HAS_ASCII(h));
	DUK_ASSERT(!DUK_HEAPHDR_HAS_READONLY((duk_heaphdr *) h));

	clen = duk_unicode_unvalidated_utf8_length(DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h));
#if defined(DUK_USE_STRLEN16)
	DUK_ASSERT(clen <= 0xffffUL); /* Bytelength checked during interning. */
	h->clen16 = (duk_uint16_t) clen;
#else
	h->clen = (duk_uint32_t) clen;
#endif
	if (DUK_LIKELY(clen == DUK_HSTRING_GET_BYTELEN(h))) {
		DUK_HSTRING_SET_ASCII(h);
	}
}

DUK_INTERNAL DUK_HOT duk_size_t duk_hstring_get_charlen(duk_hstring *h) {
#if defined(DUK_USE_STRLEN16)
	return h->clen16;
#else
	return h->clen;
#endif
}
#endif /* !DUK_USE_HSTRING_LAZY_CLEN */

/*
 *  duk_hstring charlen, when lazy charlen enabled
 */

#if defined(DUK_USE_HSTRING_LAZY_CLEN)
#if defined(DUK_USE_HSTRING_CLEN)
DUK_LOCAL DUK_COLD duk_size_t duk__hstring_get_charlen_slowpath(duk_hstring *h) {
	duk_size_t res;

	DUK_ASSERT(h->clen == 0); /* Checked by caller. */

#if defined(DUK_USE_ROM_STRINGS)
	/* ROM strings have precomputed clen, but if the computed clen is zero
	 * we can still come here and can't write anything.
	 */
	if (DUK_HEAPHDR_HAS_READONLY((duk_heaphdr *) h)) {
		return 0;
	}
#endif

	res = duk_unicode_unvalidated_utf8_length(DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h));
#if defined(DUK_USE_STRLEN16)
	DUK_ASSERT(res <= 0xffffUL); /* Bytelength checked during interning. */
	h->clen16 = (duk_uint16_t) res;
#else
	h->clen = (duk_uint32_t) res;
#endif
	if (DUK_LIKELY(res == DUK_HSTRING_GET_BYTELEN(h))) {
		DUK_HSTRING_SET_ASCII(h);
	}
	return res;
}
#else /* DUK_USE_HSTRING_CLEN */
DUK_LOCAL duk_size_t duk__hstring_get_charlen_slowpath(duk_hstring *h) {
	if (DUK_LIKELY(DUK_HSTRING_HAS_ASCII(h))) {
		/* Most practical strings will go here. */
		return DUK_HSTRING_GET_BYTELEN(h);
	} else {
		/* ASCII flag is lazy, so set it here. */
		duk_size_t res;

		/* XXX: here we could use the strcache to speed up the
		 * computation (matters for 'i < str.length' loops).
		 */

		res = duk_unicode_unvalidated_utf8_length(DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h));

#if defined(DUK_USE_ROM_STRINGS)
		if (DUK_HEAPHDR_HAS_READONLY((duk_heaphdr *) h)) {
			/* For ROM strings, can't write anything; ASCII flag
			 * is preset so we don't need to update it.
			 */
			return res;
		}
#endif
		if (DUK_LIKELY(res == DUK_HSTRING_GET_BYTELEN(h))) {
			DUK_HSTRING_SET_ASCII(h);
		}
		return res;
	}
}
#endif /* DUK_USE_HSTRING_CLEN */

#if defined(DUK_USE_HSTRING_CLEN)
DUK_INTERNAL DUK_HOT duk_size_t duk_hstring_get_charlen(duk_hstring *h) {
#if defined(DUK_USE_STRLEN16)
	if (DUK_LIKELY(h->clen16 != 0)) {
		return h->clen16;
	}
#else
	if (DUK_LIKELY(h->clen != 0)) {
		return h->clen;
	}
#endif
	return duk__hstring_get_charlen_slowpath(h);
}
#else /* DUK_USE_HSTRING_CLEN */
DUK_INTERNAL DUK_HOT duk_size_t duk_hstring_get_charlen(duk_hstring *h) {
	/* Always use slow path. */
	return duk__hstring_get_charlen_slowpath(h);
}
#endif /* DUK_USE_HSTRING_CLEN */
#endif /* DUK_USE_HSTRING_LAZY_CLEN */

/*
 *  Compare duk_hstring to an ASCII cstring.
 */

DUK_INTERNAL duk_bool_t duk_hstring_equals_ascii_cstring(duk_hstring *h, const char *cstr) {
	duk_size_t len;

	DUK_ASSERT(h != NULL);
	DUK_ASSERT(cstr != NULL);

	len = DUK_STRLEN(cstr);
	if (len != DUK_HSTRING_GET_BYTELEN(h)) {
		return 0;
	}
	if (duk_memcmp((const void *) cstr, (const void *) DUK_HSTRING_GET_DATA(h), len) == 0) {
		return 1;
	}
	return 0;
}