File: Str.h

package info (click to toggle)
storm-lang 0.7.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 52,004 kB
  • sloc: ansic: 261,462; cpp: 140,405; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (307 lines) | stat: -rw-r--r-- 9,460 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#pragma once
#include "Object.h"
#include "Char.h"
#include "GcArray.h"
#include "Maybe.h"

namespace storm {
	STORM_PKG(core);

	/**
	 * The string type used in Storm.
	 *
	 * Strings are immutable sequences of unicode codepoints. The implementation stores strings in
	 * UTF-16, but hides this fact by disallowing low-level access to the underlying
	 * representation. Furthermore, indexed access is not allowed as it can not be implemented
	 * efficiently. Iterators can be used to step through codepoints.
	 *
	 * Note: We may want to enforce proper normalization of strings to avoid weird results.
	 */
	class Str : public Object {
		STORM_CLASS;
	public:
		// Create an empty string.
		STORM_CTOR Str();

		// Create from a string literal (a null-terminated sequence of `wchar`).
		explicit Str(const wchar *s);

#ifdef POSIX
		// If wchar_t is a different size, allow creation from literals with wchar_t as well.
		explicit Str(const wchar_t *s);
#endif

		// Create from a substring of a c-string, delimited by the pointers `from` and `to`.
		// NOTE(review): `to` is presumably exclusive (one past the last character) - confirm
		// against the definition.
		Str(const wchar *from, const wchar *to);

		// Create a string from a buffer.
		// NOTE(review): whether the buffer is copied or adopted is not visible in this header.
		Str(GcArray<wchar> *data);

		// Create from a single char or series of chars. The second overload takes the number of
		// chars as `count`.
		STORM_CTOR Str(Char ch);
		STORM_CTOR Str(Char ch, Nat count);

		// Is the string empty?
		Bool STORM_FN empty() const;

		// Does the string contain any characters?
		Bool STORM_FN any() const;

		// Concatenate strings. The result is returned as a new string.
		Str *STORM_FN operator +(Str *o) const;
		Str *operator +(const wchar *o) const;
#ifdef POSIX
		Str *operator +(const wchar_t *o) const;
#endif

		// Multiplication, i.e. repetition of this string `times` times.
		Str *STORM_FN operator *(Nat times) const;

		// Equal to another string?
		Bool STORM_FN operator ==(const Str &o) const;

		// Lexicographically less than another string?
		Bool STORM_FN operator <(const Str &o) const;

		// Hash.
		Nat STORM_FN hash() const;

		// Convert to a number. Throws `StrError` on error.
		Int STORM_FN toInt() const;
		Nat STORM_FN toNat() const;
		Long STORM_FN toLong() const;
		Word STORM_FN toWord() const;
		Float STORM_FN toFloat() const;
		Double STORM_FN toDouble() const;

		// Interpret as a hexadecimal number. Throws `StrError` on error.
		Nat STORM_FN hexToNat() const;
		Word STORM_FN hexToWord() const;

		// Versions of the above that return a Maybe<T> rather than throwing.
		// NOTE(review): presumably an empty Maybe<T> signals a failed conversion - confirm.
		Maybe<Int> STORM_FN STORM_NAME(asInt, int)() const;
		Maybe<Nat> STORM_FN STORM_NAME(asNat, nat)() const;
		Maybe<Long> STORM_FN STORM_NAME(asLong, long)() const;
		Maybe<Word> STORM_FN STORM_NAME(asWord, word)() const;
		Maybe<Float> STORM_FN STORM_NAME(asFloat, float)() const;
		Maybe<Double> STORM_FN STORM_NAME(asDouble, double)() const;
		Maybe<Nat> STORM_FN STORM_NAME(asHexNat, hexNat)() const;
		Maybe<Word> STORM_FN STORM_NAME(asHexWord, hexWord)() const;

		// Escape/unescape characters. Any unknown escape sequences are kept as they are. The
		// parameters `extra` and `extra2` are additional characters that should be
		// escaped/unescaped if present.
		Str *STORM_FN unescape() const;
		Str *STORM_FN unescape(Char extra) const;
		Str *STORM_FN unescape(Char extra, Char extra2) const;
		Str *STORM_FN escape() const;
		Str *STORM_FN escape(Char extra) const;
		Str *STORM_FN escape(Char extra, Char extra2) const;

		// Version of `unescape` that keeps sequences of `\\` intact. This is useful when using this
		// `unescape` as a first pass for other languages (e.g. regex where `.` and `[` also need to
		// be escaped at a later stage).
		Str *STORM_FN unescapeKeepBackslash(Char extra) const;

		// Does the string start with the string `s`?
		Bool STORM_FN startsWith(const Str *s) const;
		Bool startsWith(const wchar *s) const;

		// Does the string end with the string `s`?
		Bool STORM_FN endsWith(const Str *s) const;
		Bool endsWith(const wchar *s) const;

		// Does the string contain the substring `s`? Note that the implementation is not
		// necessarily efficient for long search strings.
		Bool STORM_FN contains(const Str *s) const;

		// Deep copy (nothing needs to be done really).
		virtual void STORM_FN deepCopy(CloneEnv *env);

		// To string. The second overload outputs to the supplied `StrBuf` instead.
		virtual Str *STORM_FN toS() const;
		virtual void STORM_FN toS(StrBuf *buf) const;

		// Get a c-string.
		const wchar *c_str() const;

		// Get a UTF-8 encoded c-string allocated on the GC heap.
		const char *utf8_str() const;

		// Convert to/from cr-lf line endings. Returns the same string if possible.
		Str *STORM_FN toCrLf() const;
		Str *STORM_FN fromCrLf() const;

		// Compare to c-string.
		Bool operator ==(const wchar *s) const;
		Bool operator !=(const wchar *s) const;

		// Count the number of characters in the string. This counts the number of steps the
		// iterators would take when iterating through the representation. That is, this count
		// represents the number of code points in the string.
		Nat STORM_FN count() const;

		// Peek at the length of the underlying representation. As opposed to `count`, this refers
		// to the stored representation rather than to code points.
		Nat peekLength() const;

		/**
		 * Iterator.
		 *
		 * Refers to a position inside a string. Used to step through the codepoints of a string,
		 * since indexed access is not provided by `Str`.
		 */
		class Iter {
			STORM_VALUE;
		public:
			// Create an iterator to end.
			STORM_CTOR Iter();

			// Deep copy.
			void STORM_FN deepCopy(CloneEnv *env);

			// Advance. Prefix and postfix increment, and advancing `steps` steps at once.
			Iter &STORM_FN operator ++();
			Iter STORM_FN operator ++(int dummy);
			Iter STORM_FN operator +(Nat steps) const;

			// Compute difference.
			Nat STORM_FN operator -(const Iter &o) const;

			// Compare.
			Bool STORM_FN operator ==(const Iter &o) const;
			Bool STORM_FN operator !=(const Iter &o) const;
			Bool STORM_FN operator >(const Iter &o) const;
			Bool STORM_FN operator <(const Iter &o) const;
			Bool STORM_FN operator >=(const Iter &o) const;
			Bool STORM_FN operator <=(const Iter &o) const;

			// Get the value.
			Char operator *() const;
			Char STORM_FN v() const;

			// Peek at the raw offset (the `pos` member).
			inline Nat offset() const { return pos; }

			// Peek at the string (the `owner` member).
			inline const Str *data() const { return owner; }

			// Output, for convenience.
			void STORM_FN toS(StrBuf *to) const;

		private:
			friend class Str;

			// Create an iterator into `str` at the position `pos`.
			Iter(const Str *str, Nat pos);

			// String we're referring to.
			const Str *owner;
			// Position in the string, as returned by `offset`.
			Nat pos;

			// At the end?
			bool atEnd() const;
		};

		// Begin and end.
		Iter STORM_FN begin() const;
		Iter STORM_FN end() const;

		// Get an iterator to a specific position.
		// NOTE(review): `pos` is presumably a raw offset into the representation (compare
		// `Iter::offset`) rather than a codepoint index - confirm.
		Iter posIter(Nat pos) const;

		// Old name for 'cut'.
		Str *STORM_FN substr(Iter from) const;
		Str *STORM_FN substr(Iter from, Iter to) const;

		// Extract a substring, starting at `from` until the end of the string.
		Str *STORM_FN cut(Iter from) const;

		// Extract a substring, starting at `from` until, but not including, `to`.
		Str *STORM_FN cut(Iter from, Iter to) const;

		// Remove characters from the middle of the string.
		// NOTE(review): presumably removes the range from `from` up to, but not including, `to`
		// (like `cut`) - confirm.
		Str *STORM_FN remove(Iter from, Iter to) const;

		// Insert an entire string at a given position.
		Str *STORM_FN insert(Iter pos, Str *str) const;

		// Find a character in the string. Returns the first appearance of the character.
		// NOTE(review): the result when there is no match is not stated here; presumably the
		// end iterator - confirm.
		Iter STORM_FN find(Char ch) const;
		Iter STORM_FN find(Char ch, Iter start) const;

		// Find a substring in the string. Returns the first match. Note: this approach is not
		// necessarily optimal for long search strings.
		Iter STORM_FN find(const Str *str) const;
		Iter STORM_FN find(const Str *str, Iter start) const;

		// Find the last occurrence of `ch` in the string. Note that `last` is *not* examined.
		Iter STORM_FN findLast(Char ch) const;
		Iter STORM_FN findLast(Char ch, Iter last) const;

		// Find the last occurrence of `str` in the string. Note that the match has to end before
		// `last` if specified.
		Iter STORM_FN findLast(const Str *str) const;
		Iter STORM_FN findLast(const Str *str, Iter last) const;

		// Read/write (raw).
		void STORM_FN write(OStream *to) const;
		static Str *STORM_FN read(IStream *from);
		static Str *STORM_FN read(IStream *from, Nat limitBytes);

		// Serialization.
		void STORM_FN write(ObjOStream *to) const;
		static Str *STORM_FN read(ObjIStream *from);

		// Called from the serialization API.
		explicit Str(ObjIStream *from);

	private:
		friend class Iter;
		friend class StrBuf;

		// Create a string from the stream. Use 'read' from Storm.
		explicit Str(IStream *from);
		explicit Str(IStream *from, Nat limitBytes);

		// Data we're storing. Always null-terminated or null.
		GcArray<wchar> *data;

		// Number of characters in 'data', not counting the null terminator (hence the `- 1`).
		// NOTE(review): `data` is documented as possibly being null, but it is dereferenced
		// unconditionally here - confirm that `data` is never null when this is called.
		inline nat charCount() const { return nat(data->count - 1); }

		// Concatenation constructor.
		Str(const Str *a, const Str *b);
		Str(const Str *a, const wchar *b);

		// Repetition constructor.
		Str(const Str *a, Nat times);

		// Create from two substrings of a c-string.
		Str(const wchar *fromA, const wchar *toA, const wchar *fromB, const wchar *toB);

		// Create by inserting a string at a specific position.
		Str(const Str *into, const Iter &pos, const Str *insert);

		// Allocate 'data'.
		// NOTE(review): whether `count` includes the null terminator is not visible here.
		void allocData(nat count);

		// Convert an iterator to a pointer.
		const wchar *toPtr(const Iter &i) const;

		// Validate this string.
		void validate() const;
	};

	// Remove the indentation from a string. The result is returned as a string.
	// NOTE(review): presumably this removes the leading whitespace shared by all lines -
	// confirm against the definition.
	Str *STORM_FN removeIndentation(Str *str);

	// Remove leading and trailing empty lines from the string `src`. The result is returned as
	// a string.
	Str *STORM_FN trimBlankLines(Str *src);

	// Strip whitespace from the string `src`. The result is returned as a string.
	// NOTE(review): presumably only leading and trailing whitespace is removed (as "trim"
	// suggests) - confirm against the definition.
	Str *STORM_FN trimWhitespace(Str *src);

#ifdef POSIX
	// Low-level string operations for UTF-16. Only declared when POSIX is defined. These take
	// `wchar` (the type used for UTF-16 here) rather than `wchar_t`.
	// NOTE(review): presumably these mirror the semantics of the standard C functions with the
	// same names - confirm against the definitions.
	size_t wcslen(const wchar *str);
	int wcscmp(const wchar *a, const wchar *b);
#endif
}