1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
|
#pragma once
#include "Object.h"
#include "Char.h"
#include "GcArray.h"
#include "Maybe.h"
namespace storm {
STORM_PKG(core);
/**
* The string type used in Storm.
*
* Strings are immutable sequences of Unicode codepoints. The implementation stores strings in
* UTF-16, but hides this fact by disallowing low-level access to the underlying representation.
* Furthermore, indexed access is not allowed as it cannot be implemented efficiently. Iterators
* can be used to step through codepoints instead.
*
* Note: We may want to enforce proper normalization of strings to avoid weird results.
*/
class Str : public Object {
STORM_CLASS;
public:
// Create an empty string.
STORM_CTOR Str();
// Create from a string literal. Marked `explicit`, so a raw `wchar` pointer is never
// converted to a string implicitly.
explicit Str(const wchar *s);
#ifdef POSIX
// If wchar_t is a different size, allow creation from literals with wchar_t as well.
explicit Str(const wchar_t *s);
#endif
// Create from a substring of a c-string.
// NOTE(review): presumably the half-open range [from, to) -- confirm in the implementation.
Str(const wchar *from, const wchar *to);
// Create a string from a buffer.
// NOTE(review): not `explicit`, so a `GcArray<wchar> *` converts implicitly. The private
// member `data` is documented as null-terminated, so the buffer presumably has to be
// null-terminated as well -- confirm in the implementation.
Str(GcArray<wchar> *data);
// Create from a single char or series of chars.
// NOTE(review): the two-parameter version presumably repeats `ch` `count` times -- confirm.
STORM_CTOR Str(Char ch);
STORM_CTOR Str(Char ch, Nat count);
// Is the string empty?
Bool STORM_FN empty() const;
// Does the string contain any characters?
Bool STORM_FN any() const;
// Concatenate strings. These are const members that return a `Str *`, so the result is
// handed back as a separate string object rather than by modifying this one. Only the
// `Str *` overload is marked STORM_FN; the c-string overloads are plain C++ members.
Str *STORM_FN operator +(Str *o) const;
Str *operator +(const wchar *o) const;
#ifdef POSIX
Str *operator +(const wchar_t *o) const;
#endif
// Multiplication.
// NOTE(review): presumably repeats this string `times` times (compare the private
// repetition constructor below) -- confirm.
Str *STORM_FN operator *(Nat times) const;
// Equal to another string?
Bool STORM_FN operator ==(const Str &o) const;
// Lexicographically less than another string?
Bool STORM_FN operator <(const Str &o) const;
// Hash.
Nat STORM_FN hash() const;
// Convert to a number. Throws `StrError` on error.
Int STORM_FN toInt() const;
Nat STORM_FN toNat() const;
Long STORM_FN toLong() const;
Word STORM_FN toWord() const;
Float STORM_FN toFloat() const;
Double STORM_FN toDouble() const;
// Interpret as a hexadecimal number. Throws `StrError` on error.
Nat STORM_FN hexToNat() const;
Word STORM_FN hexToWord() const;
// Versions of the above that return a Maybe<T>.
// NOTE(review): presumably an empty Maybe is returned on failure instead of throwing
// `StrError`, and STORM_NAME(a, b) presumably declares the C++ member `a` under the
// Storm-visible name `b` (`int`, `long`, etc. are C++ keywords) -- confirm both.
Maybe<Int> STORM_FN STORM_NAME(asInt, int)() const;
Maybe<Nat> STORM_FN STORM_NAME(asNat, nat)() const;
Maybe<Long> STORM_FN STORM_NAME(asLong, long)() const;
Maybe<Word> STORM_FN STORM_NAME(asWord, word)() const;
Maybe<Float> STORM_FN STORM_NAME(asFloat, float)() const;
Maybe<Double> STORM_FN STORM_NAME(asDouble, double)() const;
Maybe<Nat> STORM_FN STORM_NAME(asHexNat, hexNat)() const;
Maybe<Word> STORM_FN STORM_NAME(asHexWord, hexWord)() const;
// Escape/unescape characters. Any unknown escape sequences are kept as they are. The
// parameters `extra` and `extra2` are additional characters that should be escaped/unescaped
// if present.
Str *STORM_FN unescape() const;
Str *STORM_FN unescape(Char extra) const;
Str *STORM_FN unescape(Char extra, Char extra2) const;
Str *STORM_FN escape() const;
Str *STORM_FN escape(Char extra) const;
Str *STORM_FN escape(Char extra, Char extra2) const;
// Version of `unescape` that keeps sequences of `\\` intact. This is useful when using this
// `unescape` as a first pass for other languages (e.g. regex where `.` and `[` also need to be
// escaped at a later stage).
Str *STORM_FN unescapeKeepBackslash(Char extra) const;
// Does the string start with the string `s`?
Bool STORM_FN startsWith(const Str *s) const;
Bool startsWith(const wchar *s) const;
// Does the string end with the string `s`?
Bool STORM_FN endsWith(const Str *s) const;
Bool endsWith(const wchar *s) const;
// Does the string contain the substring `s`? Note that the implementation is not
// necessarily efficient for long search strings.
Bool STORM_FN contains(const Str *s) const;
// Deep copy (nothing needs to be done really).
virtual void STORM_FN deepCopy(CloneEnv *env);
// To string. One overload returns a `Str *`, the other writes to the supplied `StrBuf`.
virtual Str *STORM_FN toS() const;
virtual void STORM_FN toS(StrBuf *buf) const;
// Get a c-string.
// NOTE(review): presumably a pointer into the internal storage (`data`), so it is only
// valid while this string is alive -- confirm in the implementation.
const wchar *c_str() const;
// Get an UTF-8 encoded c-string allocated on the GC heap.
const char *utf8_str() const;
// Convert to/from cr-lf line endings. Returns the same string if possible.
Str *STORM_FN toCrLf() const;
Str *STORM_FN fromCrLf() const;
// Compare to c-string. C++ only (not marked STORM_FN).
Bool operator ==(const wchar *s) const;
Bool operator !=(const wchar *s) const;
// Count the number of characters in the string. This counts the number of steps the
// iterators would take when iterating through the representation. That is, this count
// represents the number of code points in the string.
Nat STORM_FN count() const;
// Peek at the length of the underlying representation.
// NOTE(review): presumably measured in `wchar` elements rather than code points, so it may
// differ from `count()` -- confirm.
Nat peekLength() const;
/**
* Iterator.
*
* Refers to a position in a string through a pointer to the owning `Str` and a raw offset
* (`owner` and `pos` below).
*/
class Iter {
STORM_VALUE;
public:
// Create an iterator to end.
STORM_CTOR Iter();
// Deep copy.
void STORM_FN deepCopy(CloneEnv *env);
// Advance. Prefix increment, postfix increment (the `int` parameter is only the usual C++
// marker for the postfix form), and advancing by a number of steps.
Iter &STORM_FN operator ++();
Iter STORM_FN operator ++(int dummy);
Iter STORM_FN operator +(Nat steps) const;
// Compute difference.
// NOTE(review): the result is an unsigned `Nat`; what happens when `o` is located after
// this iterator is not visible here -- confirm.
Nat STORM_FN operator -(const Iter &o) const;
// Compare.
Bool STORM_FN operator ==(const Iter &o) const;
Bool STORM_FN operator !=(const Iter &o) const;
Bool STORM_FN operator >(const Iter &o) const;
Bool STORM_FN operator <(const Iter &o) const;
Bool STORM_FN operator >=(const Iter &o) const;
Bool STORM_FN operator <=(const Iter &o) const;
// Get the value. `operator *` is C++ only; `v` is the version marked STORM_FN.
Char operator *() const;
Char STORM_FN v() const;
// Peek at the raw offset. Returns `pos` unmodified.
inline Nat offset() const { return pos; }
// Peek at the string. Returns `owner` unmodified.
inline const Str *data() const { return owner; }
// Output, for convenience.
void STORM_FN toS(StrBuf *to) const;
private:
friend class Str;
// Create an iterator into `str` at the raw offset `pos`. Private, so only `Iter` itself and
// its friend `Str` can create positioned iterators.
Iter(const Str *str, Nat pos);
// String we're referring to.
const Str *owner;
// Raw offset of this iterator, as reported by `offset()`.
Nat pos;
// At the end?
bool atEnd() const;
};
// Begin and end.
Iter STORM_FN begin() const;
Iter STORM_FN end() const;
// Get an iterator to a specific position.
// NOTE(review): `pos` is presumably a raw offset as reported by `Iter::offset()`, not a
// code point index -- confirm.
Iter posIter(Nat pos) const;
// Old name for 'cut'.
Str *STORM_FN substr(Iter from) const;
Str *STORM_FN substr(Iter from, Iter to) const;
// Extract a substring, starting at `from` until the end of the string.
Str *STORM_FN cut(Iter from) const;
// Extract a substring, starting at `from` until, but not including, `to`.
Str *STORM_FN cut(Iter from, Iter to) const;
// Remove characters from the middle of the string.
// NOTE(review): presumably removes [from, to) and returns the remainder -- confirm.
Str *STORM_FN remove(Iter from, Iter to) const;
// Insert an entire string at a given position.
Str *STORM_FN insert(Iter pos, Str *str) const;
// Find a character in the string. Returns the first appearance of the character.
// NOTE(review): the result when nothing is found is not visible here (presumably the end
// iterator) -- confirm.
Iter STORM_FN find(Char ch) const;
Iter STORM_FN find(Char ch, Iter start) const;
// Find a substring in the string. Returns the first match. Note: this approach is not
// necessarily optimal for long search strings.
Iter STORM_FN find(const Str *str) const;
Iter STORM_FN find(const Str *str, Iter start) const;
// Find the last occurrence of `ch` in the string. Note that 'last' is *not* examined.
Iter STORM_FN findLast(Char ch) const;
Iter STORM_FN findLast(Char ch, Iter last) const;
// Find the last occurrence of `str` in the string. Note that the match has to end before
// `last` if specified.
Iter STORM_FN findLast(const Str *str) const;
Iter STORM_FN findLast(const Str *str, Iter last) const;
// Read/write (raw).
void STORM_FN write(OStream *to) const;
static Str *STORM_FN read(IStream *from);
static Str *STORM_FN read(IStream *from, Nat limitBytes);
// Serialization.
void STORM_FN write(ObjOStream *to) const;
static Str *STORM_FN read(ObjIStream *from);
// Called from the serialization API.
explicit Str(ObjIStream *from);
private:
friend class Iter;
friend class StrBuf;
// Create a string from the stream. Use 'read' from Storm.
explicit Str(IStream *from);
explicit Str(IStream *from, Nat limitBytes);
// Data we're storing. Always null-terminated or null.
GcArray<wchar> *data;
// Number of characters in 'data'. Computed as `data->count - 1`, i.e. one element (the
// null terminator, given that `data` is null-terminated) is not counted.
// NOTE(review): this dereferences `data` unconditionally, while the comment on `data` allows
// it to be null -- confirm that `data` is never null when this is called. Presumably this
// counts `wchar` elements rather than code points (compare `count()`).
inline nat charCount() const { return nat(data->count - 1); }
// Concatenation constructor.
Str(const Str *a, const Str *b);
Str(const Str *a, const wchar *b);
// Repetition constructor.
Str(const Str *a, Nat times);
// Create from two substrings of a c-string.
Str(const wchar *fromA, const wchar *toA, const wchar *fromB, const wchar *toB);
// Create by inserting a string at a specific position.
Str(const Str *into, const Iter &pos, const Str *insert);
// Allocate 'data'.
// NOTE(review): whether `count` includes room for the null terminator is not visible
// here -- confirm in the implementation.
void allocData(nat count);
// Convert an iterator to a pointer.
const wchar *toPtr(const Iter &i) const;
// Validate this string.
void validate() const;
};
// Remove the indentation from a string. The result is returned as a `Str *`.
// NOTE(review): what exactly counts as indentation (e.g. leading whitespace common to all
// lines) is decided by the implementation, which is not visible here -- confirm.
Str *STORM_FN removeIndentation(Str *str);
// Remove leading and trailing empty lines from a string. The result is returned as a
// `Str *`.
// NOTE(review): whether lines containing only whitespace count as empty is not visible
// here -- confirm in the implementation.
Str *STORM_FN trimBlankLines(Str *src);
// Strip whitespace from a string. The result is returned as a `Str *`.
// NOTE(review): presumably only leading and trailing whitespace is removed (as the name
// `trim` suggests) -- confirm in the implementation.
Str *STORM_FN trimWhitespace(Str *src);
#ifdef POSIX
// Low-level string operations for UTF-16. Only declared on POSIX, and operating on `wchar`
// strings rather than `wchar_t` strings.
// NOTE(review): presumably these mirror the C library functions of the same names (length
// excluding the null terminator; negative/zero/positive comparison result) -- confirm.
size_t wcslen(const wchar *str);
int wcscmp(const wchar *a, const wchar *b);
#endif
}
|