1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
|
/*
AngelCode Scripting Library
Copyright (c) 2003-2017 Andreas Jonsson
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you
must not claim that you wrote the original software. If you use
this software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
The original version of this library can be located at:
http://www.angelcode.com/angelscript/
Andreas Jonsson
andreas@angelcode.com
*/
#include "as_config.h"
#include <string.h> // some compilers declare memcpy() here
#include <math.h> // pow()
#if !defined(AS_NO_MEMORY_H)
#include <memory.h>
#endif
#include "as_string.h"
#include "as_string_util.h"
BEGIN_AS_NAMESPACE
// Compares two character buffers given by pointer and length. The buffers
// don't have to be null terminated, and str2 may be a null pointer, in which
// case it is treated as an empty string.
//
// Returns 0 if the strings are equal. If the strings differ within their common
// length, the result of memcmp() for that common part is returned. If one string
// is a prefix of the other, the shorter string gives 1 and the longer gives -1,
// i.e. asCompareStrings("a", 1, "ab", 2) returns 1.
int asCompareStrings(const char *str1, size_t len1, const char *str2, size_t len2) {
	// An empty first string is equal to an empty or missing second string
	if (len1 == 0)
		return (str2 == 0 || len2 == 0) ? 0 : 1;

	// From here on the first string is known to hold at least one character
	if (str2 == 0)
		return -1;

	// Compare the part that both strings have in common
	const size_t common = (len2 < len1) ? len2 : len1;
	const int diff = memcmp(str1, str2, common);
	if (diff != 0)
		return diff;

	// The common part is identical, so the lengths decide the outcome
	if (len2 < len1)
		return -1;
	if (len1 < len2)
		return 1;
	return 0;
}
// Scans a decimal floating point number of the form
//   digits [ '.' digits ] [ ('e' | 'E') [ '+' | '-' ] digits ]
// from the start of the string and returns its value. Scanning stops at the
// first character that doesn't fit this form.
//
// string     - null terminated text to scan. A leading sign is not handled
// numScanned - optional, receives the number of characters that were consumed
double asStringScanDouble(const char *string, size_t *numScanned) {
	// I decided to do my own implementation of strtod() because this function
	// doesn't seem to be present on all systems. iOS 5 for example doesn't appear
	// to include the function in the standard lib.
	// Another reason is that the standard implementation of strtod() is dependent
	// on the locale on some systems, i.e. it may use comma instead of dot for
	// the decimal indicator. This can be avoided by forcing the locale to "C" with
	// setlocale(), but this is another thing that is highly platform dependent.

	// The exponent stops growing once it has reached this magnitude. It is far
	// outside the range of a double, so the result is unaffected, while an
	// arbitrarily long run of exponent digits can no longer overflow the int.
	const int maxExponent = 100000;

	double value = 0;
	double fraction = 0.1;
	int exponent = 0;
	bool negativeExponent = false;
	size_t c = 0;

	// The tokenizer separates the sign from the number in
	// two tokens so we'll never have a sign to parse here

	// Parse the integer value
	while (string[c] >= '0' && string[c] <= '9') {
		value = value * 10 + (string[c] - '0');
		c++;
	}

	if (string[c] == '.') {
		c++;

		// Parse the fraction, each digit weighing a tenth of the previous one
		while (string[c] >= '0' && string[c] <= '9') {
			value += fraction * (string[c] - '0');
			fraction *= 0.1;
			c++;
		}
	}

	if (string[c] == 'e' || string[c] == 'E') {
		c++;

		// Parse the sign of the exponent
		if (string[c] == '-') {
			negativeExponent = true;
			c++;
		} else if (string[c] == '+')
			c++;

		// Parse the exponent value. All digits are consumed, but
		// they are only accumulated until the limit is reached
		while (string[c] >= '0' && string[c] <= '9') {
			if (exponent < maxExponent)
				exponent = exponent * 10 + (string[c] - '0');
			c++;
		}
	}

	// A zero mantissa stays zero whatever the exponent is. Skipping the
	// scaling avoids computing 0 * infinity, which would give NaN.
	if (exponent && value != 0) {
		if (negativeExponent)
			exponent = -exponent;
		value *= pow(10.0, exponent);
	}

	if (numScanned)
		*numScanned = c;

	return value;
}
// Translates a digit character into its numeric value for the given radix.
// '0'-'9' map to 0-9, and the letters 'A'-'Z' and 'a'-'z' map to 10-35.
// Returns -1 if the character isn't a digit or if its value doesn't fit the radix
static int asCharToNbr(char ch, int radix) {
	int digit;

	if (ch >= '0' && ch <= '9')
		digit = ch - '0';
	else if (ch >= 'A' && ch <= 'Z')
		digit = ch - 'A' + 10;
	else if (ch >= 'a' && ch <= 'z')
		digit = ch - 'a' + 10;
	else
		return -1;

	// Only values below the radix are valid digits
	return digit < radix ? digit : -1;
}
// Scans an unsigned integer from the start of the string and returns its value.
// Scanning stops at the first character that isn't a valid digit for the radix.
//
// base       - 10 or 16 to scan digits of that radix without a prefix, or 0 to
//              determine the radix from a 0x, 0d, 0o, or 0b prefix (in either case)
// numScanned - optional, receives the number of characters consumed, including
//              the prefix if there was one
// overflow   - optional, set to true if the value doesn't fit in an asQWORD and
//              to false otherwise. The check is only made when this is given
//
// If base is 0 and the string doesn't start with a known prefix the function
// asserts, and if execution continues it returns 0 without consuming anything.
asQWORD asStringScanUInt64(const char *string, int base, size_t *numScanned, bool *overflow) {
	asASSERT(base == 10 || base == 16 || base == 0);

	if (overflow)
		*overflow = false;

	const char *end = string;

	static const asQWORD QWORD_MAX = (~asQWORD(0));

	asQWORD res = 0;
	if (base == 10) {
		while (*end >= '0' && *end <= '9') {
			// Adding another digit overflows if the value is already above a tenth of
			// the maximum, or exactly at it with a digit above the maximum's last digit
			if (overflow && ((res > QWORD_MAX / 10) || ((asUINT(*end - '0') > (QWORD_MAX - (QWORD_MAX / 10) * 10)) && res == QWORD_MAX / 10)))
				*overflow = true;
			res *= 10;
			res += *end++ - '0';
		}
	} else {
		if (base == 0 && string[0] == '0') {
			// Determine the radix from the prefix
			switch (string[1]) {
			case 'b':
			case 'B':
				base = 2;
				break;
			case 'o':
			case 'O':
				base = 8;
				break;
			case 'd':
			case 'D':
				base = 10;
				break;
			case 'x':
			case 'X':
				base = 16;
				break;
			default:
				break;
			}

			// Skip the prefix only if it was recognized. Otherwise the string may
			// be just "0", and stepping two characters would pass its terminator
			if (base)
				end += 2;
		}

		asASSERT(base);

		if (base) {
			for (int nbr; (nbr = asCharToNbr(*end, base)) >= 0; end++) {
				// Same overflow test as for base 10, only with the detected radix
				if (overflow && ((res > QWORD_MAX / base) || ((asUINT(nbr) > (QWORD_MAX - (QWORD_MAX / base) * base)) && res == QWORD_MAX / base)))
					*overflow = true;

				res = res * base + nbr;
			}
		}
	}

	if (numScanned)
		*numScanned = end - string;

	return res;
}
//
// Encodes the unicode code point as UTF-8 into outEncodedBuffer and returns the
// number of bytes written, which is between 1 and 4. No terminator is written.
//
// If the value is not a valid unicode code point, i.e. it is in the surrogate
// range 0xD800 to 0xDFFF or above 0x10FFFF, nothing is written and the function
// returns -1.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF8(unsigned int value, char *outEncodedBuffer) {
	unsigned char *out = (unsigned char *)outEncodedBuffer;

	// Values in the ASCII range are stored as they are
	if (value <= 0x7F) {
		out[0] = (unsigned char)value;
		return 1;
	}

	// Determine the number of bytes needed and the marker bits of the first byte
	int numBytes;
	unsigned int leadMarker;
	if (value <= 0x7FF) {
		numBytes = 2;
		leadMarker = 0xC0;
	} else if (value <= 0xFFFF) {
		// Note: Values 0xD800 to 0xDFFF are not valid unicode characters
		if (value >= 0xD800 && value <= 0xDFFF)
			return -1;
		numBytes = 3;
		leadMarker = 0xE0;
	} else if (value <= 0x10FFFF) {
		numBytes = 4;
		leadMarker = 0xF0;
	} else
		return -1;

	// Fill in the continuation bytes from the last to the second,
	// each one taking the lowest 6 bits that remain in the value
	for (int pos = numBytes - 1; pos >= 1; pos--) {
		out[pos] = (unsigned char)(0x80 + (value & 0x3F));
		value >>= 6;
	}

	// The bits that are left go into the first byte together with the marker
	out[0] = (unsigned char)(leadMarker + value);

	return numBytes;
}
//
// Decodes the UTF-8 character at the start of encodedBuffer and returns its
// unicode code point. outLength is optional and receives the number of bytes
// that were decoded.
//
// Returns -1 without touching outLength if the bytes don't form a sequence that
// the function accepts: an invalid first byte, a two byte sequence for a value
// that fits in one byte, or a missing continuation byte. Overlong three and four
// byte sequences, surrogates, and values above 0x10FFFF are not rejected.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringDecodeUTF8(const char *encodedBuffer, unsigned int *outLength) {
	const unsigned char *src = (const unsigned char *)encodedBuffer;
	const unsigned char lead = src[0];

	// A byte without the high bit set is a complete character on its own
	if ((lead & 0x80) == 0) {
		if (outLength) *outLength = 1;
		return lead;
	}

	// The first byte tells how long the sequence is and holds the highest bits
	int codePoint;
	int numBytes;
	if ((lead & 0xE0) == 0xC0) {
		codePoint = lead & 0x1F;

		// The value at this moment must not be less than 2, because
		// that should have been encoded with one byte only.
		if (codePoint < 2)
			return -1;

		numBytes = 2;
	} else if ((lead & 0xF0) == 0xE0) {
		codePoint = lead & 0x0F;
		numBytes = 3;
	} else if ((lead & 0xF8) == 0xF0) {
		codePoint = lead & 0x07;
		numBytes = 4;
	} else {
		// Continuation bytes and bytes from 0xF8 can't start a sequence
		return -1;
	}

	// Each continuation byte has the form 10xxxxxx and adds 6 more bits.
	// The scan stops at the first byte that doesn't, so a terminating
	// null character inside the sequence is never read past.
	for (int pos = 1; pos < numBytes; pos++) {
		const unsigned char next = src[pos];
		if ((next & 0xC0) != 0x80)
			return -1;

		codePoint = (codePoint << 6) + (next & 0x3F);
	}

	if (outLength) *outLength = (unsigned)numBytes;
	return codePoint;
}
//
// Encodes the unicode code point as UTF-16 into outEncodedBuffer and returns the
// number of bytes written: 2 for a single 16 bit code unit, or 4 for a surrogate
// pair. The bytes of each code unit are stored with the low byte first, unless
// AS_BIG_ENDIAN is defined, in which case the high byte comes first.
//
// If the input value is not a valid unicode code point, i.e. it is in the
// surrogate range 0xD800 to 0xDFFF or above 0x10FFFF, then nothing is written
// and the function returns -1.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF16(unsigned int value, char *outEncodedBuffer) {
	// Surrogates are reserved for encoding the values from 0x10000 and up, and
	// a surrogate pair can't represent anything above 0x10FFFF. Both are
	// rejected, the same way asStringEncodeUTF8 does it.
	if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
		return -1;

	// Positions of the low and the high byte within a 16 bit code unit
#ifndef AS_BIG_ENDIAN
	const int lo = 0, hi = 1;
#else
	const int lo = 1, hi = 0;
#endif

	if (value < 0x10000) {
		// The value fits in a single code unit
		outEncodedBuffer[lo] = (char)(value & 0xFF);
		outEncodedBuffer[hi] = (char)((value >> 8) & 0xFF);
		return 2;
	}

	// Split the 20 bits that remain after subtracting 0x10000 in two halves. The
	// upper half goes into the first surrogate and the lower half into the second
	value -= 0x10000;
	const unsigned int surrogate1 = ((value >> 10) & 0x3FF) + 0xD800;
	const unsigned int surrogate2 = (value & 0x3FF) + 0xDC00;

	outEncodedBuffer[lo] = (char)(surrogate1 & 0xFF);
	outEncodedBuffer[hi] = (char)((surrogate1 >> 8) & 0xFF);
	outEncodedBuffer[2 + lo] = (char)(surrogate2 & 0xFF);
	outEncodedBuffer[2 + hi] = (char)((surrogate2 >> 8) & 0xFF);
	return 4;
}
END_AS_NAMESPACE
|