1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
#include "validate_utf16.h"
#ifndef SIMDUTF_IS_BIG_ENDIAN
#error "SIMDUTF_IS_BIG_ENDIAN should be defined."
#endif
namespace simdutf {
namespace tests {
namespace reference {
simdutf_warn_unused bool validate_utf16(endianness utf16_endianness,
const char16_t *buf,
size_t len) noexcept {
const char16_t *curr = buf;
const char16_t *end = buf + len;
while (curr != end) {
uint16_t W1;
if (!match_system(utf16_endianness)) {
W1 = uint16_t((uint16_t(*curr) << 8) | (uint16_t(*curr) >> 8));
} else {
W1 = *curr;
}
curr += 1;
// fast path, code point is equal to character's value
if (W1 < 0xd800 || W1 > 0xdfff) {
continue;
}
// W1 must be in range 0xd800 .. 0xdbff
if (W1 > 0xdbff) {
return false;
}
// required the next word, but we're already at the end of data
if (curr == end) {
return false;
}
uint16_t W2;
if (!match_system(utf16_endianness)) {
W2 = uint16_t((uint16_t(*curr) << 8) | (uint16_t(*curr) >> 8));
} else {
W2 = *curr;
}
if (W2 < 0xdc00 || W2 > 0xdfff) // W2 = 0xdc00 .. 0xdfff
return false;
curr += 1;
}
return true;
}
} // namespace reference
} // namespace tests
} // namespace simdutf
|