File: validate_utf16.cpp

package info (click to toggle)
simdutf 7.7.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,244 kB
  • sloc: cpp: 60,074; ansic: 14,226; python: 3,364; sh: 321; makefile: 12
file content (60 lines) | stat: -rw-r--r-- 1,336 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include "validate_utf16.h"

#ifndef SIMDUTF_IS_BIG_ENDIAN
  #error "SIMDUTF_IS_BIG_ENDIAN should be defined."
#endif

namespace simdutf {
namespace tests {
namespace reference {

/**
 * Scalar reference check that a buffer holds well-formed UTF-16.
 *
 * Each 16-bit unit is first brought into host byte order (bytes are swapped
 * when match_system(utf16_endianness) is false). A unit outside
 * 0xd800..0xdfff is accepted on its own; a unit in 0xd800..0xdbff must be
 * immediately followed by a unit in 0xdc00..0xdfff; anything else is an
 * error.
 *
 * @param utf16_endianness byte order of the units stored in buf
 * @param buf              pointer to the first 16-bit unit
 * @param len              number of 16-bit units in buf
 * @return true when the whole input is valid (an empty input is valid),
 *         false at the first violation
 */
simdutf_warn_unused bool validate_utf16(endianness utf16_endianness,
                                        const char16_t *buf,
                                        size_t len) noexcept {
  // Fetches one unit and returns it in host byte order.
  const auto load_unit = [utf16_endianness](const char16_t *p) -> uint16_t {
    const uint16_t raw = uint16_t(*p);
    if (match_system(utf16_endianness)) {
      return raw;
    }
    return uint16_t((raw << 8) | (raw >> 8));
  };

  size_t pos = 0;
  while (pos < len) {
    const uint16_t lead = load_unit(buf + pos);
    pos++;

    // Non-surrogate units stand for themselves; nothing more to verify.
    const bool is_surrogate = (lead >= 0xd800) && (lead <= 0xdfff);
    if (!is_surrogate) {
      continue;
    }

    // A pair has to open with a unit from 0xd800..0xdbff; a unit from
    // 0xdc00..0xdfff in the leading position is invalid.
    if (lead >= 0xdc00) {
      return false;
    }

    // The pair is cut short by the end of the input.
    if (pos == len) {
      return false;
    }

    // The second unit of the pair must lie in 0xdc00..0xdfff.
    const uint16_t trail = load_unit(buf + pos);
    const bool trail_ok = (trail >= 0xdc00) && (trail <= 0xdfff);
    if (!trail_ok) {
      return false;
    }

    pos++;
  }

  return true;
}

} // namespace reference
} // namespace tests
} // namespace simdutf