File: encode_utf16.cpp

package info (click to toggle)
simdutf 7.7.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,244 kB
  • sloc: cpp: 60,074; ansic: 14,226; python: 3,364; sh: 321; makefile: 12
file content (49 lines) | stat: -rw-r--r-- 1,182 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include "encode_utf16.h"

namespace simdutf {
namespace tests {
namespace reference {
namespace utf16 {

// Tells whether `value` is a code point that UTF-16 is able to encode.
//
// Two ranges are rejected, both following RFC-2781 (2. UTF-16 definition):
//   * values greater than 0x10FFFF, which cannot be encoded in UTF-16;
//   * values between 0xD800 and 0xDFFF, which are specifically reserved
//     for use with UTF-16 and have no characters assigned to them.
bool valid_value(uint32_t value) {
  const bool within_range = value <= 0x10FFFF;
  const bool is_reserved = (value >= 0xD800) && (value <= 0xDFFF);

  return within_range && !is_reserved;
}

// Stores the UTF-16 encoding of `value` in the output code units.
//
// Return value:
//   0 - `value` cannot be encoded; neither W1 nor W2 is written
//   1 - a single code unit was stored in W1; W2 is left untouched
//   2 - a pair of code units was stored in W1 (first) and W2 (second)
int encode(uint32_t value, char16_t &W1, char16_t &W2) {
  if (!valid_value(value)) {
    return 0;
  }

  if (value > 0xffff) {
    // Two code units: the value is rebased by 0x10000, then its upper ten
    // bits are or-ed into 0xd800 and its lower ten bits into 0xdc00.
    const uint32_t rebased = value - 0x10000;
    W1 = char16_t(0xd800 | ((rebased >> 10) & 0x03ff));
    W2 = char16_t(0xdc00 | (rebased & 0x03ff));
    return 2;
  }

  // The value fits in one 16-bit code unit as is.
  W1 = char16_t(value);
  return 1;
}
} // namespace utf16
} // namespace reference
} // namespace tests
} // namespace simdutf