File: encode.ha

package info (click to toggle)
hare 0.24.2-4
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,756 kB
  • sloc: asm: 1,180; sh: 119; makefile: 116; lisp: 99
file content (48 lines) | stat: -rw-r--r-- 1,107 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// Produces the UTF-8 encoding of a rune and returns it as a slice of one to
// four bytes. The slice refers to a statically allocated buffer, so its
// contents are overwritten by the next call to encoderune. Aborts if the rune
// is a surrogate (U+D800 through U+DFFF) or is greater than U+10FFFF.
export fn encoderune(r: rune) []u8 = {
	static let out: [4]u8 = [0...];
	// Marker bits for the leading byte, indexed by (encoded length - 1).
	const lead: [4]u8 = [0x00, 0xC0, 0xE0, 0xF0];

	let cp = r: u32;
	assert((cp < 0xD800 || cp > 0xDFFF) && cp <= 0x10FFFF,
		"the rune is not a valid Unicode codepoint");

	// Number of bytes needed to encode this codepoint.
	const count: size =
		if (cp < 0x80) 1z
		else if (cp < 0x800) 2z
		else if (cp < 0x10000) 3z
		else 4z;

	// Emit continuation bytes from last to first, consuming the low six
	// bits of the codepoint for each one.
	for (let k = count - 1; k > 0; k -= 1) {
		out[k] = cp: u8 & 0x3F | 0x80;
		cp >>= 6;
	};
	// The bits left over belong in the leading byte.
	out[0] = cp: u8 | lead[count - 1];
	return out[..count];
};

// Checks encoderune against known UTF-8 byte sequences, with one test vector
// for each encoded length from one to four bytes.
@test fn encode() void = {
	const expected: [_][]u8 = [
		[0],
		[0x25],
		[0xC3, 0xA9],
		[0xE3, 0x81, 0x93],
		[0xF0, 0x9F, 0x98, 0x80],
	];
	// U+0000, U+0025, U+00E9, U+3053, U+1F600
	const inputs = ['\0', '%', '\u00E9', 'こ', '\U0001F600'];
	assert(len(inputs) == len(expected));
	for (let i = 0z; i < len(inputs); i += 1) {
		const out = encoderune(inputs[i]);
		// Compare lengths first; otherwise an over-long result whose
		// prefix happens to match would go unnoticed.
		assert(len(out) == len(expected[i]));
		for (let j = 0z; j < len(expected[i]); j += 1) {
			assert(out[j] == expected[i][j]);
		};
	};
};