File: 008_unicode_reuse.re

package info (click to toggle)
re2c 4.1-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 50,052 kB
  • sloc: cpp: 32,477; ml: 8,279; sh: 5,265; makefile: 968; haskell: 612; python: 428; ansic: 227; javascript: 111; java: 3
file content (72 lines) | stat: -rw-r--r-- 1,479 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
//go:generate re2go $INPUT -o $OUTPUT -ir --input-encoding utf8
package main

// Shared re2c rules block. It is not tied to one encoding: each use:re2c block
// in the functions below pulls these definitions in and then sets its own
// YYCTYPE and encoding flag.
//
// Configuration: YYFILL is disabled, and the lexer primitives are spelled in
// terms of the variables str, cursor and marker, which every function that
// uses this block must therefore have in scope.
//
// Rules: `end` is a single NUL code unit and `vowel` is one of the five listed
// letters. The input vowel "г" vowel followed by NUL yields 0; the default
// rule `*` yields 1.
/*!rules:re2c
	re2c:yyfill:enable = 0;
	re2c:define:YYPEEK = "str[cursor]";
	re2c:define:YYSKIP = "cursor += 1";
	re2c:define:YYBACKUP  = "marker = cursor";
	re2c:define:YYRESTORE = "cursor = marker";

	end = [\x00];
	vowel = "a" | "э" | "ы" | "о" | "у";

	*                   { return 1 }
	vowel "г" vowel end { return 0 }
*/

// LexUtf8 matches str against the shared rules block with byte-sized code
// units and the re2c flag `8` (UTF-8 encoding) enabled, plus one rule local
// to this block.
//
// The function body is generated by re2c from the use block below, so the
// result comes from the rule actions: 0 for vowel "г" vowel followed by NUL,
// 2 for U+12014 followed by NUL, and 1 from the default rule.
//
// NOTE(review): the rules rely on a NUL terminator and YYFILL is disabled, so
// str is presumably expected to end in a zero byte — confirm against callers.
func LexUtf8(str string) int {
	// cursor and marker are the names the shared YYPEEK/YYSKIP/YYBACKUP/YYRESTORE
	// definitions refer to; both start at zero, i.e. at the beginning of str.
	var cursor, marker int
	/*!use:re2c
	re2c:define:YYCTYPE = byte;
	re2c:flags:8 = 1;

	"\U00012014" end { return 2 }
	*/
}

// LexUtf16 matches str against the shared rules block with uint16 code units
// and the re2c flag `x` (UTF-16 encoding) enabled, plus one rule local to this
// block.
//
// The function body is generated by re2c from the use block below, so the
// result comes from the rule actions: 0 for vowel "г" vowel followed by NUL,
// 2 for U+12014 followed by NUL, and 1 from the default rule.
//
// NOTE(review): the rules rely on a NUL terminator and YYFILL is disabled, so
// str is presumably expected to end in a zero element — confirm against callers.
func LexUtf16(str []uint16) int {
	// cursor and marker are the names the shared YYPEEK/YYSKIP/YYBACKUP/YYRESTORE
	// definitions refer to; both start at zero, i.e. at the beginning of str.
	var cursor, marker int
	/*!use:re2c
	re2c:define:YYCTYPE = uint16;
	re2c:flags:x = 1;

	"\U00012014" end { return 2 }
	*/
}

// LexUcs2 matches str against the shared rules block with uint16 code units
// and the re2c flag `w` (UCS-2 encoding) enabled. It adds no rules of its own.
//
// The function body is generated by re2c from the use block below, so the
// result comes from the shared rule actions only: 0 for vowel "г" vowel
// followed by NUL, and 1 from the default rule.
//
// NOTE(review): unlike the sibling lexers there is no U+12014 rule here,
// presumably because that code point does not fit in a single 16-bit unit —
// confirm this omission is intentional.
func LexUcs2(str []uint16) int {
	// cursor and marker are the names the shared YYPEEK/YYSKIP/YYBACKUP/YYRESTORE
	// definitions refer to; both start at zero, i.e. at the beginning of str.
	var cursor, marker int
	/*!use:re2c
	re2c:define:YYCTYPE = uint16;
	re2c:flags:w = 1;
	*/
}

// LexUtf32 matches str against the shared rules block with uint32 code units
// and the re2c flag `u` (UTF-32 encoding) enabled, plus one rule local to this
// block.
//
// The function body is generated by re2c from the use block below, so the
// result comes from the rule actions: 0 for vowel "г" vowel followed by NUL,
// 2 for U+12014 followed by NUL, and 1 from the default rule.
//
// NOTE(review): the rules rely on a NUL terminator and YYFILL is disabled, so
// str is presumably expected to end in a zero element — confirm against callers.
func LexUtf32(str []uint32) int {
	// cursor and marker are the names the shared YYPEEK/YYSKIP/YYBACKUP/YYRESTORE
	// definitions refer to; both start at zero, i.e. at the beginning of str.
	var cursor, marker int
	/*!use:re2c
	re2c:define:YYCTYPE = uint32;
	re2c:flags:u = 1;

	"\U00012014" end { return 2 }
	*/
}

// main is a self-check: it feeds each lexer NUL-terminated inputs in the
// matching encoding and panics, naming the encoding, on the first result that
// differs from the expected rule value.
//
// The cases are evaluated top to bottom and left to right, and evaluation
// stops at the first mismatch, so no lexer runs after a failed check.
func main() {
	switch {
	// Cyrillic "ыгы" must hit the shared rule (0); U+12014 the local rule (2).
	case LexUtf8("ыгы\000") != 0,
		LexUtf8(string([]byte{0xf0, 0x92, 0x80, 0x94, 0})) != 2:
		panic("UTF8 failed")

	// In UTF-16 the second input is the pair of code units 0xd808, 0xdc14.
	case LexUtf16([]uint16{0x44b, 0x433, 0x44b, 0}) != 0,
		LexUtf16([]uint16{0xd808, 0xdc14, 0}) != 2:
		panic("UTF16 failed")

	// The UCS-2 lexer is only checked against the shared rule.
	case LexUcs2([]uint16{0x44b, 0x433, 0x44b, 0}) != 0:
		panic("UCS2 failed")

	case LexUtf32([]uint32{0x44b, 0x433, 0x44b, 0}) != 0,
		LexUtf32([]uint32{0x12014, 0}) != 2:
		panic("UTF32 failed")
	}
}