File: genucd.py

package info (click to toggle)
mujs 1.3.8-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 756 kB
  • sloc: ansic: 17,132; makefile: 149; sh: 121; javascript: 109; python: 101
file content (117 lines) | stat: -rw-r--r-- 2,682 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Create utfdata.h from UnicodeData.txt and SpecialCasing.txt

import sys

# Tables filled in from the two input files.
tolower = []		# (code, simple lowercase mapping) pairs
toupper = []		# (code, simple uppercase mapping) pairs
tolower_full = []	# [code, lower...] rows from SpecialCasing.txt
toupper_full = []	# [code, upper...] rows from SpecialCasing.txt
isalpha = []		# code points whose general category starts with 'L'

# UnicodeData.txt -- semicolon separated; field 0 is the code point, field 1
# the name, field 2 the general category, field 12 the simple uppercase
# mapping and field 13 the simple lowercase mapping.
with open(sys.argv[1]) as ucd:
	# Large blocks (CJK ideographs, Hangul syllables, ...) are listed only as
	# a "<..., First>" / "<..., Last>" pair of lines; every code point in
	# between shares the same properties, so the range has to be expanded.
	range_first = None
	for line in ucd:
		if not line.strip():
			continue
		fields = line.split(";")
		code = int(fields[0], 16)
		# NOTE: non-BMP code points are kept on purpose; to restrict the
		# tables to the BMP, skip entries with code > 0xFFFF here.
		name = fields[1]
		if name.endswith(", First>"):
			range_first = code
			codes = [code]
		elif name.endswith(", Last>") and range_first is not None:
			# The first code point was already recorded on its own line.
			codes = range(range_first + 1, code + 1)
			range_first = None
		else:
			codes = [code]
		if fields[2].startswith("L"):
			isalpha.extend(codes)
		if fields[12]:
			toupper.append((code, int(fields[12], 16)))
		if fields[13]:
			tolower.append((code, int(fields[13], 16)))

# SpecialCasing.txt -- code; lower; title; upper; (condition;)? # comment
with open(sys.argv[2]) as special:
	for line in special:
		line = line.strip()
		# Skip blank lines and whole-line comments.
		if not line or line.startswith("#"):
			continue
		fields = line.split(";")
		code = int(fields[0], 16)
		lower = fields[1].strip()
		upper = fields[3].strip()
		if not lower or not upper:
			continue
		# Field 4 is either the condition or the trailing "# comment";
		# only unconditional mappings are emitted.
		condition = fields[4].split("#")[0].strip()
		if condition:
			continue
		# split() without an argument tolerates repeated spaces.
		lower = [int(x, 16) for x in lower.split()]
		upper = [int(x, 16) for x in upper.split()]
		# A mapping that starts with the code point itself is treated as
		# "no change" and left out of the table.
		if lower[0] != code:
			tolower_full.append([code] + lower)
		if upper[0] != code:
			toupper_full.append([code] + upper)

# Rows are sorted by their leading code point.
tolower_full.sort()
toupper_full.sort()

def dumpalpha():
	"""Print the alphabetic code points as two C arrays of Rune.

	Reads the module-level ``isalpha`` list.  Neighbouring entries that
	increase by exactly one are merged into inclusive (first, last) runs.
	Runs covering two or more code points are printed in ``ucd_alpha2`` as
	"first,last," lines; isolated code points are printed in ``ucd_alpha1``
	one per line.  An empty list produces two empty arrays.
	"""
	runs = []
	# None (rather than 0) marks "no run open yet", so empty input adds no
	# bogus entry and a run that begins at code point 0 is not dropped.
	first = last = None
	for code in isalpha:
		if last is None or code != last + 1:
			if first is not None:
				runs.append((first, last))
			first = code
		last = code
	if first is not None:
		runs.append((first, last))

	print("")
	print("static const Rune ucd_alpha2[] = {")
	for a, b in runs:
		if b > a:
			print(hex(a) + "," + hex(b) + ",")
	print("};")

	print("")
	print("static const Rune ucd_alpha1[] = {")
	for a, b in runs:
		if b == a:
			print(hex(a) + ",")
	print("};")

def dumpmap(name, input):
	"""Print a code point mapping as two C arrays of Rune.

	name  -- prefix of the generated arrays ("<name>2" and "<name>1").
	input -- iterable of (code, mapped) pairs.  (The parameter name shadows
	         the builtin but is kept for compatibility with callers.)

	Neighbouring pairs in which both the code point and its mapping
	increase by exactly one are merged into a run.  Runs covering two or
	more code points are printed in "<name>2" as "first,last,delta,"
	lines; single code points are printed in "<name>1" as "code,delta,"
	lines.  delta is the decimal difference mapped - code at the start of
	the run.  Empty input produces two empty arrays.
	"""
	runs = []
	# None (rather than 0) marks "no run open yet", so empty input adds no
	# bogus entry and a run that begins at code point 0 is not dropped.
	first_a = first_b = None
	last_a = last_b = None
	for a, b in input:
		if last_a is None or a != last_a + 1 or b != last_b + 1:
			if first_a is not None:
				runs.append((first_a, last_a, first_b))
			first_a = a
			first_b = b
		last_a = a
		last_b = b
	if first_a is not None:
		runs.append((first_a, last_a, first_b))

	print("")
	print("static const Rune " + name + "2[] = {")
	for a, b, n in runs:
		if b > a:
			print(hex(a) + "," + hex(b) + "," + str(n - a) + ",")
	print("};")

	print("")
	print("static const Rune " + name + "1[] = {")
	for a, b, n in runs:
		if b == a:
			print(hex(a) + "," + str(n - a) + ",")
	print("};")

def dumpmultimap(name, table, w):
	"""Print fixed-width rows of code points as one flat C array of Rune.

	name  -- name of the generated array.
	table -- sequence of rows, each a list of integers.
	w     -- number of entries printed per row; shorter rows are padded
	         with zeros on the right.

	The rows passed in are not modified.  Raises ValueError, before
	anything is printed, if a row has more than w entries, since such a
	row would break the fixed row width of the generated table.
	"""
	padded_rows = []
	for row in table:
		if len(row) > w:
			raise ValueError(
				"%s: row %r has more than %d entries" % (name, row, w))
		# Pad a copy so the caller's row keeps its original length.
		padded_rows.append(list(row) + [0] * (w - len(row)))

	print("")
	print("static const Rune " + name + "[] = {")
	for row in padded_rows:
		print(",".join(map(hex, row)) + ",")
	print("};")

# Write the generated header to stdout: a banner naming the first input
# file, then the alphabetic tables, the simple case maps and the full
# (multi-code-point) case maps, in that order.
print("/* This file was automatically created from %s */" % sys.argv[1])
dumpalpha()
for table_name, pairs in (
	("ucd_tolower", tolower),
	("ucd_toupper", toupper),
):
	dumpmap(table_name, pairs)
for table_name, rows, width in (
	("ucd_tolower_full", tolower_full, 4),
	("ucd_toupper_full", toupper_full, 5),
):
	dumpmultimap(table_name, rows, width)