File: generate_unicode_tolower.py

package info (click to toggle)
ddnet 19.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 68,960 kB
  • sloc: cpp: 195,050; ansic: 58,572; python: 5,568; asm: 946; sh: 941; java: 366; xml: 206; makefile: 31
file content (57 lines) | stat: -rw-r--r-- 1,271 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Needs UnicodeData.txt in the current directory.
#
# It can be obtained from unicode.org:
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
#
# If executed as a script, it generates the contents of the following files:
# - python3 scripts/generate_unicode_tolower.py header > src/base/unicode/tolower.h
# - python3 scripts/generate_unicode_tolower.py data > src/base/unicode/tolower_data.h

import sys
import unicode

def generate_cases():
	"""Collect (upper, lower) pairs for every record with a simple lowercase mapping.

	Iterates the records returned by `unicode.data()` and keeps only those
	whose "Simple_Lowercase_Mapping" field is non-empty. Both the record's
	"Value" and its lowercase mapping are passed through `unicode.unhex()`
	(presumably hex string -> integer code point; confirm in the `unicode`
	helper module).

	Returns a list of 2-tuples in the order the records were produced.
	"""
	pairs = []
	for record in unicode.data():
		lower_field = record["Simple_Lowercase_Mapping"]
		# Records without a lowercase mapping contribute nothing to the table.
		if not lower_field:
			continue
		pairs.append((unicode.unhex(record["Value"]), unicode.unhex(lower_field)))
	return pairs

def gen_header(cases):
	"""Print the C++ header text for the lowercase table to stdout.

	The header declares the `UPPER_LOWER` struct, an anonymous enum whose
	`NUM_TOLOWER` member equals `len(cases)`, and the extern `tolowermap`
	array. Only the length of `cases` is used here.
	"""
	entry_count = len(cases)
	header_lines = [
		"#include <cstdint>",
		"",
		"struct UPPER_LOWER",
		"{",
		"\tint32_t upper;",
		"\tint32_t lower;",
		"};",
		"",
		"enum",
		"{",
		f"\tNUM_TOLOWER = {entry_count},",
		"};",
		"",
		"extern const struct UPPER_LOWER tolowermap[];",
	]
	print("\n".join(header_lines))

def gen_data(cases):
	"""Print the C++ definition of the `tolowermap` array to stdout.

	Emits a preprocessor guard that rejects inclusion unless `TOLOWER_DATA`
	is defined, then one `{upper, lower},` initializer line per pair in
	`cases` (in iteration order), then the closing `};`.
	"""
	preamble = (
		"#ifndef TOLOWER_DATA",
		'#error "This file must only be included in `tolower.cpp`"',
		"#endif",
		"",
		"const struct UPPER_LOWER tolowermap[] = {",
	)
	print("\n".join(preamble))
	for pair in cases:
		upper, lower = pair
		print("\t{{{}, {}}},".format(upper, lower))
	print("};")

def main():
	"""Script entry point: emit either the header or the data file on stdout.

	The mode is selected by the presence of the word "header" or "data"
	anywhere in `sys.argv`; when both are present, "header" wins.

	Raises:
		SystemExit: with a usage message (printed to stderr, exit status 1)
			when neither mode is given. Without this check the script
			would silently print nothing, and redirecting its output
			would produce an empty generated file.
	"""
	want_header = "header" in sys.argv
	want_data = "data" in sys.argv

	# Validate the mode before doing any work, so a bad invocation fails
	# fast and does not depend on UnicodeData.txt being present.
	if not (want_header or want_data):
		sys.exit("usage: python3 scripts/generate_unicode_tolower.py header|data")

	cases = generate_cases()

	if want_header:
		gen_header(cases)
	else:
		gen_data(cases)

if __name__ == '__main__':
	main()