1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
# Create utfdata.h from UnicodeData.txt
tolower = []
toupper = []
isalpha = []
for line in open("UnicodeData.txt").readlines():
line = line.split(";")
code = int(line[0],16)
# if code > 65535: continue # skip non-BMP codepoints
if line[2][0] == 'L':
isalpha.append(code)
if line[12]:
toupper.append((code,int(line[12],16)))
if line[13]:
tolower.append((code,int(line[13],16)))
def dumpalpha():
table = []
prev = 0
start = 0
for code in isalpha:
if code != prev+1:
if start:
table.append((start,prev))
start = code
prev = code
table.append((start,prev))
print("")
print("static const int ucd_alpha2[] = {")
for a, b in table:
if b - a > 0:
print(hex(a)+","+hex(b)+",")
print("};");
print("")
print("static const int ucd_alpha1[] = {")
for a, b in table:
if b - a == 0:
print(hex(a)+",")
print("};");
def dumpmap(name, input):
table = []
prev_a = 0
prev_b = 0
start_a = 0
start_b = 0
for a, b in input:
if a != prev_a+1 or b != prev_b+1:
if start_a:
table.append((start_a,prev_a,start_b))
start_a = a
start_b = b
prev_a = a
prev_b = b
table.append((start_a,prev_a,start_b))
print("")
print("static const int " + name + "2[] = {")
for a, b, n in table:
if b - a > 0:
print(hex(a)+","+hex(b)+","+str(n-a)+",")
print("};");
print("")
print("static const int " + name + "1[] = {")
for a, b, n in table:
if b - a == 0:
print(hex(a)+","+str(n-a)+",")
print("};");
print("/* This file was automatically created from UnicodeData.txt */")
dumpalpha()
dumpmap("ucd_tolower", tolower)
dumpmap("ucd_toupper", toupper)
|