1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
|
import strutils
import unicode_data
import derived_data
import min_perfect_hash
import utils
proc parseComps(
decompsRaw: seq[string],
exclude: seq[int] # todo: make it a set
): seq[Record[seq[int]]] =
var maxCompSize = 0
for dcp in decompsRaw:
if dcp.len > 0:
inc maxCompSize
result = newSeqOfCap[Record[seq[int]]](maxCompSize)
for cp, dcp in pairs(decompsRaw):
if dcp.len == 0:
continue
if cp in exclude:
continue
if "<" in dcp: # Compatibility decomp
continue
let dcpParts = dcp.split(" ")
assert len(dcpParts) == 2
let
cp_a = parseHexInt("0x$#" % dcpParts[0])
cp_b = parseHexInt("0x$#" % dcpParts[1])
result.add((
key: @[cp_a, cp_b],
value: @[cp_a, cp_b, cp]))
const compsTemplate = """## This is auto-generated. Do not modify it
const
compsHashes* = [
$#
]
compsValues* = [
$#
]
"""
when isMainModule:
var decomps = parseComps(
parseUDDecomps("./gen/UCD/UnicodeData.txt"),
parseDNPExclusion("./gen/UCD/DerivedNormalizationProps.txt"))
var mphTables = mph(decomps)
echo mphLookup(mphTables.h, mphTables.v, [65, 768])
var compValues = newSeq[string](len(mphTables.v))
for i, v in mphTables.v:
assert len(v) == 3
compValues[i] = "[$#]" % join(v, "'i32, ")
var f = open("./src/unicodedb/compositions_data.nim", fmWrite)
try:
f.write(compsTemplate % [
prettyTable(mphTables.h, 15, "'i16"),
join(compValues, ",\L ")])
finally:
close(f)
|