File: create_encoding.py

package info (click to toggle)
scummvm 2.9.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 450,580 kB
  • sloc: cpp: 4,299,825; asm: 28,322; python: 12,901; sh: 11,302; java: 9,289; xml: 7,895; perl: 2,639; ansic: 2,465; yacc: 1,670; javascript: 1,020; makefile: 933; lex: 578; awk: 275; objc: 82; sed: 11; php: 1
file content (98 lines) | stat: -rwxr-xr-x 2,672 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
# encoding: utf-8

import struct
from hanzi_charmap import simplified_charmap, traditional_charmap

def merge_ranges(*ranges):
    """Return a set holding every value found in any of the given iterables.

    Called with no arguments, an empty set is returned.
    """
    # A set (rather than a list) keeps later membership tests cheap.
    return set().union(*ranges)

def processtable(inputfilename, outfile, highrange, lowrange):
    """Convert a two-byte code mapping text file into a packed binary table.

    Each data line of *inputfilename* is expected to hold two
    whitespace-separated hexadecimal fields: a code and the value it maps to.
    One little-endian unsigned 16-bit value is written to *outfile* for every
    (high, low) combination of *highrange* x *lowrange*, in ascending order of
    ``(high << 8) | low``.  Combinations that have no entry in the file are
    written as 0.

    Args:
        inputfilename: Path of the text mapping file to read.
        outfile: Binary file-like object; only its ``write`` method is used.
        highrange: Container of accepted high-byte values of a code.
        lowrange: Container of accepted low-byte values of a code.

    Raises:
        OSError: If *inputfilename* cannot be opened.
        ValueError: If a code or value field is not valid hexadecimal.
        struct.error: If a mapped value does not fit in 16 bits.
    """
    # Pre-fill every slot with 0 so unmapped codes still occupy space.
    res = {(x << 8) | y: 0 for x in highrange for y in lowrange}

    with open(inputfilename) as f:
        for line in f:
            if line.startswith('#'):
                continue
            splits = line.split()
            # Blank lines, and codes whose value column is missing or is
            # only a trailing comment, carry no mapping: leave the slot at 0
            # instead of failing on the missing field.
            if len(splits) < 2 or splits[1].startswith('#'):
                continue
            # Codes written with at most 5 characters (e.g. "0x41") are
            # single-byte entries and do not belong in this table.
            if len(splits[0]) <= 5:
                continue
            key = int(splits[0], 16)
            val = int(splits[1], 16)
            high = (key >> 8) & 0xff
            low = key & 0xff
            if high not in highrange or low not in lowrange:
                continue
            res[key] = val

    # Emit the values ordered by code so a reader can index the table.
    for key in sorted(res):
        outfile.write(struct.pack("<H", res[key]))

def process_hanzi_t2s(outfile):
    """Write the paired entries of the two hanzi charmaps to *outfile*.

    Output is a little-endian signed 32-bit pair count followed, for each
    index, by two unsigned 16-bit code points: the entry of
    ``traditional_charmap`` and the entry of ``simplified_charmap`` at that
    index.  Only as many pairs as the shorter charmap has are written.
    """
    # zip() stops at the shorter charmap, matching the written count.
    pairs = list(zip(traditional_charmap, simplified_charmap))
    outfile.write(struct.pack("<i", len(pairs)))
    for trad_char, simp_char in pairs:
        outfile.write(struct.pack("<HH", ord(trad_char), ord(simp_char)))
    
# Keys of the per-table description dictionaries below.
HIGH = 'high'
LOW = 'low'
FILE = 'file'

# One entry per table stored in encoding.dat, in output order.  For the
# mapping-file tables, HIGH and LOW are the accepted high-byte and low-byte
# values handed to processtable().  The "hanzi_charmap.py" entry has no byte
# ranges: it is written by process_hanzi_t2s() from the imported charmaps.
tables = [
    {
        FILE: "CP932.TXT",
        HIGH: merge_ranges(range(0x81, 0x85), range(0x87, 0xa0), range(0xe0, 0xef), range(0xfa, 0xfd)),
        LOW: range(0x40, 0x100)
    },
    {
        FILE: "CP949.TXT",
        HIGH: range(0x81, 0xFF),
        LOW: merge_ranges(range(0x41, 0x5b), range(0x61, 0x7b), range(0x81, 0xFF))
    },
    {
        FILE: "CP950.TXT",
        HIGH: range(0xA1, 0xFA),
        LOW: merge_ranges(range(0x40, 0x7f), range(0xa1, 0xff))
    },
    {
        FILE: "JOHAB.TXT",
        HIGH: range(0x84, 0xD4),
        LOW: merge_ranges(range(0x41, 0x7f), range(0x81, 0xff))
    },
    {
        FILE: "CP936.TXT",
        HIGH: range(0x81, 0xFF),
        LOW: merge_ranges(range(0x40, 0x7f), range(0x80, 0xff))
    },
    {
        FILE: "hanzi_charmap.py"
    },
]

# Layout of encoding.dat (all integers little-endian):
#   8 bytes    signature b'SCVMENCD'
#   int32      version
#   int32      number of tables
#   int32 * n  absolute file offset of each table
#   ...        table payloads, back to back, in the same order
#
# The with-statement guarantees the file is flushed and closed, even if
# reading one of the input tables fails part-way through.
with open("encoding.dat", "wb") as encdat:
    encdat.write(b'SCVMENCD')
    # version
    encdat.write(struct.pack("<i", 0))

    # number of tables
    encdat.write(struct.pack("<i", len(tables)))

    # The first payload starts after the 16 header bytes written above plus
    # one 4-byte offset per table.
    curofs = 16 + 4 * len(tables)

    # First pass: write the offset directory by predicting each payload size.
    for v in tables:
        encdat.write(struct.pack("<i", curofs))
        if v[FILE] == "hanzi_charmap.py":
            # 4-byte pair count, then 4 bytes (two 16-bit values) per pair.
            curofs += min(len(traditional_charmap), len(simplified_charmap)) * 4 + 4
        else:
            # One 16-bit value per (high, low) byte combination.
            curofs += len(v[HIGH]) * len(v[LOW]) * 2

    # Second pass: write the payloads in the same order as the directory.
    for v in tables:
        if v[FILE] == "hanzi_charmap.py":
            process_hanzi_t2s(encdat)
        else:
            processtable(v[FILE], encdat, v[HIGH], v[LOW])