File: update_table.py

package info (click to toggle)
translitcodec 0.7.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 336 kB
  • sloc: python: 2,600; perl: 182; makefile: 33; sh: 5
file content (80 lines) | stat: -rw-r--r-- 2,383 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Updates translitcodec/__init__.py with translation table information
built from the 'transtab' database.

:copyright: the translitcodec authors and developers, see AUTHORS.
:license: MIT, see LICENSE for more details.
"""
import csv
import os
import sys


# Register a csv dialect named 'transtab' that uses ';' as the field
# delimiter; read_table() passes this name to csv.reader when splitting the
# replacement part of a table line.
csv.register_dialect('transtab', delimiter=';')


def read_table(path='transtab/transtab'):
    """Parse a transtab database file into three translation tables.

    Only lines starting with ``<`` are considered.  Each such line is split
    at the first space into a source spec (``<UXXXX>``, a hexadecimal code
    point) and a replacement part.  The replacement part is split into
    fields with the csv ``'transtab'`` dialect; the first field is the
    "long" replacement and the second, when present, the "short" one.  If
    there is only one field it is used for both.

    Entries whose source code point is 128 or lower are skipped.

    :param path: location of the transtab file.  It is decoded as UTF-8
        rather than with the locale's default encoding, so the result does
        not depend on the environment the script runs in.
    :returns: a ``(long, short, single)`` tuple of dicts mapping source code
        points (ints) to replacement strings.  ``single`` holds only the
        entries whose short replacement is exactly one character long.
    :raises ValueError: if a ``<`` line has no space-separated replacement
        part or contains a spec that is not valid hexadecimal.
    """
    long, short, single = {}, {}, {}

    with open(path, encoding='utf-8') as fh:
        # Iterate the file object directly: lines are streamed instead of
        # being loaded into a throwaway list first.
        for line in fh:
            if not line.startswith('<'):
                continue
            from_spec, raw_to = line.strip().split(' ', 1)
            # '<UXXXX>' -> 'XXXX' -> int
            from_ord = int(from_spec[2:-1], 16)
            # NOTE(review): this also skips U+0080, which is outside ASCII;
            # confirm that '<= 128' (rather than '< 128') is intended.
            if from_ord <= 128:
                continue

            raw = next(csv.reader([raw_to], 'transtab'))
            long_char = _unpack_uchrs(raw[0])
            if len(raw) < 2:
                # No separate short form given: reuse the long one.
                short_char = long_char
            else:
                short_char = _unpack_uchrs(raw[1])

            long[from_ord] = long_char
            short[from_ord] = short_char
            if len(short_char) == 1:
                single[from_ord] = short_char
    return long, short, single


def _unpack_uchrs(packed):
    """Decode a run of ``<UXXXX>`` specs into the string they spell out.

    Every spec carries one hexadecimal code point, so
    ``'<U0028><U0043><U0029>'`` becomes ``'(C)'``.  An empty or
    whitespace-only input yields an empty string.
    """
    decoded = []
    # Turning each '<U' opener into a space leaves whitespace-separated
    # chunks of the form 'XXXX>'.
    for spec in packed.replace('<U', ' ').strip().split():
        hex_digits = spec[:-1]  # drop the last character (the closing '>')
        decoded.append(chr(int(hex_digits, 16)))
    return ''.join(decoded)


def update_inclusion(long, short, single, path="translitcodec/__init__.py"):
    """Replace the generated table section of the module at *path*.

    The existing file is split around two marker lines:

    * everything up to and including a line starting with ``### >`` is kept,
    * everything from a line starting with ``### <`` onwards is kept,
    * the lines in between are discarded.

    The file is then rewritten with the kept leading lines, freshly dumped
    ``long_table``, ``short_table`` and ``single_table`` dicts, and the kept
    trailing lines.  If the file has no marker lines, all of it is treated
    as leading content and the tables are appended after it.

    :param long: dict written out as ``long_table``.
    :param short: dict written out as ``short_table``.
    :param single: dict written out as ``single_table``.
    :param path: Python source file to update in place.  It is read and
        written as UTF-8 (the default Python source encoding) instead of the
        locale's encoding: the dumped ``repr()`` values may contain
        non-ASCII characters, and an encoding failure halfway through the
        write would leave the already-truncated file incomplete.
    """
    preamble, postamble = [], []
    # ``bucket`` is the list currently collecting lines; ``None`` means we
    # are inside the generated section and lines are dropped.
    bucket = preamble
    with open(path, 'r', encoding='utf-8') as fh:
        for line in fh:
            if line.startswith('### <'):
                # The closing marker itself belongs to the trailing part.
                bucket = postamble
            if bucket is not None:
                bucket.append(line)
            if line.startswith('### >'):
                # The opening marker was kept above; drop what follows it.
                bucket = None

    with open(path, 'w', encoding='utf-8') as fh:
        fh.writelines(preamble)
        fh.write("\n")
        _dump_dict(fh, 'long_table', long)
        _dump_dict(fh, 'short_table', short)
        _dump_dict(fh, 'single_table', single)
        fh.write("\n")
        fh.writelines(postamble)


def _dump_dict(fh, name, data):
    """Write *data* to *fh* as a dict literal assigned to *name*.

    Entries are emitted one per line, ordered by sorting the dict's items,
    with both key and value rendered through ``repr()``.  The literal is
    followed by a blank line.
    """
    fh.write("{} = {{\n".format(name))
    fh.writelines(
        "  {!r}: {!r},\n".format(key, value)
        for key, value in sorted(data.items())
    )
    fh.write("}\n\n")

if __name__ == '__main__':
    # Both the package to patch and the transtab data are addressed relative
    # to the current working directory, so give up early when either path
    # is absent.
    if not os.path.exists('translitcodec') or not os.path.exists('transtab'):
        print("Can not find translitcodec/ and transtab/ directories.")
        sys.exit(-1)
    long_table, short_table, single_table = read_table()
    update_inclusion(long_table, short_table, single_table)
    print("Updated.")