File: gen-unicode-vo.py

package info (click to toggle)
rust-unicode-vo 0.1.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid, trixie
  • size: 120 kB
  • sloc: python: 52; makefile: 4
file content (70 lines) | stat: -rwxr-xr-x 1,687 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3

import urllib.request
import os

URL = 'https://www.unicode.org/Public/vertical/revision-17/VerticalOrientation-17.txt'
FILE_NAME = 'VerticalOrientation-17.txt'

# Orientation values that produce output, mapped to the Rust expression
# printed for them.
RUST_TYPES = {
    'U': 'Orientation::Upright',
    'Tu': 'Orientation::TransformedOrUpright',
    'Tr': 'Orientation::TransformedOrRotated',
}

# Entries with this orientation value are dropped: nothing is printed for
# them. (Presumably the consumer of the output treats it as the fallback
# case -- confirm against the Rust side.)
SKIPPED_TYPE = 'R'


def fetch_data_file():
    """Download the data file to FILE_NAME unless it is already present."""
    if os.path.exists(FILE_NAME):
        return

    # Download under a temporary name and rename afterwards, so that an
    # interrupted transfer never leaves a truncated file at FILE_NAME that
    # a later run would mistake for a complete cached copy.
    partial_name = FILE_NAME + '.part'
    urllib.request.urlretrieve(URL, partial_name)
    os.replace(partial_name, FILE_NAME)


def parse_ranges(lines):
    """Parse data-file lines into (start, end, rust_type) tuples.

    Each data line has the form ``<code points> ; <orientation>``, where
    ``<code points>`` is either one hex code point or ``START..END``.
    Blank lines and ``#`` comments (whole-line or trailing) are ignored,
    and whitespace around the ``;`` separator is not significant.

    Args:
        lines: Iterable of text lines (for example an open text file).

    Returns:
        A list of ``(start, end, rust_type)`` tuples in input order.
        ``start`` and ``end`` are the hex strings exactly as written in the
        input; ``end`` is ``None`` for a single code point. Entries whose
        orientation is ``SKIPPED_TYPE`` are omitted.

    Raises:
        ValueError: If a data line does not have exactly two fields, or if
            its orientation value is not a known one.
    """
    ranges = []
    for line_no, raw_line in enumerate(lines, start=1):
        line = raw_line.split('#', 1)[0].strip()
        if not line:
            continue

        fields = [field.strip() for field in line.split(';')]
        if len(fields) != 2:
            raise ValueError(
                'line {}: expected "<range> ; <type>", got {!r}'.format(
                    line_no, line))
        code_points, vo_type = fields

        if vo_type == SKIPPED_TYPE:
            continue

        # Fail loudly on an unknown value instead of silently reusing the
        # mapping chosen for the previous line.
        if vo_type not in RUST_TYPES:
            raise ValueError(
                'line {}: unknown orientation value {!r}'.format(
                    line_no, vo_type))
        rust_type = RUST_TYPES[vo_type]

        start, separator, end = code_points.partition('..')
        ranges.append((start, end if separator else None, rust_type))

    return ranges


def merge_ranges(ranges):
    """Coalesce runs of consecutive single code points into ranges.

    Only entries that are single code points (``end is None``) take part:
    a run of singles with consecutive values and the same ``rust_type``
    becomes one ``(first, last, rust_type)`` entry. Entries that are
    already ranges are passed through unchanged and always end a run.

    Args:
        ranges: Sequence of ``(start, end, rust_type)`` entries as returned
            by ``parse_ranges``.

    Returns:
        A new list of ``(start, end, rust_type)`` tuples; the input is not
        modified.
    """
    merged = []
    idx = 0
    while idx < len(ranges):
        start, end, rust_type = ranges[idx]
        idx += 1

        if end is None:
            expected = int(start, 16) + 1
            while idx < len(ranges):
                next_start, next_end, next_type = ranges[idx]
                if (next_end is not None
                        or next_type != rust_type
                        or int(next_start, 16) != expected):
                    break
                end = next_start
                expected += 1
                idx += 1

        merged.append((start, end, rust_type))

    return merged


def format_arms(merged_ranges):
    """Render entries as ``0xSTART...0xEND => TYPE,`` / ``0xCP => TYPE,``.

    Args:
        merged_ranges: Iterable of ``(start, end, rust_type)`` entries.

    Returns:
        A list with one output line (no trailing newline) per entry.
    """
    arms = []
    for start, end, rust_type in merged_ranges:
        if end:
            # NOTE(review): the `...` spelling is kept as-is so the output
            # stays identical; confirm whether the consumer wants `..=`.
            arms.append('0x{}...0x{} => {},'.format(start, end, rust_type))
        else:
            arms.append('0x{} => {},'.format(start, rust_type))
    return arms


def main():
    """Fetch the data file if needed and print one line per merged entry."""
    fetch_data_file()

    # Explicit encoding so decoding does not depend on the user's locale.
    with open(FILE_NAME, encoding='utf-8') as f:
        ranges = parse_ranges(f)

    for arm in format_arms(merge_ranges(ranges)):
        print(arm)


if __name__ == '__main__':
    main()