File: unicodesymbols.py

package info (click to toggle)
lyx 2.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 138,444 kB
  • sloc: cpp: 244,268; ansic: 106,398; xml: 72,791; python: 39,384; sh: 7,666; makefile: 6,584; pascal: 2,143; perl: 2,101; objc: 1,084; tcl: 163; sed: 16
file content (123 lines) | stat: -rwxr-xr-x 3,079 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#! /usr/bin/python3
# -*- coding: utf-8 -*-

# file unicodesymbols.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.

# author Georg Baum

# Full author contact details are available in file CREDITS

# This script reads a unicode symbol file and completes it in the given range


from __future__ import print_function
import os, re, string, sys, unicodedata
import io


def usage(prog_name):
    """Return the two-line command line synopsis for *prog_name*.

    The first line describes the form with explicit file names, the second
    one the form that filters stdin to stdout.
    """
    with_files = "Usage: %s start stop inputfile outputfile" % prog_name
    as_filter = "or     %s start stop <inputfile >outputfile" % prog_name
    return "\n".join((with_files, as_filter))


def error(message):
    """Write *message* followed by a newline to stderr, then exit with status 1."""
    text = message + '\n'
    sys.stderr.write(text)
    # Same effect as sys.exit(1): unwinds via SystemExit with code 1.
    raise SystemExit(1)


def trim_eol(line):
    """Return *line* without its final newline character, if it has one.

    Only a single trailing '\\n' is removed; a line that does not end in a
    newline (e.g. the last line of a file without EOL) is returned as is.
    """
    if line[-1:] != '\n':
        return line
    return line[:-1]


def read(input):
    """Read all lines from *input* and return them as [code point, text] pairs.

    *input* is any object with a readline() method.  Each returned entry is
    a two-element list: the code point parsed from the first token of the
    line and the line text without its trailing newline.  The code point is
    -1 unless the first token starts with "0x" or "#0x", in which case the
    remainder of the token is parsed as a hexadecimal number.
    """
    # Checked in this order; the first matching prefix wins.
    hex_prefixes = ("0x", "#0x")
    entries = []
    while True:
        raw = input.readline()
        if not raw:
            # readline() returns an empty value at end of input
            return entries
        text = trim_eol(raw)
        code_point = -1
        fields = text.split()
        if len(fields) > 0:
            first = fields[0]
            for prefix in hex_prefixes:
                width = len(prefix)
                if first[0:width] == prefix:
                    code_point = int(first[width:], 16)
                    break
        entries.append([code_point, text])


def write(output, lines):
    """Write the text of every entry in *lines* to *output*, one per line.

    Each entry is indexed like the [code point, text] pairs produced when
    reading; only the text (index 1) is written, followed by a newline.
    """
    for entry in lines:
        text = entry[1]
        output.write(text + '\n')


def complete(lines, start, stop):
    """Insert placeholder entries for code points missing from *lines*.

    *lines* is a list of [code point, text] entries and is modified in
    place.  For every code point in range(start, stop) that has no entry at
    the current scan position and that has a Unicode name, a commented-out
    template line ("#0x....") is inserted.  The template carries the
    "combining" flag when unicodedata reports a non-zero combining class and
    ends with the character name as a comment.
    """
    # Python 2 needs unichr() to build a unicode character.
    to_char = unichr if sys.version_info[0] < 3 else chr

    pos = 0
    for code_point in range(start, stop):
        # Step over comments (code point -1) and entries below code_point.
        while pos < len(lines) and lines[pos][0] < code_point:
            pos += 1

        if pos < len(lines) and lines[pos][0] == code_point:
            # Already present; the scan above skips it on the next round.
            continue

        char = to_char(code_point)
        name = unicodedata.name(char, "")
        if name == "":
            # Unnamed code points get no entry.
            continue

        flag = "combining" if unicodedata.combining(char) else ""
        text = '#0x%04x ""                         "" "%s" "" "" # %s' % (code_point, flag, name)
        lines.insert(pos, [code_point, text])
        pos += 1


def main(argv):
    """Complete a unicode symbol file in the range given on the command line.

    *argv* is either [prog, start, stop], in which case stdin is filtered
    to stdout, or [prog, start, stop, inputfile, outputfile].  start and
    stop are decimal numbers, or hexadecimal numbers with a "0x" prefix;
    stop is exclusive.  Any other argument count prints the usage message
    and exits with status 1.  An unparsable number raises ValueError.

    Returns 0 on success.
    """

    def parse_number(text):
        # "0x" prefix selects hexadecimal, everything else is decimal.
        if text[:2] == "0x":
            return int(text[2:], 16)
        return int(text)

    if len(argv) not in (3, 5):
        error(usage(argv[0]))

    # Parse the range before touching any file, so that a bad argument
    # cannot leave a truncated output file behind.
    start = parse_number(argv[1])
    stop = parse_number(argv[2])

    if len(argv) == 3:
        # Filter mode.  The standard streams belong to the interpreter, so
        # they are flushed but not closed here.
        lines = read(sys.stdin)
        complete(lines, start, stop)
        write(sys.stdout, lines)
        sys.stdout.flush()
        return 0

    # File mode.  The input is read completely and closed before the output
    # is opened for writing, so both names may refer to the same file, and
    # both files are closed even if an exception is raised.
    with io.open(argv[3], 'r', encoding='utf_8') as infile:
        lines = read(infile)
    complete(lines, start, stop)
    with io.open(argv[4], 'w', encoding='utf_8') as outfile:
        write(outfile, lines)

    return 0


# Run as a script: hand main()'s return value to the shell as exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))