File: gen-casefold-txt.py

package info (click to toggle)
glib2.0 2.58.3-2
  • links: PTS, VCS
  • area: main
  • in suites: buster, buster-proposed-updates
  • size: 48,744 kB
  • sloc: ansic: 452,196; xml: 16,781; python: 6,149; makefile: 3,776; sh: 1,499; perl: 1,140; cpp: 9
file content (78 lines) | stat: -rwxr-xr-x 2,522 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""
gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.
See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
Usage:
    I consider the output of this program to be unrestricted.
    Use it as you will.
"""

import sys
import argparse


def main(argv):
    """Print casefolding test cases built from a CaseFolding.txt file.

    A fixed header (naming the Unicode version and containing one
    hand-crafted test) is written to stdout first.  Then, for every entry
    of the input file whose status is neither ``S`` (simple) nor ``T``
    (Turkic), one line is printed consisting of the source character, a
    tab, and the string formed from the mapped code points.

    Args:
        argv: Full argument vector; ``argv[0]`` is ignored and the rest is
            parsed as ``UNICODE-VERSION CaseFolding.txt``.

    Returns:
        None.

    Raises:
        SystemExit: If an entry has fewer than three ``;``-separated
            fields, or (via argparse) if the command line is invalid.
    """
    parser = argparse.ArgumentParser(
        description="Generate test cases for casefolding from Unicode data")
    # Use identifier-friendly dest names; metavar keeps the help and error
    # text identical to what the user saw before.
    parser.add_argument("version", metavar="UNICODE-VERSION")
    parser.add_argument("filename", metavar="CaseFolding.txt")
    args = parser.parse_args(argv[1:])

    print("""\
# Test cases generated from Unicode {} data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#""".format(args.version))

    # Names of fields in the CaseFolding table
    CODE, STATUS, MAPPING = range(3)

    # Statuses that are not turned into test cases: simple and Turkic.
    skipped_statuses = ("S", "T")

    with open(args.filename, encoding="utf-8") as fileobj:
        for line in fileobj:
            # strip comments and skip empty lines
            line = line.split("#", 1)[0].strip()
            if not line:
                continue

            # Only the first three fields matter; anything after the third
            # separator (normally just the empty trailing field) is dropped.
            fields = [f.strip() for f in line.split(";", 3)[:3]]
            if len(fields) != 3:
                raise SystemExit(
                    "Entry for %s has wrong number of fields (%d)" % (
                        fields[CODE], len(fields)))

            # Compare against whole status values.  A substring test such
            # as `status in "ST"` would also match an empty status (and the
            # literal "ST"), silently dropping malformed entries.
            if fields[STATUS] in skipped_statuses:
                continue

            code = chr(int(fields[CODE], 16))
            # The mapping is a space-separated list of hex code points.
            values = "".join(
                chr(int(v, 16)) for v in fields[MAPPING].split())
            print("{}\t{}".format(code, values))


if __name__ == "__main__":
    # Executed as a script: run main() on the process arguments and use
    # its return value as the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)