File: test_derived_props_files.py

package info (click to toggle)
python-precis-i18n 1.1.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,836 kB
  • sloc: python: 1,825; sh: 28; makefile: 3
file content (151 lines) | stat: -rw-r--r-- 4,042 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import re
import unittest

# Versions whose `derived-props-<version>.txt` file is loaded by
# `_load_tables`, listed in increasing order. `test_derived_props` compares
# each entry with the one that follows it.
VERSIONS = [
    "6.1",
    "6.2",
    "6.3",
    "8.0",
    "9.0",
    "10.0",
    "11.0",
    "12.0",
    "12.1",
    "13.0",
    "14.0",
]

# Table value for an unassigned code point. `_allowed_change` accepts any
# transition that starts from this value.
UNASSIGNED = 1

# Maps the property names found in the data files to the byte value stored per
# code point. Zero is not used as a value: the loaders start from a zero-filled
# table and treat a zero that is still present afterwards as a missing code
# point.
PROPS = {
    "UNASSIGNED": UNASSIGNED,
    "PVALID": 2,
    "FREE_PVAL": 3,
    "ID_DIS or FREE_PVAL": 3,  # found in IANA format only; same value as FREE_PVAL
    "DISALLOWED": 4,
    "CONTEXTJ": 5,
    "CONTEXTO": 6,
}

# One line of a derived-props file: "<lo>-<hi> <PROPERTY>/<suffix>", where
# <lo> and <hi> are 4 to 6 upper-case hex digits.
# Groups: 1 = lo, 2 = hi, 3 = property name. The suffix after the slash is
# matched but not captured.
LINE_REGEX = re.compile(r"^([0-9A-F]{4,6})-([0-9A-F]{4,6}) ([A-Z_]+)/[a-z0-9_]+$")

# One data line of the IANA csv file: "<lo>[-<hi>],<property>,<rest>".
# Groups: 1 = lo, 2 = "-<hi>" including the leading dash (None for a single
# code point), 3 = property name. The remainder of the line is not captured.
IANA_LINE_REGEX = re.compile(r"^([0-9A-F]{4,6})(-[0-9A-F]{4,6})?,([^,]+),.+$")

# Directory containing this module; the data files are opened relative to it.
DIR_PATH = os.path.dirname(__file__)

# Allowed transitions between two versions V1 -> V2 for specific code points.
# Keys are code points; values are (value in V1, value in V2) pairs that
# `_allowed_change` accepts in addition to changes away from UNASSIGNED.
EXCEPTIONS = {0x111C9: (PROPS["FREE_PVAL"], PROPS["PVALID"])}  # SHARADA SANDHI MARK


def _allowed_change(cp, tbl1, tbl2):
    """Tell whether code point `cp` may go from value `tbl1` to value `tbl2`.

    Despite their names, `tbl1` and `tbl2` are the property values of `cp` in
    the older and the newer table, not the tables themselves.

    A transition is accepted in exactly two situations:

    * the older value is UNASSIGNED (any newer value is fine), or
    * `cp` has an entry in `EXCEPTIONS` and the (older, newer) pair is equal
      to that entry, e.g. 0x111C9 (SHARADA SANDHI MARK) moving from FREE_PVAL
      to PVALID.

    The versions being compared are not taken into account here, so an
    `EXCEPTIONS` entry applies to every pair of tables.

    Returns:
        True if the transition is accepted, False otherwise. An unchanged
        value that is not UNASSIGNED also yields False.
    """
    # A code point without an exception yields None from `.get`, which never
    # equals the (older, newer) tuple.
    return tbl1 == UNASSIGNED or (tbl1, tbl2) == EXCEPTIONS.get(cp)


def _load_table(filename):
    """Load data from `derived-props-<version>.txt` file.

    Every line of the file must match `LINE_REGEX`; the property named on the
    line is stored for each code point in the inclusive range `lo..hi`.

    Args:
        filename: Path of the derived-props file to read.

    Returns:
        A bytearray of length 0x110000 indexed by code point, where each byte
        is the `PROPS` value of that code point.

    Raises:
        AssertionError: If a line does not match `LINE_REGEX`, or if some code
            point is not covered by any line.
        KeyError: If a line names a property that is not in `PROPS`.
        IndexError: If a range extends past code point 0x10FFFF.
    """

    # Zero marks "not seen yet"; none of the PROPS values is zero.
    table = bytearray(0x110000)

    # Any line accepted by LINE_REGEX consists of ASCII characters only, so
    # decoding as UTF-8 gives the same text regardless of the locale's default
    # encoding.
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            m = LINE_REGEX.match(line)
            assert m, "Unexpected format: %s" % line

            lo, hi = int(m.group(1), 16), int(m.group(2), 16)
            prop = PROPS[m.group(3)]
            # Filled one index at a time so that an out-of-range code point
            # raises IndexError instead of silently growing the table.
            for cp in range(lo, hi + 1):
                table[cp] = prop

    # Check that all codepoints are assigned: `find` returns the index of the
    # first byte that is still zero, or -1 if there is none.
    missing = table.find(0)
    assert missing == -1, "Codepoint missing: %d" % missing

    return table


def _load_table_iana(filename):
    """Load table from IANA csv file.

    The header line "Codepoint,Property,Description" is skipped. Every other
    line must match `IANA_LINE_REGEX` and names either a single code point or
    an inclusive `lo-hi` range together with its property.

    Args:
        filename: Path of the csv file to read.

    Returns:
        A bytearray of length 0x110000 indexed by code point, where each byte
        is the `PROPS` value of that code point.

    Raises:
        AssertionError: If a line does not match `IANA_LINE_REGEX`, or if some
            code point is not covered by any line.
        KeyError: If a line names a property that is not in `PROPS`.
        IndexError: If a range extends past code point 0x10FFFF.
    """

    # Zero marks "not seen yet"; none of the PROPS values is zero.
    table = bytearray(0x110000)

    # Pin the encoding so that parsing does not depend on the locale.
    # NOTE(review): assumes the csv is ASCII/UTF-8 text -- confirm against the
    # data file.
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            # Ignore csv header.
            if line == "Codepoint,Property,Description\n":
                continue

            m = IANA_LINE_REGEX.match(line)
            assert m, "Unexpected format: %s" % line

            lo = int(m.group(1), 16)
            # Group 2 is "-<hi>" when a range is given; drop the leading dash.
            # A single code point is treated as the range lo..lo.
            hi = int(m.group(2)[1:], 16) if m.group(2) else lo

            prop = PROPS[m.group(3)]
            # Filled one index at a time so that an out-of-range code point
            # raises IndexError instead of silently growing the table.
            for cp in range(lo, hi + 1):
                table[cp] = prop

    # Check that all codepoints are assigned: `find` returns the index of the
    # first byte that is still zero, or -1 if there is none.
    missing = table.find(0)
    assert missing == -1, "Codepoint missing: %d" % missing

    return table


def _load_tables():
    """Read the derived-props file of every entry in `VERSIONS`.

    Returns:
        A list of `(version, table)` tuples in the same order as `VERSIONS`,
        where `table` is the result of `_load_table` for the file
        `derived-props-<version>.txt` located in `DIR_PATH`.
    """
    path_pattern = os.path.join(DIR_PATH, "derived-props-%s.txt")
    return [(ver, _load_table(path_pattern % ver)) for ver in VERSIONS]


class TestDerivedPropsFiles(unittest.TestCase):
    """Consistency checks for the data files stored next to this module."""

    def test_derived_props(self):
        """Check derived property values do not change as UCD version increases.

        The only changes permitted are those accepted by `_allowed_change`:
        from UNASSIGNED to anything, plus the per-code-point transitions
        listed in `EXCEPTIONS`.
        """
        tables = _load_tables()

        # Compare each table with its successor in `VERSIONS` order.
        for (ver1, tbl1), (ver2, tbl2) in zip(tables, tables[1:]):
            for cp, (old, new) in enumerate(zip(tbl1, tbl2)):
                # An unchanged value always passes, so the transition rules
                # are consulted only for code points that actually differ.
                if old != new and not _allowed_change(cp, old, new):
                    self.assertEqual(
                        old, new, "cp = %d (%s -> %s)" % (cp, ver1, ver2)
                    )

    def test_iana_derived_props(self):
        """Compare IANA precis-tables to derived-props-6.3.txt"""

        iana_path = os.path.join(DIR_PATH, "iana-precis-tables-6.3.0.csv")
        test_path = os.path.join(DIR_PATH, "derived-props-6.3.txt")

        iana_table = _load_table_iana(iana_path)
        table = _load_table(test_path)

        # Tables should be identical.
        if table == iana_table:
            return

        # Report the first differing code point rather than a bare
        # "False is not true".
        for cp, (ours, theirs) in enumerate(zip(table, iana_table)):
            if ours != theirs:
                self.fail(
                    "cp = %d (derived-props %d != IANA %d)" % (cp, ours, theirs)
                )
        self.fail(
            "tables differ in length: %d != %d" % (len(table), len(iana_table))
        )