File: MyanmarShaper.py

package info (click to toggle)
python-fontfeatures 1.9.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,096 kB
  • sloc: python: 9,112; makefile: 22
file content (199 lines) | stat: -rw-r--r-- 6,058 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from .IndicShaperData import IndicPosition, make_syllable_machine
from .SyllabicShaper import SyllabicShaper
from collections import OrderedDict


# Per-codepoint overrides of the syllabic category used by the Myanmar
# shaper.  Keys are Unicode code points, values are the category names that
# appear in the syllable grammar.  Single code points are listed in the
# literal below; contiguous ranges are added by the loops that follow it.
myanmar_category_reassignments = {
    0x002D: "GB",
    0x00A0: "GB",
    0x00D7: "GB",
    0x1004: "Ra",
    0x101B: "Ra",
    0x1032: "A",
    0x1036: "A",
    0x1038: "SM",
    0x1039: "H",
    0x103A: "As",
    0x103B: "MY",
    0x103C: "MR",
    0x103D: "MW",
    0x103E: "MH",
    0x1040: "D",  # XXX The spec says D0, but Uniscribe doesn't seem to do.
    0x104A: "P",
    0x104B: "P",
    0x104E: "C",
    0x105A: "Ra",
    0x105E: "MY",
    0x105F: "MY",
    0x1060: "MH",
    0x1082: "MW",
    0x109A: "SM",
    0x109B: "SM",
    0x109C: "SM",
    0x2012: "GB",
    0x2013: "GB",
    0x2014: "GB",
    0x2015: "GB",
    0x2022: "GB",
    0x25CC: "GB",
    0x25FB: "GB",
    0x25FC: "GB",
    0x25FD: "GB",
    0x25FE: "GB",
    0xAA74: "C",
    0xAA75: "C",
    0xAA76: "C",  # https://github.com/harfbuzz/harfbuzz/issues/218
}

# Digits (other than 0x1040, which is handled in the literal above).
for cp in range(0x1041, 0x1049 + 1):
    myanmar_category_reassignments[cp] = "D"
for cp in range(0x1090, 0x1099 + 1):
    myanmar_category_reassignments[cp] = "D"
# Variation selectors occupy U+FE00..U+FE0F only.  The range previously
# started at 0xF300, which wrongly tagged thousands of unrelated code points
# (private use, CJK compatibility, Arabic presentation forms) as "VS".
for cp in range(0xFE00, 0xFE0F + 1):
    myanmar_category_reassignments[cp] = "VS"
for cp in range(0x1087, 0x108F + 1):
    myanmar_category_reassignments[cp] = "SM"
for cp in [0x1063, 0x1064, 0x1069, 0x106A, 0x106B, 0x106C, 0x106D, 0xAA7B]:
    myanmar_category_reassignments[cp] = "PT"


# Syllable grammar handed to make_syllable_machine.  Each entry maps a rule
# name to a regular-expression-like pattern over category names; later rules
# refer to earlier ones by name, so the insertion order is significant.
states = OrderedDict(
    [
        ("j", "ZWJ|ZWNJ"),  # Joiners
        ("k", "(Ra As H)"),  # Kinzi
        ("c", "C|Ra"),  # is_consonant
        ("medial_group", "MY? As? MR? ((MW MH? | MH) As?)?"),
        ("main_vowel_group", "(VPre VS?)* VAbv* VBlw* A* (DB As?)?"),
        ("post_vowel_group", "VPst MH? As* VAbv* A* (DB As?)?"),
        ("pwo_tone_group", "PT A* DB? As?"),
        (
            "complex_syllable_tail",
            "As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* V* j?",
        ),
        ("syllable_tail", "(H (c|IV) VS?)* (H | complex_syllable_tail)"),
        ("consonant_syllable", "(k|CS)? (c|IV|D|GB) VS? syllable_tail"),
        ("punctuation_cluster", "P V"),
        ("broken_cluster", "k? VS? syllable_tail"),
    ]
)


class MyanmarShaper(SyllabicShaper):
    """Myanmar specialisation of ``SyllabicShaper``.

    Declares the feature lists, builds the syllable machine from the
    module-level ``states`` grammar, reassigns per-glyph categories and
    performs the initial reordering of consonant syllables and broken
    clusters.
    """

    basic_features = ["rphf", "pref", "blwf", "pstf"]
    other_features = ["pres", "abvs", "blws", "psts"]
    repha = None
    syllable_machine = make_syllable_machine(
        states,
        additional_categories=[
            "VS", "MW", "P", "As", "PT", "MY", "MH",
            "D", "GB", "MR", "VPre", "VAbv", "VBlw", "VPst",
        ],
    )
    syllable_types = [
        "consonant_syllable",
        "punctuation_cluster",
        "broken_cluster",
        "other",
    ]

    def reassign_category(self, item):
        """Rewrite ``item``'s syllabic category (and position) in place.

        A code point listed in ``myanmar_category_reassignments`` takes the
        category recorded there.  Afterwards, an item whose category is
        ``"M"`` is split into ``VPre``/``VAbv``/``VBlw``/``VPst`` according
        to its syllabic position; the pre-base case also moves the position
        to ``IndicPosition.PRE_M``.
        """
        override = myanmar_category_reassignments.get(item.codepoint)
        if override is not None:
            item.syllabic_category = override

        if item.syllabic_category != "M":
            return

        matra_position = item.syllabic_position
        if matra_position == IndicPosition.PRE_C:
            item.syllabic_category = "VPre"
            item.syllabic_position = IndicPosition.PRE_M
            return
        if matra_position == IndicPosition.ABOVE_C:
            item.syllabic_category = "VAbv"
            return
        if matra_position == IndicPosition.BELOW_C:
            item.syllabic_category = "VBlw"
            return
        if matra_position == IndicPosition.POST_C:
            item.syllabic_category = "VPst"

    def initial_reordering_consonant_syllable(self, start, end):
        """Assign positions to buffer items ``start..end-1`` and sort them.

        The syllable may open with a three-item ``Ra As H`` sequence; the
        base is the first consonant-like item after that sequence.  Every
        item then receives a syllabic position and the slice is replaced by
        a copy sorted on that position (``sorted`` is stable, so items with
        equal positions keep their relative order).
        """

        def category_at(index):
            return self.buffer.items[index].syllabic_category

        def position_at(index):
            return self.buffer.items[index].syllabic_position

        def place(index, position):
            self.buffer.items[index].syllabic_position = position

        def by_position(glyph):
            return glyph.syllabic_position

        # Categories accepted as a base.
        # XXX Something else is Placeholder too
        # NOTE(review): the syllable grammar names IV and GB as possible
        # bases while this list checks V and PLACEHOLDER -- confirm the
        # category naming against IndicShaperData.
        base_categories = [
            "C", "CS", "Ra", "V", "PLACEHOLDER", "DOTTEDCIRCLE", "CM",
        ]

        has_reph = (
            start + 3 <= end
            and category_at(start) == "Ra"
            and category_at(start + 1) == "As"
            and category_at(start + 2) == "H"
        )
        # First index after the optional Ra As H prefix.
        prefix_end = start + 3 if has_reph else start

        # First consonant-like item at or after the prefix; falls back to
        # ``start`` when none is found.
        base = next(
            (
                idx
                for idx in range(prefix_end, end)
                if category_at(idx) in base_categories
            ),
            start,
        )

        for idx in range(start, prefix_end):
            place(idx, IndicPosition.AFTER_MAIN)
        for idx in range(prefix_end, base):
            place(idx, IndicPosition.PRE_C)

        cursor = max(prefix_end, base)
        if cursor < end:
            place(cursor, IndicPosition.BASE_C)
            cursor += 1

        # ``pos`` tracks the position handed to items after the base; it
        # advances AFTER_MAIN -> BELOW_C -> AFTER_SUB as the loop proceeds.
        pos = IndicPosition.AFTER_MAIN
        for idx in range(cursor, end):
            category = category_at(idx)
            if category == "MR":
                place(idx, IndicPosition.PRE_C)
                continue
            if position_at(idx) < IndicPosition.BASE_C:  # Left Matra
                continue
            if category == "VS":
                # Takes the position of the item immediately before it.
                place(idx, position_at(idx - 1))
                continue

            if pos == IndicPosition.AFTER_MAIN and category == "VBlw":
                pos = IndicPosition.BELOW_C
            elif pos == IndicPosition.BELOW_C:
                if category == "A":
                    place(idx, IndicPosition.BEFORE_SUB)
                    continue
                if category != "VBlw":
                    pos = IndicPosition.AFTER_SUB
            place(idx, pos)

        self.buffer.items[start:end] = sorted(
            self.buffer.items[start:end], key=by_position
        )

    # Both syllable types share the same initial reordering routine.
    initial_reordering_syllable = {
        "broken_cluster": initial_reordering_consonant_syllable,
        "consonant_syllable": initial_reordering_consonant_syllable,
    }