1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
from .IndicShaperData import IndicPosition, make_syllable_machine
from .SyllabicShaper import SyllabicShaper
from collections import OrderedDict
# Codepoint -> syllabic category overrides for Myanmar shaping.
# MyanmarShaper.reassign_category looks each item's codepoint up in this
# table and, when present, replaces the item's syllabic category with the
# value stored here.
myanmar_category_reassignments = {
    0x002D: "GB",
    0x00A0: "GB",
    0x00D7: "GB",
    0x1004: "Ra",
    0x101B: "Ra",
    0x1032: "A",
    0x1036: "A",
    0x1038: "SM",
    0x1039: "H",
    0x103A: "As",
    0x103B: "MY",
    0x103C: "MR",
    0x103D: "MW",
    0x103E: "MH",
    0x1040: "D",  # XXX The spec says D0, but Uniscribe doesn't seem to do.
    0x104A: "P",
    0x104B: "P",
    0x104E: "C",
    0x105A: "Ra",
    0x105E: "MY",
    0x105F: "MY",
    0x1060: "MH",
    0x1082: "MW",
    0x109A: "SM",
    0x109B: "SM",
    0x109C: "SM",
    0x2012: "GB",
    0x2013: "GB",
    0x2014: "GB",
    0x2015: "GB",
    0x2022: "GB",
    0x25CC: "GB",
    0x25FB: "GB",
    0x25FC: "GB",
    0x25FD: "GB",
    0x25FE: "GB",
    0xAA74: "C",
    0xAA75: "C",
    0xAA76: "C",  # https://github.com/harfbuzz/harfbuzz/issues/218
}
# Digits U+1041..U+1049 and U+1090..U+1099 (U+1040 is listed explicitly above).
for cp in range(0x1041, 0x1049 + 1):
    myanmar_category_reassignments[cp] = "D"
for cp in range(0x1090, 0x1099 + 1):
    myanmar_category_reassignments[cp] = "D"
# Variation selectors VS1..VS16 are exactly U+FE00..U+FE0F.  The range must
# not start lower: U+F300..U+FDFF covers Private Use, CJK Compatibility
# Ideographs and Arabic Presentation Forms, none of which are selectors.
for cp in range(0xFE00, 0xFE0F + 1):
    myanmar_category_reassignments[cp] = "VS"
# Sign marks U+1087..U+108D and U+108F.  U+108E sits in the middle of this
# run but is a consonant letter (RUMAI PALAUNG FA), so it keeps its default
# category instead of being turned into a sign mark.
for cp in range(0x1087, 0x108F + 1):
    if cp != 0x108E:
        myanmar_category_reassignments[cp] = "SM"
# Codepoints that take the "PT" category (used by pwo_tone_group in the
# syllable grammar).
for cp in [0x1063, 0x1064, 0x1069, 0x106A, 0x106B, 0x106C, 0x106D, 0xAA7B]:
    myanmar_category_reassignments[cp] = "PT"
# Named rules of the Myanmar syllable grammar, kept in definition order.
# Later rules refer to earlier ones by name (e.g. syllable_tail uses
# complex_syllable_tail), and the whole mapping is handed to
# make_syllable_machine by MyanmarShaper.
_myanmar_syllable_rules = [
    ("j", "ZWJ|ZWNJ"),  # Joiners
    ("k", "(Ra As H)"),  # Kinzi
    ("c", "C|Ra"),  # is_consonant
    ("medial_group", "MY? As? MR? ((MW MH? | MH) As?)?"),
    ("main_vowel_group", "(VPre VS?)* VAbv* VBlw* A* (DB As?)?"),
    ("post_vowel_group", "VPst MH? As* VAbv* A* (DB As?)?"),
    ("pwo_tone_group", "PT A* DB? As?"),
    (
        "complex_syllable_tail",
        "As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* V* j?",
    ),
    ("syllable_tail", "(H (c|IV) VS?)* (H | complex_syllable_tail)"),
    ("consonant_syllable", "(k|CS)? (c|IV|D|GB) VS? syllable_tail"),
    ("punctuation_cluster", "P V"),
    ("broken_cluster", "k? VS? syllable_tail"),
]
states = OrderedDict(_myanmar_syllable_rules)
class MyanmarShaper(SyllabicShaper):
    """Syllabic shaper configuration and initial reordering for Myanmar.

    The class supplies the Myanmar syllable machine, the per-codepoint
    category overrides, and the reordering routine applied to
    ``consonant_syllable`` and ``broken_cluster`` syllables.  How the class
    attributes are consumed is defined by ``SyllabicShaper`` (not shown in
    this file).
    """

    # Feature tags; presumably applied in stages by SyllabicShaper -- confirm
    # against the base class.
    basic_features = ["rphf", "pref", "blwf", "pstf"]
    other_features = ["pres", "abvs", "blws", "psts"]
    repha = None
    # Syllable matcher built from the grammar in ``states``; the extra
    # category names are the Myanmar-specific ones used by that grammar.
    syllable_machine = make_syllable_machine(
        states,
        additional_categories=[
            "VS",
            "MW",
            "P",
            "As",
            "PT",
            "MY",
            "MH",
            "D",
            "GB",
            "MR",
            "VPre",
            "VAbv",
            "VBlw",
            "VPst",
        ],
    )
    syllable_types = [
        "consonant_syllable",
        "punctuation_cluster",
        "broken_cluster",
        "other",
    ]

    def reassign_category(self, item):
        """Apply Myanmar-specific category overrides to ``item`` in place.

        The codepoint table is consulted first; afterwards a generic matra
        (category ``"M"``) is split into ``VPre``/``VAbv``/``VBlw``/``VPst``
        according to its current syllabic position.  A pre-base matra also
        has its position changed to ``IndicPosition.PRE_M``.
        """
        cp = item.codepoint
        if cp in myanmar_category_reassignments:
            item.syllabic_category = myanmar_category_reassignments[cp]
        if item.syllabic_category == "M":
            if item.syllabic_position == IndicPosition.PRE_C:
                item.syllabic_category = "VPre"
                item.syllabic_position = IndicPosition.PRE_M
            elif item.syllabic_position == IndicPosition.ABOVE_C:
                item.syllabic_category = "VAbv"
            elif item.syllabic_position == IndicPosition.BELOW_C:
                item.syllabic_category = "VBlw"
            elif item.syllabic_position == IndicPosition.POST_C:
                item.syllabic_category = "VPst"

    def initial_reordering_consonant_syllable(self, start, end):
        """Assign positions to items ``start..end-1`` and sort them in place.

        ``start`` and ``end`` are indices into ``self.buffer.items`` (``end``
        exclusive).  Every item in the range gets a ``syllabic_position``,
        then the slice is replaced by the same items stably sorted by that
        position.
        """
        # Categories that may serve as the syllable base.  "GB" is the name
        # this module assigns to placeholder codepoints (NBSP, dotted circle,
        # dashes, ...) and the grammar accepts it as a base, so it has to be
        # recognised here as well; otherwise a later stacked consonant would
        # be picked as the base of a placeholder-initial syllable.
        # NOTE(review): the grammar also allows "IV" as a base while this
        # list carries "V" -- confirm which name items actually have.
        consonant_categories = (
            "C",
            "CS",
            "Ra",
            "V",
            "PLACEHOLDER",
            "DOTTEDCIRCLE",
            "GB",
            "CM",
        )

        def cat(i):
            return self.buffer.items[i].syllabic_category

        def get_pos(i):
            return self.buffer.items[i].syllabic_position

        def set_pos(i, pos):
            self.buffer.items[i].syllabic_position = pos

        def is_consonant(n):
            return cat(n) in consonant_categories

        # A leading Ra + As + H sequence (the "k" rule of the grammar) is
        # skipped when searching for the base.
        has_reph = (
            start + 3 <= end
            and cat(start) == "Ra"
            and cat(start + 1) == "As"
            and cat(start + 2) == "H"
        )
        limit = start + 3 if has_reph else start

        # The base is the first consonant at or after ``limit``; when none is
        # found it stays at ``start``.
        base = start
        for i in range(limit, end):
            if is_consonant(i):
                base = i
                break

        # The three leading items, when present, are placed after the base.
        i = start
        while i < limit:
            set_pos(i, IndicPosition.AFTER_MAIN)
            i += 1
        # Everything between them and the base is pre-base.
        while i < base:
            set_pos(i, IndicPosition.PRE_C)
            i += 1
        if i < end:
            set_pos(i, IndicPosition.BASE_C)
            i += 1

        # Walk the remainder of the syllable; ``pos`` is the running position
        # class and only ever advances AFTER_MAIN -> BELOW_C -> AFTER_SUB.
        pos = IndicPosition.AFTER_MAIN
        for j in range(i, end):
            if cat(j) == "MR":
                # Pre-base reordering of MR.
                set_pos(j, IndicPosition.PRE_C)
            elif get_pos(j) < IndicPosition.BASE_C:  # Left Matra
                # Already positioned before the base; leave it alone.
                pass
            elif cat(j) == "VS":
                # A variation selector takes the position of the item
                # immediately before it.
                set_pos(j, get_pos(j - 1))
            elif pos == IndicPosition.AFTER_MAIN and cat(j) == "VBlw":
                pos = IndicPosition.BELOW_C
                set_pos(j, pos)
            elif pos == IndicPosition.BELOW_C and cat(j) == "A":
                set_pos(j, IndicPosition.BEFORE_SUB)
            elif pos == IndicPosition.BELOW_C and cat(j) == "VBlw":
                set_pos(j, pos)
            elif pos == IndicPosition.BELOW_C and cat(j) != "A":
                pos = IndicPosition.AFTER_SUB
                set_pos(j, pos)
            else:
                set_pos(j, pos)

        # sorted() is stable, so items sharing a position keep their order.
        self.buffer.items[start:end] = sorted(
            self.buffer.items[start:end], key=lambda x: x.syllabic_position
        )

    # Syllable type -> reordering routine; both types share one routine.
    initial_reordering_syllable = {
        "broken_cluster": initial_reordering_consonant_syllable,
        "consonant_syllable": initial_reordering_consonant_syllable,
    }
|