File: HangulShaper.py

package info (click to toggle)
python-fontfeatures 1.9.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,096 kB
  • sloc: python: 9,112; makefile: 22
file content (174 lines) | stat: -rw-r--r-- 6,169 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
from .BaseShaper import BaseShaper
from fontFeatures.shaperLib.Buffer import BufferItem

# Constants used by the arithmetic that maps <L,V,T> jamo triples to and from
# precomposed syllables (see the `SBase + (l - LBase) * NCount + ...` formula
# in HangulShaper.preprocess_text).

# First code point of the precomposed-syllable block.
SBase = 0xAC00

# Base code points of the combining L, V and T jamo.  TBase is one *below* the
# first combining T, so a T index of 0 means "no trailing jamo".
LBase, VBase, TBase = 0x1100, 0x1161, 0x11A7

# Number of combining L and V jamo, and of T slots (including the empty slot).
LCount, VCount, TCount = 19, 21, 28

# Syllables per leading jamo, and total number of precomposed syllables.
NCount = VCount * TCount
SCount = LCount * NCount


def isCombiningL(u):
    """Return True if *u* is one of the ``LCount`` code points starting at ``LBase``."""
    last = LBase + LCount - 1
    return LBase <= u <= last


def isCombiningV(u):
    """Return True if *u* is one of the ``VCount`` code points starting at ``VBase``."""
    last = VBase + VCount - 1
    return VBase <= u <= last


def isCombiningT(u):
    """Return True if *u* lies in ``TBase + 1`` .. ``TBase + TCount - 1``.

    ``TBase`` itself is excluded: index 0 stands for "no trailing jamo".
    """
    first = TBase + 1
    last = TBase + TCount - 1
    return first <= u <= last


def isCombinedS(u):
    """Return True if *u* is one of the ``SCount`` code points starting at ``SBase``."""
    last = SBase + SCount - 1
    return SBase <= u <= last


def isL(u):
    """Return True if *u* is in either range this module treats as a leading (L) jamo.

    The ranges are U+1100..U+115F and U+A960..U+A97C (both inclusive).
    """
    if 0x1100 <= u <= 0x115F:
        return True
    return 0xA960 <= u <= 0xA97C


def isV(u):
    """Return True if *u* is in either range this module treats as a vowel (V) jamo.

    The ranges are U+1160..U+11A7 and U+D7B0..U+D7C6 (both inclusive).
    """
    if 0x1160 <= u <= 0x11A7:
        return True
    return 0xD7B0 <= u <= 0xD7C6


def isT(u):
    """Return True if *u* is in either range this module treats as a trailing (T) jamo.

    The ranges are U+11A8..U+11FF and U+D7CB..U+D7FB (both inclusive).
    """
    if 0x11A8 <= u <= 0x11FF:
        return True
    return 0xD7CB <= u <= 0xD7FB


def isHangulTone(u):
    """Return True if *u* is U+302E or U+302F, the code points handled as Hangul tone marks."""
    first, last = 0x302E, 0x302F
    return first <= u <= last


class HangulShaper(BaseShaper):
    """Shaper for Hangul text.

    Registers the jamo features, disables ``calt``, and rewrites the text
    buffer so that conjoining jamo are either composed into precomposed
    syllables or flagged for the ``ljmo``/``vjmo``/``tjmo`` features.
    """

    def collect_features(self, shaper):
        """Register the ``ljmo``, ``vjmo`` and ``tjmo`` features on *shaper*."""
        shaper.add_features("ljmo", "vjmo", "tjmo")

    def override_features(self, shaper):
        """Disable the ``calt`` feature on *shaper*."""
        shaper.disable_feature("calt")

    def preprocess_text(self):
        """Compose, decompose and tag Hangul syllables in ``self.buffer.items``.

        The item list is edited in place in a single left-to-right pass:

        * ``<L,V>`` / ``<L,V,T>`` jamo runs are replaced by the precomposed
          syllable when one can be computed and the font maps it; otherwise
          the jamo stay and are flagged ``ljmo`` / ``vjmo`` / ``tjmo``.
        * A precomposed ``LV`` syllable followed by a combining T is merged
          into the ``LVT`` syllable when the font maps it.
        * A precomposed syllable the font does not map, or an ``LV`` syllable
          followed by a non-combining T, is split into jamo (provided the
          font maps those jamo), which are then flagged.
        * A tone mark (U+302E / U+302F) directly after a syllable is moved in
          front of it unless it is zero width; a tone mark with no syllable
          before it gets a dotted circle (U+25CC) as base when the font maps
          one.

        Every item, including the ones created here, ends up with all three
        jamo masks present (``False`` unless flagged).

        NOTE(review): the control flow follows the in-code hints ("unsafe to
        break", "merge out clusters") that point at HarfBuzz's Hangul
        preprocessing; cluster merging / unsafe-to-break marking are not
        implemented here. Confirm details against upstream.
        """
        items = self.buffer.items
        jamo_features = ("ljmo", "vjmo", "tjmo")

        def cp(index):
            return items[index].codepoint

        def has_glyph(u):
            return self.font.unicode_map.get(u, None)

        def new_item(u):
            # Items created during preprocessing get the same mask setup as
            # the items that were in the buffer from the start.
            item = BufferItem.new_unicode(u)
            for feature in jamo_features:
                item.feature_masks[feature] = False
            return item

        # Setup masks here too
        for item in items:
            for feature in jamo_features:
                item.feature_masks[feature] = False

        # items[start:end] is the most recently recognised syllable; it is
        # only meaningful while start < end.  Because the list is rewritten
        # in place, everything before index i is already final output.
        start = end = 0
        i = 0
        # len(items) is re-read on every step: composition shrinks the list
        # and decomposition / dotted-circle insertion grow it.
        while i < len(items):
            u = cp(i)

            if isHangulTone(u):
                if start < end and end == i:
                    # Tone mark directly follows a valid syllable: move it in
                    # front of the syllable unless it is zero width.
                    # is_zero_width_char is not defined in this class
                    # (presumably inherited from BaseShaper).
                    if not self.is_zero_width_char(u):
                        items.insert(start, items.pop(i))
                    i += 1
                elif has_glyph(0x25CC):
                    # No syllable to attach to: give the tone mark a dotted
                    # circle base.  Same ordering rule as above: a spacing
                    # tone mark precedes its base, a zero-width one follows.
                    if self.is_zero_width_char(u):
                        items.insert(i, new_item(0x25CC))
                    else:
                        items.insert(i + 1, new_item(0x25CC))
                    i += 2
                else:
                    # No dotted circle in the font; leave the mark untouched.
                    i += 1
                start = end = i
                continue

            start = i

            if isL(u) and i + 1 < len(items):
                lead = u
                vowel = cp(i + 1)
                if isV(vowel):
                    # <L,V> or <L,V,T>
                    trail, tindex = 0, 0
                    if i + 2 < len(items):
                        trail = cp(i + 2)
                        if isT(trail):
                            # Only meaningful when isCombiningT(trail).
                            tindex = trail - TBase
                        else:
                            trail = 0
                    run_len = 3 if trail else 2

                    if (
                        isCombiningL(lead)
                        and isCombiningV(vowel)
                        and (not trail or isCombiningT(trail))
                    ):
                        s = (
                            SBase
                            + (lead - LBase) * NCount
                            + (vowel - VBase) * TCount
                            + tindex
                        )
                        if has_glyph(s):
                            # Combine the jamo run into one syllable.
                            items[i : i + run_len] = [new_item(s)]
                            end = start + 1
                            i = end
                            continue

                    # Didn't compose: keep the jamo and flag them.
                    items[i].feature_masks["ljmo"] = True
                    items[i + 1].feature_masks["vjmo"] = True
                    if trail:
                        items[i + 2].feature_masks["tjmo"] = True
                    end = start + run_len
                    i = end
                    continue

            elif isCombinedS(u):
                s = u
                has_glyph_s = has_glyph(s)
                lindex, nindex = divmod(s - SBase, NCount)
                vindex, tindex = divmod(nindex, TCount)

                if (
                    not tindex
                    and i + 1 < len(items)
                    and isCombiningT(cp(i + 1))
                ):
                    # <LV,T>: try to merge into the LVT syllable.
                    new_s = s + (cp(i + 1) - TBase)
                    if has_glyph(new_s):
                        items[i : i + 2] = [new_item(new_s)]
                        end = start + 1
                        i = end
                        continue

                # Decompose when the font lacks the syllable itself, or when
                # an LV syllable is followed by a T it could not absorb.
                if not has_glyph_s or (
                    not tindex and i + 1 < len(items) and isT(cp(i + 1))
                ):
                    decomposed = [LBase + lindex, VBase + vindex]
                    if tindex:
                        decomposed.append(TBase + tindex)
                    if all(has_glyph(c) for c in decomposed):
                        # Replace only the syllable itself; whatever follows
                        # it stays in the buffer.
                        items[i : i + 1] = [new_item(c) for c in decomposed]
                        s_len = len(decomposed)
                        if has_glyph_s and not tindex:
                            # We split an LV only because of the T that
                            # follows it; that T joins the syllable.
                            s_len += 1
                        end = start + s_len
                        items[start].feature_masks["ljmo"] = True
                        items[start + 1].feature_masks["vjmo"] = True
                        if start + 2 < end:
                            items[start + 2].feature_masks["tjmo"] = True
                        i = end
                        continue

                if has_glyph_s:
                    # Syllable kept as is: it is still a valid base for a
                    # tone mark that may follow.
                    end = start + 1
                    i = end
                    continue

            # No recognisable syllable here; end <= start, so a following
            # tone mark will not be reordered around this item.
            i += 1