File: gen-vowel-constraints.py

package info (click to toggle)
python-fontfeatures 1.9.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,096 kB
  • sloc: python: 9,112; makefile: 22
file content (156 lines) | stat: -rwxr-xr-x 5,930 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python3

"""Generator of the function to prohibit certain vowel sequences.

It creates ``preprocess_text_vowel_constraints``, which inserts dotted
circles into sequences prohibited by the USE script development spec.
The generated function takes a fontFeatures ``shaperLib`` buffer and
modifies its ``items`` list in place.

usage: ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt

"""

import collections
import youseedee

def write (s):
    """Emit the string ``s`` on standard output as UTF-8 encoded bytes.

    The text layer of ``sys.stdout`` is flushed first, so anything that was
    already printed reaches the underlying byte stream before ``s`` does.
    """
    stream = sys.stdout
    stream.flush ()
    payload = s.encode ('utf-8')
    stream.buffer.write (payload)
import sys

# Exactly one command-line argument is expected; otherwise the module
# docstring is shown as the usage message and the script exits.
if len (sys.argv) != 2:
    sys.exit (__doc__)

# scripts: code point -> script name, for every code point of every range.
# script_order: script name -> start of the first range seen for that script.
scripts = {}
script_order = {}

for range_start, range_end, script_name in youseedee.parse_file_ranges("Scripts.txt"):
    # Ranges are inclusive at both ends.
    scripts.update (dict.fromkeys (range (range_start, range_end + 1), script_name))
    # Only the first range of a script determines its ordering key.
    script_order.setdefault (script_name, range_start)

class ConstraintSet (object):
    """A set of prohibited code point sequences.

    The set is stored as a prefix tree. ``str()`` of an instance yields
    Python source text (a run of ``if`` statements that set ``matched``)
    which tests ``buffer.items[i]`` and the items following it against the
    stored sequences.

    Args:
        constraint (List[int]): A prohibited code point sequence.

    """
    def __init__ (self, constraint):
        # Either a list or a dictionary. As a list of code points, it
        # represents a prohibited code point sequence. As a dictionary,
        # it represents a set of prohibited sequences, where each item
        # represents the set of prohibited sequences starting with the
        # key (a code point) concatenated with any of the values
        # (ConstraintSets).
        self._c = constraint

    def add (self, constraint):
        """Add a constraint to this set.

        Args:
            constraint (List[int]): A prohibited code point sequence. An
                empty sequence is ignored.

        """
        if not constraint:
            return
        first = constraint[0]
        rest = constraint[1:]
        if isinstance (self._c, list):
            if constraint == self._c[:len (constraint)]:
                # The new sequence is a prefix of the stored one: keep
                # only the shorter sequence.
                self._c = constraint
            elif self._c != constraint[:len (self._c)]:
                # Neither sequence is a prefix of the other: switch to the
                # dictionary form so that both can be stored below.
                self._c = {self._c[0]: ConstraintSet (self._c[1:])}
            # Otherwise the stored sequence is a prefix of the new one and
            # nothing needs to change.
        if isinstance (self._c, dict):
            if first in self._c:
                self._c[first].add (rest)
            else:
                self._c[first] = ConstraintSet (rest)

    @staticmethod
    def _indent (depth):
        """Return the leading whitespace for ``depth`` levels (4 spaces each)."""
        return ('    ' * depth)

    @staticmethod
    def _cp_accessor(index):
        """Return the source text reading the code point ``index`` items after ``i``."""
        if index:
            return "buffer.items[i+{}].codepoint".format(index)
        return "buffer.items[i].codepoint"

    def __str__ (self, index=0, depth=2):
        """Return generated Python source that checks this set.

        Args:
            index (int): Offset from ``i`` of the buffer item that the first
                code point of this set is compared against.
            depth (int): Indentation level (units of 4 spaces) of the
                emitted code.

        Returns:
            str: Source lines, each terminated by a newline.

        Raises:
            AssertionError: If a sequence of length 0 or 1 is reached at an
                ``index`` other than 2 or 1 respectively; the general case
                is not implemented.

        """
        s = []
        indent = self._indent (depth)

        if isinstance (self._c, list):
            if len (self._c) == 0:
                assert index == 2, 'Cannot use `matched` for this constraint; the general case has not been implemented'
                s.append ('{}matched = True\n'.format (indent))
            elif len (self._c) == 1:
                assert index == 1, 'Cannot use `matched` for this constraint; the general case has not been implemented'
                s.append ('{}matched = 0x{:04X} == {}\n'.format (indent, self._c[0], self._cp_accessor(index)))
            else:
                last = len (self._c) - 1
                s.append ('{}if (0x{:04X} == {} and\n'.format (indent, self._c[0], self._cp_accessor(index)))
                if index:
                    # NOTE(review): this emitted bound looks stricter than
                    # what indexing ``i + index + 1`` requires -- confirm.
                    s.append ('{}i + {} < len(buffer.items)-1 and\n'.format (self._indent (depth + 2), index + 1))
                for i, cp in enumerate (self._c[1:], start=1):
                    # The leading space in ' and' is required: without it the
                    # keyword fuses with ``.codepoint`` and the generated
                    # code does not parse.
                    s.append ('{}0x{:04X} == {}{}\n'.format (
                        self._indent (depth + 2), cp, self._cp_accessor(index + i), '):' if i == last else ' and')
                    )
                s.append ('{}matched = True\n'.format (self._indent (depth + 1)))
        else:
            # Group first code points whose continuations generate identical
            # code so that they can share a single ``if ... in [...]`` test.
            cases = collections.defaultdict (set)
            for first, rest in sorted (self._c.items ()):
                cases[rest.__str__ (index + 1, depth + 2)].add (first)
            # Emit the groups ordered by their smallest code point.
            for body, labels in sorted (cases.items (), key=lambda b_ls: min (b_ls[1])):
                if len(labels) == 1:
                    s.append (self._indent (depth + 1) + "if {} == 0x{:04X}:\n".format(self._cp_accessor(index), list(labels)[0]))
                else:
                    points = ", ".join(['0x{:04X}'.format(cp) for cp in sorted(labels)])
                    s.append (self._indent (depth + 1) + "if {} in [{}]:\n".format(self._cp_accessor(index), points))
                s.append (body)
        return ''.join (s)

# Prohibited sequences read from the file named on the command line, grouped
# by the script of each sequence's first code point: script -> ConstraintSet.
constraints = {}
with open (sys.argv[1], encoding='utf-8') as f:
    # The file begins with a header that ends at the first line consisting
    # solely of '#'. The header lines are collected with surrounding
    # whitespace stripped.
    constraints_header = []
    while True:
        line = f.readline ()
        if not line:
            # End of file reached without a header terminator. readline()
            # keeps returning '' from here on, so stop instead of looping
            # forever; the emptiness check below then reports the problem.
            break
        line = line.strip ()
        if line == '#':
            break
        constraints_header.append(line)
    for line in f:
        # Drop a trailing comment, if any.
        j = line.find ('#')
        if j >= 0:
            line = line[:j]
        # Only the first ';'-separated field is used: a whitespace-separated
        # list of hexadecimal code points.
        constraint = [int (cp, 16) for cp in line.split (';')[0].split ()]
        if not constraint: continue
        assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint)
        script = scripts[constraint[0]]
        if script in constraints:
            constraints[script].add (constraint)
        else:
            constraints[script] = ConstraintSet (constraint)

# Checked once the whole file has been read: inside the loop this could
# never fail, because it ran only right after an entry had been stored.
assert constraints, 'No constraints found'

# Banner stating how the output was generated.
print ('# The following functions are generated by running:')
print ('# %s ms-use/IndicShapingInvalidCluster.txt' % sys.argv[0])

# Fixed prelude of the generated module: the import, the dotted circle
# constant and the helper that inserts a dotted circle into a buffer.
print("""
from fontFeatures.shaperLib.Buffer import BufferItem

DOTTED_CIRCLE = 0x25CC

def _insert_dotted_circle(buf, index):
    dotted_circle = BufferItem.new_unicode(DOTTED_CIRCLE)
    buf.items.insert(index, dotted_circle)

""")

print ('def preprocess_text_vowel_constraints(buffer):')

# One ``if buffer.script == ...`` section per script, ordered by the
# script_order value recorded for each script.
for script_name in sorted (constraints, key=script_order.__getitem__):
    script_constraints = constraints[script_name]
    print(f'    if buffer.script == "{script_name}":')
    for loop_line in (
        '        i = 0',
        '        while i < len(buffer.items)-1:',
        '            matched = False',
    ):
        print (loop_line)
    # The matching code is produced by ConstraintSet.__str__.
    write (str (script_constraints))
    for loop_line in (
        '            i = i + 1',
        '            if matched: _insert_dotted_circle(buffer, i)',
    ):
        print (loop_line)