# File: assemble_charactersets.py
#
# Package info:
#   python-glyphsets 1.0.0-3
#   links: PTS, VCS
#   area: main
#   in suites: forky, sid, trixie
#   size: 3,396 kB
#   sloc: python: 973; xml: 432; sh: 11; makefile: 3
# File content: 283 lines | stat: -rw-r--r-- 9,642 bytes
"""
Assemble .nam, .glyphs, .txt and .plist files from .glyphs stub files and
language definitions.
"""

import sys
import os
import shutil
import gflanguages
import unicodedata
import glyphsLib
import functools
import plistlib
from glyphsLib.glyphdata import get_glyph, _lookup_attributes_by_unicode
from fontTools.unicodedata.Scripts import NAMES as SCRIPT_NAMES

# Insert local module path at beginning of sys.path
# so that up-to-date version of glyphsets package is used
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "Lib"))
from glyphsets import (
    get_script,
    defined_glyphsets,
    get_glyphset_definition,
    unicodes_per_glyphset,
    languages_per_glyphset,
    read_nam_file,
)  # noqa: E402


def sort_unicodes(a, b):
    """Compare two glyph-like objects by code point, for ``functools.cmp_to_key``.

    Both arguments must expose a ``unicode`` attribute that is either a
    hexadecimal string or a falsy value (e.g. ``None``).

    Returns:
        The difference of the two code points when both glyphs are encoded
        (negative if ``a`` comes first); ``-1`` when only ``a`` is encoded,
        ``1`` when only ``b`` is encoded (encoded glyphs sort before
        unencoded ones); ``0`` when neither is encoded.
    """
    left, right = a.unicode, b.unicode

    if left and right:
        return int(left, 16) - int(right, 16)
    if left:
        return -1
    return 1 if right else 0


def sort_by_category(a, b):
    """Compare two glyphs by their glyph-data category, for ``functools.cmp_to_key``.

    The category of each glyph is looked up with ``get_glyph`` using the
    glyph's ``name``.

    Returns:
        ``-1`` when ``a`` has no category; ``1`` when ``a`` has a category
        but ``b`` does not; otherwise ``-1`` if ``b``'s category compares
        lower than ``a``'s and ``1`` if not. The function never returns
        ``0``: glyphs with the greater category are placed first, and equal
        categories yield ``1``.

    NOTE(review): when both categories are ``None`` the result is ``-1``
    regardless of argument order, so the comparison is not antisymmetric in
    that case -- confirm whether this is intended.
    """
    info_a, info_b = get_glyph(a.name), get_glyph(b.name)

    category_a = info_a.category
    if category_a is None:
        return -1

    category_b = info_b.category
    if category_b is None:
        return 1

    # Descending order: ``a`` goes first only when ``b`` compares lower.
    return -1 if category_b < category_a else 1


_AUTOGENERATED_NOTICE = (
    "# This file is auto-generated; do not edit. See /README.md for instructions.\n"
)


def _exemplar_codepoints(chars, use_aux):
    """Return the set of code points found in one language's exemplar characters.

    ``chars`` is the ``exemplar_chars`` object of a gflanguages language.
    Its ``auxiliary`` characters are only included when ``use_aux`` is true.
    Space, curly braces and the dotted circle are dropped.
    """
    # chars.base.upper() is important because many Latin languages don't
    # contain a complete set of uppercase letters in "index"
    characters = (
        set(chars.base)
        | set(chars.base.upper())
        | set(chars.index)
        | set(chars.marks)
        | set(chars.numerals)
        | set(chars.punctuation)
    )
    if use_aux:
        characters |= set(chars.auxiliary)
    return {ord(c) for c in characters if c not in (" ", "{", "}", "◌")}


def _write_listing(path, lines):
    """Write the auto-generated notice followed by ``lines`` to ``path``.

    Missing parent folders are created. Lines are joined with a newline and
    no trailing newline is written after the last one.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(_AUTOGENERATED_NOTICE)
        f.write("\n".join(lines))


def _update_plist(plist_path, glyphset_name, glyph_names):
    """Store ``glyph_names`` under ``glyphset_name`` in the plist at ``plist_path``.

    The plist is a list of dicts. Every entry whose ``"name"`` equals
    ``glyphset_name`` gets its ``"list"`` replaced; if there is none, a new
    entry is appended. The file is created when it does not exist yet.
    """
    os.makedirs(os.path.dirname(plist_path), exist_ok=True)
    if os.path.exists(plist_path):
        with open(plist_path, "rb") as f:
            plist = plistlib.load(f)
    else:
        plist = []

    found_list = False
    for plist_glyphset in plist:
        if "name" in plist_glyphset and plist_glyphset["name"] == glyphset_name:
            plist_glyphset["list"] = glyph_names
            found_list = True
    if not found_list:
        plist.append({"name": glyphset_name, "list": glyph_names})

    with open(plist_path, "wb") as f:
        plistlib.dump(plist, f)


def assemble_characterset(root_folder, glyphset_name):
    """Build all result files for one glyphset.

    The character set is the union of

    * the exemplar characters of every language of the glyphset (as loaded
      by gflanguages; auxiliary characters only if the glyphset definition
      sets ``use_auxiliary``),
    * the encoded glyphs of ``definitions/per_glyphset/<name>.stub.glyphs``
      (if that file exists), and
    * the encoded glyphs of ``definitions/per_language/<code>.stub.glyphs``
      for every language of the glyphset (where such a file exists).

    Unencoded glyphs of the per-language stubs are added to the font by name.

    Args:
        root_folder: Data folder containing ``definitions``, ``results`` and
            ``empty_font.glyphs``. The package copies are written relative
            to it, under ``../Lib/glyphsets/results``.
        glyphset_name: Name of the glyphset to assemble.

    Files written:
        * ``results/glyphs/<name>.glyphs``
        * ``results/nam/<name>.nam`` (also copied into the package)
        * ``results/txt/nice-names/<name>.txt`` (also copied into the package)
        * ``results/txt/prod-names/<name>.txt``
        * ``results/plist/CustomFilter_GF_<script>.plist`` (entry for this
          glyphset added or replaced)
    """
    script = get_script(glyphset_name)
    glyphset_definition = get_glyphset_definition(glyphset_name)
    language_codes = languages_per_glyphset(glyphset_name)
    use_aux = glyphset_definition.get("use_auxiliary", False)

    definitions_folder = os.path.join(root_folder, "definitions")
    results_folder = os.path.join(root_folder, "results")
    package_results_folder = os.path.join(
        root_folder, "..", "Lib", "glyphsets", "results"
    )

    nam_path = os.path.join(results_folder, "nam", f"{glyphset_name}.nam")
    nam_in_package_path = os.path.abspath(
        os.path.join(package_results_folder, "nam", f"{glyphset_name}.nam")
    )
    txt_in_package_path = os.path.abspath(
        os.path.join(
            package_results_folder, "txt", "nice-names", f"{glyphset_name}.txt"
        )
    )
    glyphs_stub_path = os.path.join(
        definitions_folder, "per_glyphset", f"{glyphset_name}.stub.glyphs"
    )
    glyphs_path = os.path.join(results_folder, "glyphs", f"{glyphset_name}.glyphs")
    glyphs_empty_path = os.path.join(root_folder, "empty_font.glyphs")
    txt_nicenames_path = os.path.join(
        results_folder, "txt", "nice-names", f"{glyphset_name}.txt"
    )
    txt_prodnames_path = os.path.join(
        results_folder, "txt", "prod-names", f"{glyphset_name}.txt"
    )
    plist_path = os.path.join(
        results_folder, "plist", f"CustomFilter_GF_{script}.plist"
    )

    # Assemble character set from gflanguages
    languages = gflanguages.LoadLanguages()
    character_set = set()
    for language_code in language_codes:
        character_set |= _exemplar_codepoints(
            languages[language_code].exemplar_chars, use_aux
        )

    # Call get_glyph once so that GLYPHDATA gets filled in glyphsLib
    get_glyph("A")
    # If I import GLYPHDATA at the top of the file, it doesn't get filled
    from glyphsLib.glyphdata import GLYPHDATA

    assert type(GLYPHDATA) is glyphsLib.glyphdata.GlyphData

    # Create or open glyphs file and add characters
    if os.path.exists(glyphs_stub_path):
        font = glyphsLib.load(glyphs_stub_path)
        for glyph in font.glyphs:
            if glyph.unicodes:
                character_set.update(int(code, 16) for code in glyph.unicodes)
    else:
        font = glyphsLib.load(glyphs_empty_path)

    font.familyName = glyphset_name

    # Add language-specific glyphs
    # Track names so that an unencoded glyph shared by several stubs is only
    # added once; duplicate glyph names would corrupt the font and the lists.
    glyph_names_in_font = {glyph.name for glyph in font.glyphs}
    for language_code in language_codes:
        per_language_glyphs_stub_path = os.path.join(
            definitions_folder, "per_language", f"{language_code}.stub.glyphs"
        )
        if not os.path.exists(per_language_glyphs_stub_path):
            continue
        per_language_font = glyphsLib.load(per_language_glyphs_stub_path)

        for glyph in per_language_font.glyphs:
            if glyph.unicodes:
                # Add encoded characters to character_set
                character_set.update(int(code, 16) for code in glyph.unicodes)
            elif glyph.name not in glyph_names_in_font:
                # Add unencoded glyphs to .glyphs file
                font.glyphs.append(glyphsLib.GSGlyph(glyph.name))
                glyph_names_in_font.add(glyph.name)

    # Add encoded characters to .glyphs file
    sorted_codepoints = sorted(character_set)
    # Only each glyph's primary code point counts as "already in the font".
    # Building the set once avoids rescanning the font for every character.
    encoded_in_font = {
        int(glyph.unicode, 16) for glyph in font.glyphs if glyph.unicode
    }
    for codepoint in sorted_codepoints:
        if codepoint in encoded_in_font:
            continue
        hex_code = f"{codepoint:04X}"
        glyph_info = _lookup_attributes_by_unicode(hex_code, GLYPHDATA)
        if "name" in glyph_info:
            new_glyph = glyphsLib.GSGlyph(glyph_info["name"])
        else:
            new_glyph = glyphsLib.GSGlyph(f"uni{hex_code}")
        new_glyph.unicode = hex_code
        font.glyphs.append(new_glyph)

    # Sort: the font itself by category, the name lists by code point
    font.glyphs = sorted(font.glyphs, key=functools.cmp_to_key(sort_by_category))
    unicode_sorted_glyphs = sorted(font.glyphs, key=functools.cmp_to_key(sort_unicodes))
    glyph_names = [glyph.name for glyph in unicode_sorted_glyphs]
    production_glyph_names = [
        get_glyph(glyph.name).production_name for glyph in unicode_sorted_glyphs
    ]

    # Save glyphs file
    os.makedirs(os.path.dirname(glyphs_path), exist_ok=True)
    font.save(glyphs_path)

    # Output sorted character set to .nam file
    nam_lines = []
    for codepoint in sorted_codepoints:
        try:
            unicode_name = unicodedata.name(chr(codepoint))
        except ValueError:
            # Code point has no name in this Python's Unicode database
            unicode_name = ""
        nam_lines.append(f"0x{codepoint:04X} {unicode_name}")
    _write_listing(nam_path, nam_lines)
    os.makedirs(os.path.dirname(nam_in_package_path), exist_ok=True)
    shutil.copyfile(nam_path, nam_in_package_path)

    # Output txt files
    _write_listing(txt_nicenames_path, glyph_names)
    _write_listing(txt_prodnames_path, production_glyph_names)
    os.makedirs(os.path.dirname(txt_in_package_path), exist_ok=True)
    shutil.copyfile(txt_nicenames_path, txt_in_package_path)

    # Adjust .plist
    _update_plist(plist_path, glyphset_name, glyph_names)


if __name__ == "__main__":
    # Check for gflanguages version.
    # Read it from the metadata of the distribution installed for *this*
    # interpreter instead of shelling out to `pip index versions`, which
    # needs network access and may run a pip that belongs to another Python.
    from importlib.metadata import PackageNotFoundError, version

    try:
        installed = version("gflanguages")
    except PackageNotFoundError:
        # Module is importable but has no distribution metadata
        installed = None
    print(
        f"""
*************************************************************
*
*   WARNING:
*   Make sure you're using the correct version of gflanguages,
*   otherwise the glyphsets will be incorrect.
*
*   You have: {installed}
*   Location: {gflanguages.__file__}
*
*************************************************************
"""
    )

    # The data folder sits next to this script's folder
    root_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))

    for glyphset_name in defined_glyphsets():
        print(f"Assembling '{glyphset_name}'...")
        assemble_characterset(root_folder, glyphset_name)
        # Proof of work:
        # assert unicodes_per_glyphset(glyphset_name) != []