File: gen_gemoji.py

package info (click to toggle)
pymdown-extensions 10.13-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,104 kB
  • sloc: python: 60,117; javascript: 846; sh: 8; makefile: 5
file content (134 lines) | stat: -rw-r--r-- 3,874 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""Generate gemoji data."""
import sys
import os
import json
import codecs
# Directory containing this script; `parse` resolves its input files relative to it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Code points that `get_unicode` leaves out of the primary `unicode` value:
# U+200D (zero width joiner) and U+FE0F (variation selector 16).
U_JOIN = 0x200d
U_VARIATION_SELECTOR_16 = 0xfe0f
U_EXTRA = (U_JOIN, U_VARIATION_SELECTOR_16)

if sys.maxunicode != 0xFFFF:
    # Wide build: every character in a string is already a complete code point.

    def get_code_points(s):
        """Split the string into a list of its code points."""

        return list(s)

    def get_ord(c):
        """Return the ordinal of a single code point."""

        return ord(c)

else:
    # Narrow build: code points above U+FFFF are stored as surrogate pairs.

    def get_code_points(s):
        """Split the string into a list of code points, keeping surrogate pairs together."""

        points = []
        pending = []

        for char in s:
            value = ord(char)

            if 0xD800 <= value <= 0xDBFF:
                # High surrogate: restart the pending buffer and wait for the low half.
                pending[:] = [char]
                continue

            if pending and 0xDC00 <= value <= 0xDFFF:
                # Low surrogate following buffered surrogates: emit the buffer as one item.
                pending.append(char)
                points.append(''.join(pending))
                continue

            # Anything else stands on its own; drop whatever was buffered.
            del pending[:]
            points.append(char)

        return points

    def get_ord(c):
        """Return the ordinal of a code point, decoding a surrogate pair if given one."""

        if len(c) != 2:
            return ord(c)

        high, low = ord(c[0]), ord(c[1])
        return (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000


def get_unicode(value):
    """
    Get the hyphen separated hex code points for an emoji entry.

    Returns a `(unicode, unicode_alt)` tuple. `unicode` leaves out the code
    points listed in `U_EXTRA`, while `unicode_alt` is the complete sequence,
    or `None` when it is the same as `unicode`.
    """

    ordinals = [get_ord(point) for point in get_code_points(value['emoji'])]

    complete = '-'.join(['%04x' % ordinal for ordinal in ordinals])
    trimmed = '-'.join(['%04x' % ordinal for ordinal in ordinals if ordinal not in U_EXTRA])

    if trimmed == complete:
        return trimmed, None

    return trimmed, complete


def get_gemoji_specific(value):
    """Get the first entry of the emoji's `aliases` list."""

    names = value['aliases']
    return names[0]


def parse(repo, tag):
    """
    Generate the gemoji test files and the `gemoji_db.py` data module.

    Reads `tags/<repo>/<repo>/db/emoji.json` and `tags/<repo>/<repo>/LICENSE`
    from the directory containing this script, then writes:

    - `../tests/extensions/emoji/gemoji (png).txt` with every short name,
    - `../tests/extensions/emoji/gemoji (entities).txt` with the first ten short names,
    - `../pymdownx/gemoji_db.py` with the version, emoji database, and aliases.

    The output paths are relative to the current working directory.

    `repo` is the directory name used under `tags`, and `tag` is written out
    as the `version` of the generated module.
    """
    source_dir = os.path.join(current_dir, 'tags', repo, repo)

    # Load emoji database
    with codecs.open(os.path.join(source_dir, 'db', 'emoji.json'), 'r', encoding='utf-8') as f:
        emojis = json.load(f)

    emoji_db = {}
    shortnames = set()
    aliases = {}
    for v in emojis:
        names = v['aliases']
        # The first alias is the canonical short name; the rest map back to it.
        short = ':%s:' % names[0]
        shortnames.add(short)

        entry = {'name': v.get('description', names[0])}
        if 'emoji' in v:
            uc, uc_alt = get_unicode(v)
            entry['unicode'] = uc
            entry['category'] = v['category']
            if uc_alt:
                entry['unicode_alt'] = uc_alt
        emoji_db[short] = entry

        for alias in names[1:]:
            aliases[':%s:' % alias] = short

    # Save test files
    ordered = sorted(shortnames)
    for test in ('png', 'entities'):
        # The `png` file lists every emoji; any other file only gets the first ten.
        selected = ordered if test == 'png' else ordered[:10]
        with open('../tests/extensions/emoji/gemoji (%s).txt' % test, 'w', encoding='utf-8') as f:
            f.write('# Emojis\n')
            f.writelines('{} {}<br>\n'.format(emoji[1:-1], emoji) for emoji in selected)

    # Read the license as UTF-8 so the result does not depend on the platform's default encoding.
    with open(os.path.join(source_dir, 'LICENSE'), encoding='utf-8') as f:
        license_content = f.read()

    # Write out essential info
    # UTF-8 is the default source encoding for Python modules, so write the module as UTF-8.
    with open('../pymdownx/gemoji_db.py', 'w', encoding='utf-8') as f:
        # The license text is embedded in the generated module's docstring.
        f.write('"""Gemoji autogen.\n\nGenerated from gemoji source. Do not edit by hand.\n\n%s"""\n' % license_content)
        f.write('version = "%s"\n' % tag)
        f.write('name = "gemoji"\n')
        f.write('emoji = %s\n' % json.dumps(emoji_db, sort_keys=True, indent=4, separators=(',', ': ')))
        f.write('aliases = %s\n' % json.dumps(aliases, sort_keys=True, indent=4, separators=(',', ': ')))