File: variation_selector.py

package info (click to toggle)
mautrix-python 0.20.7-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,812 kB
  • sloc: python: 19,103; makefile: 16
file content (115 lines) | stat: -rw-r--r-- 3,708 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Copyright (c) 2022 Tulir Asokan
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import annotations

import json
import pkgutil

import aiohttp

# Location of the Unicode emoji variation sequence list that fetch_data() downloads.
# The URL is pinned to the Unicode 14.0.0 release of the file.
EMOJI_VAR_URL = "https://www.unicode.org/Public/14.0.0/ucd/emoji/emoji-variation-sequences.txt"


def read_data() -> dict[str, str]:
    """
    Load the bundled list of emoji that need a variation selector.

    The data comes from the ``variation_selector.json`` resource of the ``mautrix.util`` package,
    i.e. the file this module writes when it is executed as a script.

    Returns:
        A dict from hex to the emoji string (the variation selectors are not included in the
        values).
    """
    raw_json = pkgutil.get_data("mautrix.util", "variation_selector.json")
    return json.loads(raw_json)


async def fetch_data() -> dict[str, str]:
    """
    Generate the list of emoji that need a variation selector from the Unicode spec data files.

    Downloads the file at ``EMOJI_VAR_URL`` and, for every line containing ``emoji style``, takes
    the first space-separated column as the hexadecimal code point of the emoji.

    Returns:
        A dict from hex to the emoji string (you have to bring the variation selectors yourself).

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error status.
    """
    async with aiohttp.ClientSession() as sess, sess.get(EMOJI_VAR_URL) as resp:
        # Fail loudly on HTTP errors. Without this check an error page would be parsed as if it
        # were the data file and silently produce an empty result.
        resp.raise_for_status()
        data = await resp.text()
    emojis = {}
    for line in data.splitlines():
        if "emoji style" not in line:
            continue
        # The first column is the base code point in hex; keep it verbatim as the dict key.
        emoji_hex = line.split(" ", 1)[0]
        emojis[emoji_hex] = chr(int(emoji_hex, 16))
    return emojis


if __name__ == "__main__":
    # Script mode: regenerate the bundled variation_selector.json from the Unicode data file.
    import asyncio
    import sys

    import pkg_resources

    path = pkg_resources.resource_filename("mautrix.util", "variation_selector.json")
    emojis = asyncio.run(fetch_data())
    # The JSON is dumped with ensure_ascii=False, so the file contains raw emoji characters.
    # Pin the encoding rather than relying on the platform default, which may be unable to
    # encode them.
    with open(path, "w", encoding="utf-8") as file:
        json.dump(emojis, file, indent="    ", ensure_ascii=False)
        file.write("\n")
    print(f"Wrote {len(emojis)} emojis to {path}")
    # Stop here so the module-level code after this block (which calls read_data()) is not
    # executed when running as a script.
    sys.exit(0)

# U+FE0F: the character that add() inserts and remove() strips.
VARIATION_SELECTOR_16 = "\ufe0f"

# Table for str.translate() mapping each character from the bundled data file to that same
# character followed by the variation selector. Built once, at import time.
ADD_VARIATION_TRANSLATION = str.maketrans(
    {ord(char): char + VARIATION_SELECTOR_16 for char in read_data().values()}
)

SKIN_TONE_MODIFIERS = (
    "\U0001F3FB",
    "\U0001F3FC",
    "\U0001F3FD",
    "\U0001F3FE",
    "\U0001F3FF",
)
# A selector directly followed by a skin tone modifier is replaced by the modifier alone.
SKIN_TONE_REPLACEMENTS = {VARIATION_SELECTOR_16 + tone: tone for tone in SKIN_TONE_MODIFIERS}

# Sequences that add() rewrites after inserting selectors: the skin tone cases above plus one
# U+1F408 / ZWJ / U+2B1B sequence that is mapped to its selector-free form.
VARIATION_SELECTOR_REPLACEMENTS = dict(SKIN_TONE_REPLACEMENTS)
VARIATION_SELECTOR_REPLACEMENTS["\U0001F408\ufe0f\u200d\u2b1b\ufe0f"] = "\U0001F408\u200d\u2b1b"


def add(val: str) -> str:
    r"""
    Append variation selector 16 to every character of the given string that is listed in the
    bundled variation selector data.

    Existing variation selectors are stripped first, so calling this on a string that already
    has them does not produce duplicates. Afterwards, the sequences listed in
    ``VARIATION_SELECTOR_REPLACEMENTS`` are replaced with their mapped forms.

    .. versionadded:: 0.12.5

    Examples:
        >>> from mautrix.util import variation_selector
        >>> variation_selector.add("\U0001f44d")
        "\U0001f44d\ufe0f"
        >>> variation_selector.add("\U0001f44d\ufe0f")
        "\U0001f44d\ufe0f"
        >>> variation_selector.add("4\u20e3")
        "4\ufe0f\u20e3"
        >>> variation_selector.add("\U0001f9d0")
        "\U0001f9d0"

    Args:
        val: The string to add variation selectors to.

    Returns:
        The string with variation selectors added.
    """
    # Start from a selector-free string so the translation cannot double up selectors.
    stripped = remove(val)
    result = stripped.translate(ADD_VARIATION_TRANSLATION)
    # Undo the selectors in the sequences that must not carry them.
    for unwanted in VARIATION_SELECTOR_REPLACEMENTS:
        result = result.replace(unwanted, VARIATION_SELECTOR_REPLACEMENTS[unwanted])
    return result


def remove(val: str) -> str:
    """
    Strip every variation selector 16 character from the given string.

    .. versionadded:: 0.12.5

    Args:
        val: The string to remove variation selectors from.

    Returns:
        A copy of the string without any variation selector 16 characters.
    """
    without_selectors = val.replace(VARIATION_SELECTOR_16, "")
    return without_selectors


# Names exported by a star-import of this module; the module-level constants are not included.
__all__ = ["add", "remove", "read_data", "fetch_data"]