1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import argparse
import json
import xml.etree.ElementTree
import enum
# Directory containing this script, with symbolic links resolved.
_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
# Five directory levels above this script.
# NOTE(review): presumably the Chromium source root - confirm if the script
# is ever moved, since the depth is hard-coded.
_CHROME_SOURCE = os.path.realpath(
    os.path.join(_SCRIPT_DIR, *[os.path.pardir] * 5))
# Add <_CHROME_SOURCE>/build to the module search path; this has to happen
# before the `import action_helpers` statement that follows.
sys.path.append(os.path.join(_CHROME_SOURCE, 'build'))
import action_helpers
# LINT.IfChange
class Tone(enum.IntEnum):
    """Skin tone attached to an emoji variant.

    The surrounding LINT.IfChange/ThenChange markers tie these values to
    types.ts in the emoji picker, so the numbers must not be changed here
    alone.
    """
    # Values start at 1, so every member is truthy; code in this file tests
    # a tone with a plain `if tone:`.
    LIGHT = 1
    MEDIUM_LIGHT = 2
    MEDIUM = 3
    MEDIUM_DARK = 4
    DARK = 5
class Gender(enum.IntEnum):
    """Gender attached to an emoji variant.

    The surrounding LINT.IfChange/ThenChange markers tie these values to
    types.ts in the emoji picker, so the numbers must not be changed here
    alone.
    """
    # Values start at 1, so every member is truthy; code in this file tests
    # a gender with a plain `if gender:`.
    WOMAN = 1
    MAN = 2
# LINT.ThenChange(//chrome/browser/resources/chromeos/emoji_picker/types.ts)
# Codepoints for the range of skin tone modifiers from lightest to darkest,
# mapped to the Tone each one represents. Keys are integer codepoints (the
# result of ord()), matching the integer codepoint lists this script looks
# up in the table.
TONE_MODIFIERS = {
    ord('🏻'): Tone.LIGHT,
    ord('🏼'): Tone.MEDIUM_LIGHT,
    ord('🏽'): Tone.MEDIUM,
    ord('🏾'): Tone.MEDIUM_DARK,
    ord('🏿'): Tone.DARK,
}
# Codepoints for the woman and man gender modifiers, mapped to the Gender
# each one represents. Besides the female/male signs this includes gendered
# person emoji, listed as woman/man pairs. Keys are integer codepoints (the
# result of ord()), so every entry must be a single-codepoint character.
GENDER_MODIFIERS = {
    # Female / male sign.
    ord('♀'): Gender.WOMAN,
    ord('♂'): Gender.MAN,
    # Gendered person emoji, one woman/man pair per two lines.
    ord('👩'): Gender.WOMAN,
    ord('👨'): Gender.MAN,
    ord('👧'): Gender.WOMAN,
    ord('👦'): Gender.MAN,
    ord('🤶'): Gender.WOMAN,
    ord('🎅'): Gender.MAN,
    ord('👸'): Gender.WOMAN,
    ord('🤴'): Gender.MAN,
    ord('👵'): Gender.WOMAN,
    ord('👴'): Gender.MAN,
    ord('🤰'): Gender.WOMAN,
    ord('🫃'): Gender.MAN,
}
def parse_emoji_annotations(keyword_file):
    """Parse an annotations XML file into emoji names and keyword lists.

    Every `annotations/annotation` element under the document root is read.
    An element with `type="tts"` supplies the emoji's name; any other
    element supplies its keywords as a ' | '-separated list.

    Args:
        keyword_file: Path or file object accepted by
            xml.etree.ElementTree.parse.

    Returns:
        A `(names, keywords)` tuple. `names` maps the `cp` attribute (the
        emoji string) to its name; `keywords` maps it to a list of keyword
        strings.

    Raises:
        KeyError: If a non-empty annotation has no `cp` attribute.
    """
    names = {}
    keywords = {}
    root = xml.etree.ElementTree.parse(keyword_file).getroot()
    for tag in root.iterfind('./annotations/annotation'):
        text = tag.text
        # ElementTree reports the text of an empty element as None. Such an
        # annotation carries neither a name nor keywords, so skip it rather
        # than fail on None.startswith / None.split.
        if not text:
            continue
        cp = tag.attrib['cp']
        if tag.attrib.get('type') == 'tts':
            # Flag names read "flag: <region>"; rewrite to "flag of <region>".
            if text.startswith('flag'):
                text = text.replace('flag:', 'flag of')
            names[cp] = text
        else:
            keywords[cp] = text.split(' | ')
    return names, keywords
def parse_emoji_metadata(metadata_file):
    """Load the emoji ordering metadata from a JSON file.

    Args:
        metadata_file: Path of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    # Decode explicitly as UTF-8: without `encoding`, open() falls back to
    # the platform's locale encoding, which can mis-decode or reject
    # non-ASCII text in the JSON on some build hosts.
    with open(metadata_file, 'r', encoding='utf-8') as metadata_fp:
        return json.load(metadata_fp)
def get_codepoint_matches(codepoints, modifiers):
    """Return the entries of `codepoints` that are present in `modifiers`.

    Order and duplicates from `codepoints` are preserved.
    """
    return list(filter(lambda codepoint: codepoint in modifiers, codepoints))
def get_attribute(codepoints, modifiers):
    """Return the attribute of the single modifier found in `codepoints`.

    Returns None unless exactly one entry of `codepoints` is a key of
    `modifiers`; otherwise returns the value `modifiers` maps that entry to.
    """
    found = get_codepoint_matches(codepoints, modifiers)
    if len(found) != 1:
        return None
    (only_match,) = found
    return modifiers[only_match]
def is_multi(codepoints, modifiers):
    """Return True if the emoji is multi-tone or multi-gender.

    That is, when more than one entry of `codepoints` appears in
    `modifiers`.
    """
    match_count = len(get_codepoint_matches(codepoints, modifiers))
    return match_count >= 2
def _build_emoji_entry(codepoints,
                       names,
                       keywords,
                       is_variant,
                       emoticons=None,
                       shortcodes=None):
    """Build the output dict for one emoji (a base emoji or a variant)."""
    emoticons = [] if emoticons is None else emoticons
    shortcodes = [] if shortcodes is None else shortcodes

    # Transform the array of codepoint values into a unicode string.
    string = ''.join(chr(codepoint) for codepoint in codepoints)

    # Keyword data has U+FE0F emoji presentation characters removed, so drop
    # them when the exact sequence is not a known name.
    if string not in names:
        string = string.replace('\ufe0f', '')

    # TODO(b/183440310): Better handle search for non-standard emoji.
    if string in names:
        name = names[string]
        # An emoji can have a name without a keyword entry; treat that as
        # "no keywords" instead of raising KeyError.
        keyword_list = keywords.get(string, []) + emoticons + shortcodes
    else:
        name = ''
        # NOTE(review): shortcodes are not included for unnamed emoji; kept
        # as-is to leave the generated output unchanged - confirm intent.
        keyword_list = emoticons

    if is_variant:
        return {'string': string, 'name': name}
    return {'string': string, 'name': name, 'keywords': keyword_list}


def transform_emoji_data(metadata, names, keywords, first_only):
    """Convert emoji ordering metadata into the emoji picker's output format.

    Args:
        metadata: List of groups, each a dict with an 'emoji' list. Every
            emoji dict provides 'base' (list of codepoints), 'emoticons',
            'alternates' (list of codepoint lists) and optionally
            'shortcodes'.
        names: Dict mapping emoji string to its name.
        keywords: Dict mapping emoji string to its list of keywords.
        first_only: If true, only the first group is transformed; otherwise
            every group except the first.

    Returns:
        A new list of `{'emoji': [...]}` groups. Each emoji has a 'base' dict
        ('string', 'name', 'keywords') and, if it has alternates, an
        'alternates' list of `{'string', 'name'}` dicts optionally carrying
        'tone' / 'gender'. 'groupedTone' / 'groupedGender' are set to True on
        the emoji when any of its variants received a tone / gender.
    """
    # Slicing (instead of metadata[0]) keeps empty metadata from raising.
    groups = metadata[:1] if first_only else metadata[1:]

    # Create a new object for output since they keep adding extra properties
    # to the JSON (rather than just editing the input object).
    out = []
    for group in groups:
        new_group = []
        for emoji in group['emoji']:
            new_emoji = {
                'base':
                _build_emoji_entry(
                    emoji['base'],
                    names,
                    keywords,
                    False,
                    emoji['emoticons'],
                    emoji.get('shortcodes', []),
                ),
            }
            alternates = emoji['alternates']
            if alternates:
                new_emoji['alternates'] = []
                has_multi_tone = any(
                    is_multi(codepoints, TONE_MODIFIERS)
                    for codepoints in alternates)
                has_multi_gender = any(
                    is_multi(codepoints, GENDER_MODIFIERS)
                    for codepoints in alternates)

                for codepoints in alternates:
                    variant = _build_emoji_entry(codepoints, names, keywords,
                                                 True)
                    tone = None
                    gender = None

                    # Multi-tone preferences are individual, so all tones are
                    # ignored if any variant is multi-tone. Same for gender.
                    if not has_multi_tone:
                        tone = get_attribute(codepoints, TONE_MODIFIERS)
                    if not has_multi_gender:
                        gender = get_attribute(codepoints, GENDER_MODIFIERS)

                    if tone:
                        variant['tone'] = tone
                        new_emoji['groupedTone'] = True
                    if gender:
                        variant['gender'] = gender
                        new_emoji['groupedGender'] = True

                    new_emoji['alternates'].append(variant)
            new_group.append(new_emoji)
        out.append({'emoji': new_group})
    return out
def main(args):
    """Build the emoji data JSON file described by the command line.

    Args:
        args: Command-line arguments, without the program name.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--metadata',
        required=True,
        help='emoji metadata ordering file as JSON')
    arg_parser.add_argument(
        '--output',
        required=True,
        help='output JSON file path')
    arg_parser.add_argument(
        '--keywords',
        required=True,
        nargs='+',
        help='emoji keyword files as list of XML files')
    arg_parser.add_argument(
        '--firstgroup',
        required=True,
        help='Only output the first group, otherwise only output other groups')
    parsed = arg_parser.parse_args(args)

    # The flag arrives as text; only the exact string 'True' enables it.
    only_first_group = parsed.firstgroup == 'True'

    # Merge all keyword files; entries from later files replace earlier ones.
    merged_names = {}
    merged_keywords = {}
    for keyword_path in parsed.keywords:
        file_names, file_keywords = parse_emoji_annotations(keyword_path)
        merged_names.update(file_names)
        merged_keywords.update(file_keywords)

    # Parse the emoji ordering data and convert it to the output structure.
    emoji_groups = transform_emoji_data(
        parse_emoji_metadata(parsed.metadata),
        merged_names,
        merged_keywords,
        only_first_group,
    )

    # Write the output file atomically as compact, UTF-8 encoded JSON.
    with action_helpers.atomic_output(parsed.output) as tmp_file:
        serialized = json.dumps(emoji_groups,
                                separators=(',', ':'),
                                ensure_ascii=False)
        tmp_file.write(serialized.encode('utf-8'))
# Script entry point: hand the command-line arguments (without the program
# name) to main() when this file is executed directly rather than imported.
if __name__ == '__main__':
    main(sys.argv[1:])
|