1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
|
#!/usr/bin/env python3
import sys
# Collect every codepoint listed in the Unicode data file that is both
# encodable as UTF-8 and outside the ASCII range (>= 128).
# Original source: https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt
codes = []
with open('data/unicode.txt') as unicode_data:
    for line in unicode_data:
        try:
            # The first ';'-separated field is the codepoint in hexadecimal.
            code = int(line.split(';')[0], 16)
            # Ensure the character is encodable (surrogates are not)
            chr(code).encode('utf8')
        except (ValueError, OverflowError):
            # ValueError covers a malformed hex field, a codepoint outside
            # chr()'s range and UnicodeEncodeError (a ValueError subclass);
            # OverflowError covers absurdly large values passed to chr().
            # Such lines are skipped on purpose.
            continue
        if code >= 128:
            codes.append(code)
# We use the base36 encoded index as the compose sequence to minimize
# the total number of keysyms required.
def base36(n):
    """Return the index *n* as a fixed-width, three-digit base36 string.

    Digits are ``0-9`` followed by ``a-z``, most significant digit first,
    zero padded (``0 -> '000'``, ``36 -> '010'``, ``46655 -> 'zzz'``).

    Raises:
        ValueError: if *n* is negative or does not fit in three base36
            digits. Wrapping around would silently assign the same
            sequence to two different indices.
    """
    chars = '0123456789abcdefghijklmnopqrstuvwxyz'
    base = len(chars)
    if not 0 <= n < base ** 3:
        raise ValueError(f'index {n} does not fit in three base36 digits')
    # Peel digits off from least to most significant.
    n, low = divmod(n, base)
    high, mid = divmod(n, base)
    return chars[high] + chars[mid] + chars[low]
# Generate the compose file: entry n maps <Cancel> followed by the three
# base36 digits of n to the n-th collected character.
compose_lines = []
for n, code in enumerate(codes):
    keysyms = ' '.join(f'<{c}>' for c in base36(n))
    compose_lines.append(f'<Cancel> {keysyms} : "{chr(code)}"\n')
data = ''.join(compose_lines)
# Every emitted character is non-ASCII, so write UTF-8 explicitly instead of
# relying on the locale's default encoding, and close the file deterministically.
with open('data/keyd.compose', 'w', encoding='utf-8') as compose_file:
    compose_file.write(data)
# Generate the corresponding src/unicode.c
#
# The emitted C file contains:
#   - unicode_table: every collected codepoint, in the same order as `codes`,
#     so a table index is the same index that is base36-encoded for the
#     compose file;
#   - unicode_lookup_index(): a linear search returning that index, or -1
#     when the codepoint is not in the table;
#   - unicode_get_sequence(): fills codes[] with KEYD_CANCEL followed by the
#     three base36 digits of the index. The digit arithmetic must stay in
#     sync with base36().
#
# OPT: We could condense this and shave off lookup time by using an offset
# table to capitalize on codepoint contiguity, but 35k is small enough to
# warrant keeping the entire thing in memory.
unicode_c_source = f'''
/* GENERATED BY {sys.argv[0]}, DO NOT MODIFY BY HAND. */
#include <stdint.h>
#include <stdlib.h>
#include "keys.h"
uint32_t unicode_table[] = {{ {','.join(map(str, codes))} }};
int unicode_lookup_index(uint32_t codepoint)
{{
size_t i = 0;
for(i = 0; i < sizeof(unicode_table)/sizeof(unicode_table[0]); i++) {{
if (unicode_table[i] == codepoint)
return i;
}}
return -1;
}}
void unicode_get_sequence(int idx, uint8_t codes[4])
{{
uint8_t chars[] = {{
KEYD_0, KEYD_1, KEYD_2, KEYD_3, KEYD_4, KEYD_5, KEYD_6, KEYD_7,
KEYD_8, KEYD_9, KEYD_A, KEYD_B, KEYD_C, KEYD_D, KEYD_E, KEYD_F,
KEYD_G, KEYD_H, KEYD_I, KEYD_J, KEYD_K, KEYD_L, KEYD_M, KEYD_N,
KEYD_O, KEYD_P, KEYD_Q, KEYD_R, KEYD_S, KEYD_T, KEYD_U, KEYD_V,
KEYD_W, KEYD_X, KEYD_Y, KEYD_Z
}};
codes[0] = KEYD_CANCEL;
codes[1] = chars[idx / (36 * 36) % 36];
codes[2] = chars[idx / 36 % 36];
codes[3] = chars[idx % 36];
}}
'''.replace('\n\t', '\n').lstrip()
# Use a context manager so the output is flushed and closed deterministically.
with open('src/unicode.c', 'w') as unicode_c_file:
    unicode_c_file.write(unicode_c_source)
|