1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
|
#!/bin/bash
# -*- sh -*-
#
# Takes on stdin a file formatted like the official UnicodeData list
# (semicolon-separated; only field 1, the code point, and field 2, the
# character name, are used).
# Writes on stdout a fallback table suitable for `consolechars -k'.
#
# Arguments:
#   $1 FULL_PATTERN      extended regex (grep -E syntax) selecting, by name,
#                        the characters that need a fallback.  It is matched
#                        as ";FULL_PATTERN$" against "CODE;NAME" lines.
#   $2 REDUCTED_PATTERN  extended regex selecting, by name, the characters
#                        that may be used as fallback targets; they are
#                        removed from the set selected by FULL_PATTERN.
#   $3 TRANSLATION       sed replacement text that builds the name of the
#                        fallback target from a name matched by FULL_PATTERN
#                        (it is used as the right-hand side of an s///).
#
# Note: we assume all diacrs match a non-diacr !
# Note: the three arguments are spliced verbatim into regexes and sed
#       s/.../.../ commands, so an unescaped `/' in them breaks the script.

if [[ $# -ne 3 ]]; then
  printf >&2 '%s\n' "Usage: ${0##*/} FULL_PATTERN REDUCTED_PATTERN TRANSLATION"
  printf >&2 '%s\n' " < /usr/share/unicode/UnicodeData.txt >out.fallback"
  exit 1
fi

FULL_GREP_PATTERN=$1
REDUCTED_PATTERN=$2
TRANSLATION=$3

# Convert the extended regex into the basic-regex dialect used by plain sed:
# `?' becomes `\{0,1\}', and `(', `)', `|' get a leading backslash.
# The expansion is quoted so that the shell neither word-splits the pattern
# nor expands `*', `?' or `[...]' against files in the current directory.
FULL_SED_PATTERN=$(printf '%s\n' "${FULL_GREP_PATTERN}" |
  sed -e 's/?/\\{0,1\\}/g' -e 's/\([()|]\)/\\\1/g')

# The input is read twice (candidates and fallback targets), so stash the
# two useful fields in a private temp file.  mktemp avoids a predictable
# name in /tmp; the EXIT trap removes the file on every exit path.
FILE=$(mktemp "${TMPDIR:-/tmp}/fallback.XXXXXX") || {
  printf >&2 '%s\n' "${0##*/}: cannot create temporary file"
  exit 1
}
trap 'rm -f -- "${FILE}"' EXIT

cut -d\; -f1,2 >"${FILE}"

cat << EOF
#
# Fallback file from chars matching:
# '${FULL_GREP_PATTERN}'
# to chars matching:
# '${REDUCTED_PATTERN}'
# using as a translation rule:
# '${TRANSLATION}'
#
# Auto-generated by ${0##*/} from UnicodeData list
#
# ** DO NOT EDIT MANUALLY **
#
EOF

# Lines flow through the pipeline as:
#   CODE;NAME
#   -> CODE;TARGET_NAME;NAME
#   -> CODE;U+TARGET_CODE;TARGET_NAME;NAME

# select only targeted lines
grep -E -e ";${FULL_GREP_PATTERN}\$" <"${FILE}" |
  # filter out those we want to reduct to
  grep -E -v -e ";${REDUCTED_PATTERN}\$" |
  # translate into wished reduction, and keep orig name as 3rd field
  # (\0 re-inserts the whole match, as accepted by GNU sed)
  sed -e "s/;${FULL_SED_PATTERN}\$/;${TRANSLATION}\\0/" |
  # translate into wished unicode
  sed -f <(
    # construct a sed filter that adds their unicodes to reduction names:
    # one "s/;NAME;/;U+CODE\0/" command per fallback-target character
    grep -E -e ";${REDUCTED_PATTERN}\$" <"${FILE}" |
      awk -F\; '{print "s/;" $2 ";/;U+" $1 "\\0/"}'
  ) |
  # filter out lines that did not give a reducted pattern
  grep -F ';U+' |
  # format lines in fallback-file syntax:
  # a "# NAME : TARGET_NAME" comment line, then "U+CODE U+TARGET_CODE"
  awk -F\; '{print "# " $4 " : " $3 "\nU+" $1 " " $2}'
|