#! /bin/sh -f
# Copyright (c) 2018 Thomas Wolff <towo@towo.net>
# generate a table for Unicode case conversion; entries:
# struct caseconv_entry defined in towctrans_l.c
# Pick the UnicodeData.txt to read: a readable copy in the current directory
# wins over the system-wide one; abort with status 1 if neither is readable.
UnicodeData=
for candidate in UnicodeData.txt /usr/share/unicode/ucd/UnicodeData.txt
do
  if [ -r "$candidate" ]
  then
    UnicodeData=$candidate
    break
  fi
done
if [ -z "$UnicodeData" ]
then
  echo UnicodeData.txt not found >&2
  exit 1
fi
# Output format switch: "if $compact" further down runs this value as a
# command, so "true" selects the compact range-table branch.
compact=true
# Export the C locale so every tool started below (tr, sed, uniq, sh) runs in it.
LC_ALL=C; export LC_ALL
# Sample UnicodeData.txt records.  Each record has 15 semicolon-separated
# fields; the sed stage below keeps field 1 (the code point) and the last
# three fields, which it labels upper, lower and title:
#0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
#0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041
#0130;LATIN CAPITAL LETTER I WITH DOT ABOVE;Lu;0;L;0049 0307;;;;N;LATIN CAPITAL LETTER I DOT;;;0069;
#01C4;LATIN CAPITAL LETTER DZ WITH CARON;Lu;0;L;<compat> 0044 017D;;;;N;LATIN CAPITAL LETTER D Z HACEK;;;01C6;01C5
#01C5;LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
#01C6;LATIN SMALL LETTER DZ WITH CARON;Ll;0;L;<compat> 0064 017E;;;;N;LATIN SMALL LETTER D Z HACEK;;01C4;;01C5
# Stage 1: delete carriage returns (\015), then rewrite every record as
#   src CODE upper "UPPER" lower "LOWER" title "TITLE"
# The three substitutions are identical except for which of the last three
# fields must be non-empty (upper, then lower, then title); "-e t" stops at
# the first one that matched, and "-e d" drops records that matched none,
# i.e. records without any case mapping.
tr -d '\015' < $UnicodeData |
sed \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;][^;]*\);\([^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;][^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;]*\);\([^;][^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e d |
(#src 01C5 upper "01C4" lower "01C6" title "01C5"
# The comment on the line above shows one record as it arrives here on stdin.
# Everything this subshell prints ends up in caseconv.t (see its closing line).
if $compact
then
# Compact format, stage 2: the inner subshell prints the definition of src
# (the here-document) followed by the "src ..." lines from stdin, producing
# a shell script in which every record is a call to src.
(
cat <<\/EOS
src () {
  # Classify one case-mapping record.  Called as
  #   src CODE upper "UPPER" lower "LOWER" title "TITLE"
  # so $1 is the code point, $3 its uppercase and $5 its lowercase mapping,
  # all as bare hex digits; $3 and $5 may be empty.  The title mapping is
  # not looked at.  Prints at most one line and sets the globals tohi/tolo.

  # Signed distance from CODE to each mapping, 0 when the mapping is absent.
  tohi=0
  [ -z "$3" ] || tohi=$(( 0x0$3 - 0x0$1 ))
  tolo=0
  [ -z "$5" ] || tolo=$(( 0x0$5 - 0x0$1 ))

  # One dispatch table over "CODE.tolo.tohi"; the first matching arm wins.
  # A bracket expression directly before the first dot tests the parity of
  # the last hex digit of CODE.
  case "$1.$tolo.$tohi" in
    # neither mapping changes the code point: emit nothing
    *.0.0) : ;;

    # uppercase mapping only
    *[02468ACE].0.1)  echo "'#error' U+$1 ODDSML";;
    *[02468ACE].0.-1) echo " 0x$1 TO1 ODDCAP";;
    *[13579BDF].0.1)  echo "'#error' U+$1 EVENSML";;
    *[13579BDF].0.-1) echo " 0x$1 TO1 EVENCAP";;
    *.0.*)            echo " 0x$1 TOUP $tohi";;

    # lowercase mapping only
    *[02468ACE].1.0)  echo " 0x$1 TO1 EVENCAP";;
    *[02468ACE].-1.0) echo "'#error' U+$1 EVENSML";;
    *[13579BDF].1.0)  echo " 0x$1 TO1 ODDCAP";;
    *[13579BDF].-1.0) echo "'#error' U+$1 ODDSML";;
    *.0)              echo " 0x$1 TOLO $tolo";;

    # both mappings: only "upper is CODE-1, lower is CODE+1" is representable
    *.1.-1)           echo " 0x$1 TOBOTH 0";;
    *)                echo "'#error' U+$1";;
  esac
}
/EOS
# "cat" appends the "src ..." lines from stdin to the definition printed
# above; "sh" runs the combined script, which prints one classification line
# per record.  "uniq -f1 --group=append" then ignores the first field (the
# code point) when comparing and prints an empty line after each group of
# adjacent lines that are otherwise equal (--group is a GNU coreutils
# extension).  The following sed turns each empty line into a "range" call
# and prefixes every other line with "item ", yielding the body of a second
# generated script.
cat
) | sh |
uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," |
(
# Second generated script: initial run state (no run open: first empty,
# diff=-1; max=255 is the largest diff a single emitted entry may carry),
# followed by the definitions of range and item.
cat <<\/EOS
first=
diff=-1
max=255
range () {
  # Close the run collected so far: print it as one table entry
  # " {first, diff, v2, v3}," and reset the run state.
  # diff counts the items of the run minus one (== last - first), so a
  # negative diff means nothing has been collected and nothing is printed.
  [ "$diff" -ge 0 ] && echo " {$first, $diff, $v2, $v3},"
  diff=-1
  first=
}
item () {
  # Add one classified code point to the run being collected.
  #   $1  code point as 0xHEX, or the literal #error
  #   $2  mapping kind (stored as v2 of the run)
  #   $3  mapping argument (stored as v3 of the run)
  # Uses and updates the globals first, last, diff, v2, v3; reads max.
  # Calls range to flush the current run before starting a new one.

  # Error markers are passed through unchanged and never join a run.
  # Use "=" here: this text is executed by sh, and POSIX test defines only
  # "=" for string equality ("==" is rejected by e.g. dash).
  if [ "$1" = "#error" ]
  then
    echo "$*"
    return
  fi

  # Flush when the run has reached its maximum length, or when this code
  # point does not directly follow the previous one.
  if [ "$diff" -eq "$max" ]
  then
    range
  elif [ -n "$first" ]
  then
    if [ $(( $1 )) -ne $(( ${last-0} + 1 )) ]
    then
      range
    fi
  fi

  # No run open (initially, or right after a flush): this item starts one.
  if [ -z "$first" ]
  then
    first=$1
    v2=$2
    v3=$3
  fi
  last=$1
  diff=$(( $diff + 1 ))
}
/EOS
# "cat" appends the "item ..." / "range" lines from stdin to the definitions
# printed above; "sh" runs the combined script, which prints the table entries.
cat
) | sh
elif false
then
# Disabled alternative (the "elif false" condition never succeeds).
# It would rewrite each record as " {0xCODE, 0xUPPER - 0xCODE, 0xLOWER - 0xCODE},",
# replace the difference of an empty mapping by 0, wrap the remaining
# differences in $((...)) with printf-converted operands, turn every line
# into an echo command and let sh evaluate the arithmetic.
sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/ {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
-e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/' \
-e 's/\(0x[0-9A-F][0-9A-F]*\) - \(0x[0-9A-F][0-9A-F]*\)/$((`printf %d \1` - `printf %d \2`))/g' \
-e 's/^/echo "/' -e 's/$/"/' |
sh
else
# Plain format, used only when the command in $compact does not succeed:
# one " {0xCODE, 0xUPPER - 0xCODE, 0xLOWER - 0xCODE}," entry per record, with
# the differences left as unevaluated expressions and the difference of an
# empty mapping replaced by 0.  The title mapping is not emitted.
sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/ {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
-e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/'
fi
) > caseconv.t
|