File: mkcaseconv

package info (click to toggle)
picolibc 1.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 31,616 kB
  • sloc: ansic: 312,308; asm: 22,739; perl: 2,414; sh: 1,619; python: 1,019; pascal: 329; exp: 287; makefile: 164; xml: 40; cpp: 10
file content (130 lines) | stat: -rwxr-xr-x 3,588 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#! /bin/sh -f

# Copyright (c) 2018 Thomas Wolff <towo@towo.net>

# generate a table for Unicode case conversion; entries:
# struct caseconv_entry defined in towctrans_l.c

# Pick the Unicode data file: a copy in the current directory wins,
# otherwise fall back to the system-wide location.  Without a readable
# file there is nothing to generate, so report on stderr and stop.
UnicodeData=UnicodeData.txt
[ -r "$UnicodeData" ] || UnicodeData=/usr/share/unicode/ucd/UnicodeData.txt
if [ ! -r "$UnicodeData" ]
then
	echo "UnicodeData.txt not found" >&2
	exit 1
fi

# Run every tool below in the C locale.
export LC_ALL=C

# Used as the command in "if $compact" further down: "true" selects the
# compact range-table generator.
compact=true

#0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
#0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041
#0130;LATIN CAPITAL LETTER I WITH DOT ABOVE;Lu;0;L;0049 0307;;;;N;LATIN CAPITAL LETTER I DOT;;;0069;
#01C4;LATIN CAPITAL LETTER DZ WITH CARON;Lu;0;L;<compat> 0044 017D;;;;N;LATIN CAPITAL LETTER D Z HACEK;;;01C6;01C5
#01C5;LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
#01C6;LATIN SMALL LETTER DZ WITH CARON;Ll;0;L;<compat> 0064 017E;;;;N;LATIN SMALL LETTER D Z HACEK;;01C4;;01C5

# Stage 1: reduce the Unicode data file to its case-mapping records.
# tr drops carriage returns (octal 015).  sed then tries three patterns that
# differ only in which of the three trailing fields must be non-empty, so a
# line is kept when at least one of those fields has a value.  A kept line is
# rewritten as
#   src CODE upper "F1" lower "F2" title "F3"
# where CODE is the first field and F1..F3 are the three trailing fields.
# Each "-e t" ends processing of a line as soon as a substitution succeeded;
# the final "-e d" deletes every line that matched none of the patterns.
# NOTE(review): $UnicodeData is expanded unquoted in the redirection.
tr -d '\015' < $UnicodeData |
sed \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;][^;]*\);\([^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;][^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;]*\);\([^;][^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
-e t \
-e d |
(#src 01C5 upper "01C4" lower "01C6" title "01C5"
# This subshell receives the "src ..." lines on stdin (sample above).
# With $compact true, the inner subshell builds a shell script: first the
# text of the here-document, then the "src ..." lines copied from stdin.
if $compact
then
  (
  cat <<\/EOS
  # src CODE upper "UP" lower "LO" title "TI"
  # Turn one case-mapping record into a table item on stdout.
  #   $1  source code point, hex digits without prefix
  #   $3  uppercase target (hex digits) or empty
  #   $5  lowercase target (hex digits) or empty
  # The title field ($7) is not consulted.
  # Prints nothing when neither target is given; otherwise prints either
  # "  0xCODE KIND ARG" or a line starting with '#error' (in single quotes)
  # for combinations the table cannot express.
  src () {
    # Distances from the source code point; 0 stands for "no mapping".
    tohi=0
    tolo=0
    [ -z "$3" ] || tohi=$(( 0x0$3 - 0x0$1 ))
    [ -z "$5" ] || tolo=$(( 0x0$5 - 0x0$1 ))

    # Selector layout is CODE.tolo.tohi; CODE holds no dots, so the two
    # dots always delimit the two distances.  Pattern order matters.
    case "$1.$tolo.$tohi" in
    *.0.0)
      # no case mapping at all
      true;;

    # only an uppercase mapping
    *[02468ACE].0.1)
      echo "'#error' U+$1 ODDSML";;
    *[02468ACE].0.-1)
      echo "  0x$1 TO1 ODDCAP";;
    *[13579BDF].0.1)
      echo "'#error' U+$1 EVENSML";;
    *[13579BDF].0.-1)
      echo "  0x$1 TO1 EVENCAP";;
    *.0.*)
      echo "  0x$1 TOUP $tohi";;

    # only a lowercase mapping
    *[02468ACE].1.0)
      echo "  0x$1 TO1 EVENCAP";;
    *[02468ACE].-1.0)
      echo "'#error' U+$1 EVENSML";;
    *[13579BDF].1.0)
      echo "  0x$1 TO1 ODDCAP";;
    *[13579BDF].-1.0)
      echo "'#error' U+$1 ODDSML";;
    *.*.0)
      echo "  0x$1 TOLO $tolo";;

    # both mappings: only "next is lower, previous is upper" is supported
    *.1.-1)
      echo "  0x$1 TOBOTH 0";;
    *)
      echo "'#error' U+$1";;
    esac
  }
/EOS
  # Copy the "src ..." lines from stdin after the function text and let sh
  # execute the combined script; its output is one item line per record.
  # uniq -f1 compares lines while skipping their first field, and
  # --group=append prints every line plus an empty line after each group of
  # adjacent matching lines (GNU uniq option).  sed then turns each empty
  # line into the command "range" and prefixes every other line with
  # "item " ("-e t" keeps the "range" lines from getting the prefix too).
  # The result is fed, after the second here-document, to another sh.
  cat
  ) | sh |
  uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," |
  (
  cat <<\/EOS
  # Accumulator state shared by range and item:
  #   max    item flushes the pending run once diff has reached this value
  #   diff   number of entries collected after the first one; -1 = none
  #   first  first code point of the pending run; empty = no run open
  max=255
  diff=-1
  first=

  # range
  # Print the pending run, if any, as "  {first, diff, v2, v3}," and reset
  # the accumulator so that the next item opens a new run.
  range () {
	# a non-negative diff means at least one item has been collected
	[ "$diff" -ge 0 ] && echo "  {$first, $diff, $v2, $v3},"
	diff=-1
	first=
  }
  # item CODE KIND ARG
  # Add one table entry to the pending run, flushing the run first when the
  # entry cannot extend it.
  #   $1      code point as a hex literal (e.g. 0x0041), or the word #error
  #   $2, $3  conversion kind and argument; remembered from the first entry
  #           of a run and printed by range
  # An "#error ..." entry is copied to stdout unchanged and does not touch
  # the accumulator.
  # State: reads max; reads and writes first, last, diff, v2, v3; calls range.
  item () {
	# Use '=' here: this text is executed by plain sh, and '==' is not a
	# POSIX test operator (dash rejects it, so the marker would fall
	# through into the arithmetic below).
	if [ "$1" = "#error" ]
	then	echo "$*"
		return
	fi

	if [ "$diff" -eq "$max" ]
	then	# the pending run has reached its maximum length
		range
	elif [ -n "$first" ]
	then	# a run is open: it continues only with the next code point
		if [ $(( $1 )) -ne $(( ${last-0} + 1 )) ]
		then	range
		fi
	fi

	if [ -z "$first" ]
	then	# open a new run with this entry
		first=$1
		v2=$2
		v3=$3
	fi

	last=$1
	diff=$(( $diff + 1 ))
  }
/EOS
  # Copy the "item ..." / "range" lines from stdin after the helper text and
  # let sh execute the combined script; what it prints is the compact table.
  cat
  ) | sh
# Disabled variant (the condition is the constant "false"): rewrites each
# record as a "{0xCODE, 0xUP - 0xCODE, 0xLO - 0xCODE}," row, replaces a
# difference with an empty target by 0, and wraps every row in an echo
# command so that sh evaluates each remaining difference to a number.
elif false
then
  sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/  {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
      -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/' \
      -e 's/\(0x[0-9A-F][0-9A-F]*\) - \(0x[0-9A-F][0-9A-F]*\)/$((`printf %d \1` - `printf %d \2`))/g' \
      -e 's/^/echo "/' -e 's/$/"/' |
  sh
# Variant used when $compact is not true: same row layout, but the
# "0xA - 0xB" differences are left in the output as written.
else
  sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/  {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
      -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/'
fi
# Whatever the selected variant printed becomes caseconv.t in the current
# directory.
) > caseconv.t