File: unicodetab2h.sh

package info (click to toggle)
virtuoso-opensource 6.1.4%2Bdfsg1-7
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 245,116 kB
  • sloc: ansic: 639,631; sql: 439,225; xml: 287,085; java: 61,048; sh: 38,723; cpp: 36,889; cs: 25,240; php: 12,562; yacc: 9,036; lex: 7,149; makefile: 6,093; jsp: 4,447; awk: 1,643; perl: 1,017; ruby: 1,003; python: 329
file content (120 lines) | stat: -rwxr-xr-x 5,802 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/bin/sh
#  
#  $Id: unicodetab2h.sh,v 1.3.2.1 2011/01/18 14:36:37 source Exp $
#
#  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
#  project.
#  
#  Copyright (C) 1998-2009 OpenLink Software
#  
#  This project is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by the
#  Free Software Foundation; only version 2 of the License, dated June 1991.
#  
#  This program is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#  General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#  

# ----------------------------------------------------------------------
#  Locale: run every command below under the C/POSIX locale.
#  (Presumably this keeps the [0-9A-F] ranges used by sed/grep from
#  being affected by the caller's locale -- confirm if it matters.)
# ----------------------------------------------------------------------
LANG=C;       export LANG
LC_ALL=POSIX; export LC_ALL


# ----------------------------------------------------------------------
#  tabtoheader INPUT OUTPUT
#
#  Converts a semicolon-separated character table into a C header made
#  of UNICODE3_REC(...) records.
#
#  Arguments:
#    $1  input table; every line is wrapped into one UNICODE3_REC(...)
#    $2  header file to (over)write
#
#  Requires a writable directory "tmp" in the current directory: each
#  sed pass writes its result to tmp/4.c ... tmp/W.c, the last of which
#  is copied into the header between UNICODE3_HEADER and UNICODE3_FOOTER.
#
#  Progress messages go to stdout.  Returns 1 (with a message on stderr)
#  when the input table cannot be read.
#
#  NOTE: several sed replacements below contain literal TAB characters;
#  they are part of the generated text, so keep them when editing.
# ----------------------------------------------------------------------
tabtoheader()
{
  if [ ! -r "$1" ]; then
    printf 'tabtoheader: cannot read input table: %s\n' "$1" >&2
    return 1
  fi

  # printf instead of "echo -n": the latter is not portable under /bin/sh.
  printf 'Translation from Unicode3 table to %s ...\t' "$2"

  # Wrap every input line, then split it into tagged fields.  The one-letter
  # tags (e, f, h, i, j, k, l, m, n, o) are temporary markers that the later
  # passes consume one by one.
  sed 's/^\(.*\)$/UNICODE3_REC(\1),/g' < "$1" > tmp/4.c
  sed 's/^UNICODE3_REC(\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^)]*\)),$/UNICODE3_REC( 0x\1	, "\3"	, \4	, "\5"	,e \7,f \8, UNICODE3_F6(\6) , "\2")/g' < tmp/4.c > tmp/5.c
  sed 's/,f \([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^,]*\), UNICODE3_F6(\([^)]*\)) ,/,n \1,o \2,h \8,i \7,j \6,m \3,UNICODE3_F6(\9),k \5,l \4,/g' < tmp/5.c > tmp/6.c
  printf ' 20%% done...\t'

  # Per-tag passes: first give an empty field a default (-1 or 0000), then
  # strip the tag and emit the value as a number, hex literal or C string.
  sed 's/,e ,/,e -1,/g' < tmp/6.c > tmp/7.c
  sed 's/,e \([^,]*\),/, \1	,/g' < tmp/7.c > tmp/8.c
  sed 's/,i ,/,i 0000,/g' < tmp/8.c > tmp/9.c
  sed 's/,i \([^,]*\),/, 0x\1	,/g' < tmp/9.c > tmp/A.c
  sed 's/,j ,/,j 0000,/g' < tmp/A.c > tmp/B.c
  sed 's/,j \([^,]*\),/, 0x\1	,/g' < tmp/B.c > tmp/C.c
  sed 's/,h ,/,h 0000,/g' < tmp/C.c > tmp/D.c
  sed 's/,h \([^,]*\),/, 0x\1	,/g' < tmp/D.c > tmp/E.c
  sed 's/,m Y,/, 1	,/g' < tmp/E.c > tmp/F.c
  sed 's/,m N,/, 0	,/g' < tmp/F.c > tmp/G.c
  sed 's/,n ,/,n -1,/g' < tmp/G.c > tmp/H.c
  sed 's/,n \([^,]*\),/, \1	,/g' < tmp/H.c > tmp/I.c
  sed 's/,o ,/,o -1,/g' < tmp/I.c > tmp/J.c
  sed 's/,o \([^,]*\),/, \1.0   	,/g' < tmp/J.c > tmp/K.c
  sed 's/,k \([^,]*\),/, "\1"	,/g' < tmp/K.c > tmp/L.c
  sed 's/,l \([^,]*\),/, "\1"	,/g' < tmp/L.c > tmp/M.c
  printf ' 70%% done...\t'

  # Rewrite the UNICODE3_F6(...) placeholder: empty -> NULL, "<tag> c1..cN"
  # -> UNICODE3_S<N>(UNICODE3_<tag>, ...) for N = 1..6, longer tagged lists
  # -> UNICODE3_SX, and untagged one/two-code lists -> UNICODE3_exact.
  sed 's/,UNICODE3_F6(),/, NULL,/g' < tmp/M.c > tmp/N.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\)),/, UNICODE3_S1(UNICODE3_\2, 0x\4),/g' < tmp/N.c > tmp/O.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S2(UNICODE3_\2, 0x\4, 0x\5),/g' < tmp/O.c > tmp/P.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S3(UNICODE3_\2, 0x\4, 0x\5, 0x\6),/g' < tmp/P.c > tmp/Q.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S4(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7),/g' < tmp/Q.c > tmp/R.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S5(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7, 0x\8),/g' < tmp/R.c > tmp/S.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S6(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7, 0x\8, 0x\9),/g' < tmp/S.c > tmp/T.c
  sed 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F ]*\)),/, UNICODE3_SX(UNICODE3_\2, UNICODE3_long_ligature("\4")),/g' < tmp/T.c > tmp/U.c
  sed 's/,UNICODE3_F6(\([ ]*\)\([0-9A-F]*\)\([ ]*\)),/, UNICODE3_S1(UNICODE3_exact, 0x\2),/g' < tmp/U.c > tmp/V.c
  sed 's/,UNICODE3_F6(\([ ]*\)\([0-9A-F]*\)\([ ]*\)\([0-9A-F]*\)\([ ]*\)),/, UNICODE3_S2(UNICODE3_exact, 0x\2, 0x\4),/g' < tmp/V.c > tmp/W.c

  # Assemble the header in one go.  The here-doc delimiter is quoted so the
  # licence text is emitted verbatim, and "$2" is quoted so output paths
  # containing whitespace work.
  {
    cat <<'EOF'
/*
 *  This file is generated by unicodetab2h.sh
 *  
 *  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
 *  project.
 *  
 *  Copyright (C) 1998-2006 OpenLink Software
 *  
 *  This project is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; only version 2 of the License, dated June 1991.
 *  
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *  
 *  
 */
EOF
    echo 'UNICODE3_HEADER'
    cat tmp/W.c
    echo 'UNICODE3_FOOTER'
  } > "$2"

  printf ' 100%% done.\n'
}


# ----------------------------------------------------------------------
#  Driver: split unicode3.dat (read from the current directory) into
#  four subsets with grep, then turn each subset and the full table into
#  a header with tabtoheader.  The scratch directory "tmp" is recreated
#  at the start and removed at the end.
# ----------------------------------------------------------------------

# Stop before touching anything when the table is missing; otherwise every
# generated header would be overwritten with an empty record list.
if [ ! -r unicode3.dat ]; then
  echo "unicodetab2h.sh: cannot read unicode3.dat in the current directory" >&2
  exit 1
fi

rm -rf tmp
mkdir tmp || exit 1

# printf is used for the progress messages because "echo -n" is not
# portable under /bin/sh.

# Lines ending in ";" followed by four hex digits.
printf 'Search for lowercase characters...\t'
grep ';[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$' < unicode3.dat > tmp/tmp1.c
printf ' 100%% done.\n'

# Lines ending in ";", four hex digits and a final ";".
printf 'Search for uppercase characters...\t'
grep ';[0-9A-F][0-9A-F][0-9A-F][0-9A-F];$' < unicode3.dat > tmp/tmp2.c
printf ' 100%% done.\n'

# Lines containing the field sequence ";0;WS;".
printf "Search for 'logical space' characters...\t"
grep ';0;WS;' < unicode3.dat > tmp/tmp3.c
printf ' 100%% done.\n'

# Lines whose sixth field is one or two four-digit hex codes.
printf 'Search for modified/compatible characters...\t'
grep -E '^[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[0-9A-F][0-9A-F][0-9A-F][0-9A-F](( [0-9A-F][0-9A-F][0-9A-F][0-9A-F])?);' < unicode3.dat > tmp/tmp4.c
printf ' 100%% done.\n'

tabtoheader tmp/tmp1.c unicode3_lowers.h
tabtoheader tmp/tmp2.c unicode3_uppers.h
tabtoheader tmp/tmp3.c unicode3_spaces.h
tabtoheader tmp/tmp4.c unicode3_basechars.h
tabtoheader unicode3.dat unicode3_all_chars.h

rm -rf tmp