#!/bin/sh
#
# $Id: unicodetab2h.sh,v 1.3.2.1 2011/01/18 14:36:37 source Exp $
#
# This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
# project.
#
# Copyright (C) 1998-2009 OpenLink Software
#
# This project is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; only version 2 of the License, dated June 1991.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# ----------------------------------------------------------------------
#  Locale workaround: force LC_ALL=POSIX and LANG=C and export both, so
#  every command started later in this script inherits the same fixed
#  locale instead of whatever the caller had configured.
# ----------------------------------------------------------------------
LC_ALL=POSIX
export LC_ALL
LANG=C
export LANG
#######################################
# tabtoheader TABLE HEADER
#
# Rewrites every line of a semicolon-separated table into one
# UNICODE3_REC(...) record and stores the result in a C header.
#
# Arguments:
#   $1 - path of the input table (read only)
#   $2 - path of the header to generate (overwritten)
# Requires:
#   a writable directory named "tmp" in the current directory; the
#   stage files tmp/6.c, tmp/M.c and tmp/W.c are written there.
# Outputs:
#   progress messages on stdout; $2 receives the license comment, a
#   UNICODE3_HEADER line, the records and a UNICODE3_FOOTER line.
# Returns:
#   1 as soon as a stage cannot read its input or write its output,
#   0 otherwise.
#
# Both paths are quoted everywhere, so names containing whitespace work.
# printf is used for the progress text because "echo -n" is not portable
# across /bin/sh implementations.
#######################################
tabtoheader()
{
  printf '%s' "Translation from Unicode3 table to $2 ... "

  # Stage 1: wrap each line in UNICODE3_REC(...), reorder the fields and
  # tag the ones that still need conversion with one-letter markers
  # (e, then n o h i j m k l) for stage 2 to pick up.
  sed \
    -e 's/^\(.*\)$/UNICODE3_REC(\1),/g' \
    -e 's/^UNICODE3_REC(\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^)]*\)),$/UNICODE3_REC( 0x\1 , "\3" , \4 , "\5" ,e \7,f \8, UNICODE3_F6(\6) , "\2")/g' \
    -e 's/,f \([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^;]*\);\([^,]*\), UNICODE3_F6(\([^)]*\)) ,/,n \1,o \2,h \8,i \7,j \6,m \3,UNICODE3_F6(\9),k \5,l \4,/g' \
    < "$1" > tmp/6.c || return 1
  printf '%s' " 20% done... "

  # Stage 2: resolve the markers. Empty e/n/o fields become -1 (o also
  # gets a ".0" suffix), empty h/i/j fields become 0000 and all three get
  # a 0x prefix, m turns Y/N into 1/0, k and l are wrapped in quotes.
  # The order of the expressions matters: each "fill in the default"
  # rule must run before the rule that consumes the same marker.
  sed \
    -e 's/,e ,/,e -1,/g' \
    -e 's/,e \([^,]*\),/, \1 ,/g' \
    -e 's/,i ,/,i 0000,/g' \
    -e 's/,i \([^,]*\),/, 0x\1 ,/g' \
    -e 's/,j ,/,j 0000,/g' \
    -e 's/,j \([^,]*\),/, 0x\1 ,/g' \
    -e 's/,h ,/,h 0000,/g' \
    -e 's/,h \([^,]*\),/, 0x\1 ,/g' \
    -e 's/,m Y,/, 1 ,/g' \
    -e 's/,m N,/, 0 ,/g' \
    -e 's/,n ,/,n -1,/g' \
    -e 's/,n \([^,]*\),/, \1 ,/g' \
    -e 's/,o ,/,o -1,/g' \
    -e 's/,o \([^,]*\),/, \1.0 ,/g' \
    -e 's/,k \([^,]*\),/, "\1" ,/g' \
    -e 's/,l \([^,]*\),/, "\1" ,/g' \
    < tmp/6.c > tmp/M.c || return 1
  printf '%s' " 70% done... "

  # Stage 3: rewrite the UNICODE3_F6(...) placeholder. Empty becomes
  # NULL; "<tag> XXXX ..." with one to six hex groups becomes
  # UNICODE3_S1..UNICODE3_S6; any other tagged sequence becomes
  # UNICODE3_SX with UNICODE3_long_ligature("..."); an untagged single
  # group or pair becomes UNICODE3_S1/UNICODE3_S2 with UNICODE3_exact.
  sed \
    -e 's/,UNICODE3_F6(),/, NULL,/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\)),/, UNICODE3_S1(UNICODE3_\2, 0x\4),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S2(UNICODE3_\2, 0x\4, 0x\5),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S3(UNICODE3_\2, 0x\4, 0x\5, 0x\6),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S4(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S5(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7, 0x\8),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\) \([0-9A-F]*\)),/, UNICODE3_S6(UNICODE3_\2, 0x\4, 0x\5, 0x\6, 0x\7, 0x\8, 0x\9),/g' \
    -e 's/,UNICODE3_F6(\([<]\)\([^>]*\)\([>][ ]*\)\([0-9A-F ]*\)),/, UNICODE3_SX(UNICODE3_\2, UNICODE3_long_ligature("\4")),/g' \
    -e 's/,UNICODE3_F6(\([ ]*\)\([0-9A-F]*\)\([ ]*\)),/, UNICODE3_S1(UNICODE3_exact, 0x\2),/g' \
    -e 's/,UNICODE3_F6(\([ ]*\)\([0-9A-F]*\)\([ ]*\)\([0-9A-F]*\)\([ ]*\)),/, UNICODE3_S2(UNICODE3_exact, 0x\2, 0x\4),/g' \
    < tmp/M.c > tmp/W.c || return 1

  # Assemble the header in one redirection: license comment, opening
  # marker, the converted records, closing marker. The here-document
  # delimiter is quoted so the comment text is copied literally.
  {
    cat <<'EOF'
/*
* This file is generated by unicodetab2h.sh
*
* This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
* project.
*
* Copyright (C) 1998-2006 OpenLink Software
*
* This project is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; only version 2 of the License, dated June 1991.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*
*/
EOF
    echo 'UNICODE3_HEADER'
    cat tmp/W.c
    echo 'UNICODE3_FOOTER'
  } > "$2" || return 1

  echo " 100% done."
}
# ----------------------------------------------------------------------
#  Driver: split unicode3.dat (read from the current directory) into
#  four subsets with grep, then convert each subset and the full table
#  into a header with tabtoheader. Intermediate files live in ./tmp,
#  which is recreated at the start and removed at the end.
# ----------------------------------------------------------------------

# Stop before touching any generated header when the input table is
# missing; otherwise every header would be overwritten with an empty one.
if [ ! -r unicode3.dat ]; then
  echo "$0: cannot read unicode3.dat in the current directory" >&2
  exit 1
fi

rm -rf tmp
mkdir tmp || exit 1

# printf is used for the partial progress lines because "echo -n" is not
# portable across /bin/sh implementations.

# Lines that end with ';' followed by exactly four hex digits.
printf '%s' "Search for lowecase characters... "
grep ";[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$" < unicode3.dat > tmp/tmp1.c
echo " 100% done."

# Lines that end with ';', four hex digits and a closing ';'.
printf '%s' "Search for uppercase characters... "
grep ";[0-9A-F][0-9A-F][0-9A-F][0-9A-F];$" < unicode3.dat > tmp/tmp2.c
echo " 100% done."

# Lines containing the field sequence ";0;WS;".
printf '%s' "Search for 'logical space' characters... "
grep ";0;WS;" < unicode3.dat > tmp/tmp3.c
echo " 100% done."

# Lines whose sixth field is one four-digit hex group, optionally
# followed by a space and a second four-digit hex group.
printf '%s' "Search for modified/compatible characters... "
grep -E "^[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[0-9A-F][0-9A-F][0-9A-F][0-9A-F](( [0-9A-F][0-9A-F][0-9A-F][0-9A-F])?);" < unicode3.dat > tmp/tmp4.c
echo " 100% done."

tabtoheader tmp/tmp1.c unicode3_lowers.h
tabtoheader tmp/tmp2.c unicode3_uppers.h
tabtoheader tmp/tmp3.c unicode3_spaces.h
tabtoheader tmp/tmp4.c unicode3_basechars.h
tabtoheader unicode3.dat unicode3_all_chars.h

rm -rf tmp
# End of unicodetab2h.sh