1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
#!/bin/sh
# Test setup: handle the --python invocation, record where the tools live,
# and make sure $srcdir has a value before it is used to build paths.

# This test is not run in --python mode; leave with status 77
# (presumably the test harness's "skipped" code -- confirm).
case "$1" in
    --python) exit 77 ;;
esac

# Directory containing the hfst-* command-line tools, relative to the cwd.
TOOLDIR=../../tools/src

# Fall back to the current directory when srcdir is unset or empty.
: "${srcdir:=./}"
# Prerequisites: produce the three transducer files used by the tokenizer
# checks. Each tool reads its input from $srcdir on stdin and writes its
# output back into $srcdir; if any tool fails, a short message is printed and
# the script exits with status 1.
# Every expansion is quoted so that a $srcdir containing whitespace or glob
# characters is still treated as a single path.

# Run hfst-lexc (quiet mode) on the .lexc source.
if ! "$TOOLDIR/hfst-lexc" -q \
        < "$srcdir/tokenize-backtrack.lexc" \
        > "$srcdir/tokenize-backtrack-gen.hfst"; then
    echo punct backtrack gen fail
    exit 1
fi

# Run hfst-invert on the transducer produced by the previous step.
if ! "$TOOLDIR/hfst-invert" \
        < "$srcdir/tokenize-backtrack-gen.hfst" \
        > "$srcdir/tokenize-backtrack.hfst"; then
    echo invert backtrack fail
    exit 1
fi

# Run hfst-pmatch2fst on the pmatch script; the resulting .pmhfst is the file
# that hfst-tokenize is given later in this script.
if ! "$TOOLDIR/hfst-pmatch2fst" \
        < "$srcdir/tokenize-backtrack.pmscript" \
        > "$srcdir/tokenize-backtrack.pmhfst"; then
    echo pmatch2fst backtrack fail
    exit 1
fi
# Only --giella-cg supports this:

# check_giella_cg INPUT EXPECTED_FILE [LABEL]
#
# Pipes INPUT (followed by a newline) through
# `hfst-tokenize --giella-cg $srcdir/tokenize-backtrack.pmhfst`, storing the
# output in ./test.strings, then diffs that output against EXPECTED_FILE.
#
#   INPUT          text fed to the tokenizer on stdin
#   EXPECTED_FILE  path of the file holding the expected tokenizer output
#   LABEL          optional word inserted into the failure message so the
#                  failing case can be told apart from the others
#
# Exits the whole script with status 1 if the tokenizer fails (after printing
# whatever it wrote) or if the output differs from EXPECTED_FILE (after diff
# has printed the differences). Returns normally otherwise.
check_giella_cg() {
    if ! printf '%s\n' "$1" \
            | "$TOOLDIR/hfst-tokenize" --giella-cg "$srcdir/tokenize-backtrack.pmhfst" \
            > test.strings; then
        # ${3:+$3 } expands to "LABEL " only when a label was supplied.
        echo "tokenize --giella-cg ${3:+$3 }fail:"
        cat test.strings
        exit 1
    fi
    if ! diff test.strings "$2"; then
        # Repeat the diff command line so the log names the files compared.
        echo diff test.strings "$2"
        exit 1
    fi
}

check_giella_cg "busse skuvla skuvla busse Jan." \
    "$srcdir/tokenize-backtrack-out-giella-cg.strings"

check_giella_cg "su. su" \
    "$srcdir/tokenize-backtrack-out-giella-cg-contiguous.strings" contiguous

# This case is labelled "spaces" to match its expected-output file; it was
# previously reported as "contiguous", which made failures ambiguous.
check_giella_cg "njeallje logi guokte" \
    "$srcdir/tokenize-backtrack-out-giella-cg-spaces.strings" spaces
# All checks passed: remove the scratch output and the generated transducers.
# test.strings was written to the current directory, but the three .hfst /
# .pmhfst files were written under $srcdir, so they have to be removed from
# there; otherwise they are left behind whenever $srcdir is not the cwd.
rm test.strings \
    "$srcdir/tokenize-backtrack.pmhfst" \
    "$srcdir/tokenize-backtrack.hfst" \
    "$srcdir/tokenize-backtrack-gen.hfst"
exit 0
|