File: proc-functionality.sh

package info (click to toggle)
hfst 3.16.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 14,532 kB
  • sloc: cpp: 101,875; sh: 6,717; python: 5,225; yacc: 4,985; lex: 2,900; makefile: 2,017; xml: 6
file content (218 lines) | stat: -rwxr-xr-x 6,148 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/bin/sh
# Functional checks for the hfst-proc tool.
# Usage: proc-functionality.sh [--python]
#   --python   run "python3 ./hfst-proc.py" instead of the compiled binary.
# Environment: srcdir - directory holding the *.strings fixtures.
TOOLDIR=../../tools/src

# Select the command under test.  TOOL may hold a command plus arguments, so
# later uses expand it unquoted.
case "$1" in
    --python)
        TOOL="python3 ./hfst-proc.py"
        ;;
    *)
        TOOL=$TOOLDIR/hfst-proc/hfst-apertium-proc
        # Exit with status 77 when the binary is missing or not executable
        # (presumably so the test harness reports the test as skipped).
        [ -x "$TOOL" ] || exit 77
        ;;
esac

# Fall back to the current directory when srcdir is unset or empty.
: "${srcdir:=./}"

# Basic lookup, generation and weighted lookup.
#
# In each check $TOOL writes straight to test.raw so that `if` tests the
# tool's own exit status; in `tool | tr` plain sh reports only tr's status,
# which would hide a failing tool.  Carriage returns are stripped afterwards,
# before the comparison with the expected output.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.

# basic lookup
if ! echo "cat" | $TOOL cat2dog.hfstol > test.raw ; then
    echo cat fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-cat-out.strings" ; then
    echo cat diffs
    exit 1
fi

# basic generation (reverse-lookup)
if ! echo '^dog$' | $TOOL -g cat2dog.genhfstol > test.raw ; then
    echo dog fail
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/cat.strings" ; then
    echo dog diffs
    exit 1
fi

# weighted lookup
if ! echo "cat" | $TOOL -W cat_weight_final.hfstol > test.raw ; then
    echo heavy cat fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-cat-weighted-out.strings" ; then
    echo heavy cat diffs
    exit 1
fi
# The raw capture is only an intermediate; test.strings is left in place.
rm -f test.raw

# capitalization checks
#
# In each check $TOOL writes straight to test.raw so that `if` tests the
# tool's own exit status; in `tool | tr` plain sh reports only tr's status,
# which would hide a failing tool.  Carriage returns are stripped afterwards,
# before the comparison with the expected output.
# The output redirection comes first so test.raw is truncated even when the
# input fixture cannot be opened.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.

# default options
if ! $TOOL proc-caps.hfstol > test.raw < "$srcdir/proc-caps-in.strings" ; then
    echo uppercase 1 fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-caps-out1.strings" ; then
    echo uppercase 1 diffs
    exit 1
fi

# generation (-g) round trip
if ! $TOOL -g proc-caps.genhfstol > test.raw < "$srcdir/proc-caps-gen.strings" ; then
    echo uppercase roundtrip fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-caps-out2.strings" ; then
    echo uppercase roundtrip diffs
    exit 1
fi

# with -c
if ! $TOOL -c proc-caps.hfstol > test.raw < "$srcdir/proc-caps-in.strings" ; then
    echo uppercase 2 fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-caps-out3.strings" ; then
    echo uppercase 2 diffs
    exit 1
fi

# with -w
if ! $TOOL -w proc-caps.hfstol > test.raw < "$srcdir/proc-caps-in.strings" ; then
    echo uppercase 3 fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-caps-out4.strings" ; then
    echo uppercase 3 diffs
    exit 1
fi

# with --cg --raw
if ! $TOOL --cg --raw proc-caps.hfstol > test.raw < "$srcdir/proc-caps-in.strings" ; then
    echo raw cg fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-caps-out5.strings" ; then
    echo raw cg diffs
    exit 1
fi
# The raw capture is only an intermediate; test.strings is left in place.
rm -f test.raw

# Xerox format tests:
#
# In each check $TOOL writes straight to test.raw so that `if` tests the
# tool's own exit status; in `tool | tr` plain sh reports only tr's status,
# which would hide a failing tool.  Carriage returns are stripped afterwards,
# before the comparison with the expected output.
# The output redirection comes first so test.raw is truncated even when the
# input fixture cannot be opened.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.

# --xerox alone
if ! $TOOL --xerox cat_weight_ambig.hfstol > test.raw < "$srcdir/cat_cat.strings" ; then
    echo xeroc fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/cat_weight_ambig_xerox.strings" ; then
    echo xerox cat_weight_ambig diffs
    exit 1
fi

# --xerox with -W
if ! $TOOL -W --xerox cat_weight_ambig.hfstol > test.raw < "$srcdir/cat_cat.strings" ; then
    echo xeroc -W fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/cat_weight_ambig_W_xerox.strings" ; then
    echo xerox cat_weight_ambig_W diffs
    exit 1
fi

# --xerox with -W --weight-classes 1
if ! $TOOL -W --weight-classes 1 --xerox cat_weight_ambig.hfstol > test.raw < "$srcdir/cat_cat.strings" ; then
    echo xeroc -W --weight-classes 1 fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/cat_weight_ambig_W1_xerox.strings" ; then
    echo xerox --weight-classes 1 cat_weight_ambig_W diffs
    exit 1
fi
# The raw capture is only an intermediate; test.strings is left in place.
rm -f test.raw

# weight-classes checks
#
# In each check $TOOL writes straight to test.raw so that `if` tests the
# tool's own exit status; in `tool | tr` plain sh reports only tr's status,
# which would hide a failing tool.  Carriage returns are stripped afterwards,
# before the comparison with the expected output.
# The output redirection comes first so test.raw is truncated even when the
# input fixture cannot be opened.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.

# --weight-classes 1
if ! $TOOL --weight-classes 1 cat_weight_ambig.hfstol > test.raw < "$srcdir/cat.strings" ; then
    echo cat_weight_ambig fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/cat_weight_ambig_out.strings" ; then
    echo cat_weight_ambig diffs
    exit 1
fi

# --weight-classes 2 with -W
if ! $TOOL --weight-classes 2 -W cat_weight_ambig.hfstol > test.raw < "$srcdir/cat.strings" ; then
    echo cat_weight_ambig_W fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/cat_weight_ambig_W_out.strings" ; then
    echo cat_weight_ambig_W diffs
    exit 1
fi
# The raw capture is only an intermediate; test.strings is left in place.
rm -f test.raw

# NUL flush checks
#
# $TOOL (run with -z) writes straight to test.raw so that `if` tests the
# tool's own exit status; in `tool | tr` plain sh reports only tr's status,
# which would hide a failing tool.  Carriage returns are stripped afterwards,
# before the comparison with the expected output.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.
# The input is two NUL-terminated records.
if ! printf 'cat.[][\n]\0cat.[][\n]\0' | $TOOL -z cat2dog.hfstol > test.raw ; then
    echo NUL flush fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-cat-NUL.strings" ; then
    echo NUL flush diffs
    exit 1
fi
# The raw capture is only an intermediate; test.strings is left in place.
rm -f test.raw

# Serial unicode ranges check (should really be tested for all --with-unicode-handler configurations):
# TODO: [ıŀʼnĸ] need exceptions in alphabet.cc
#
# Build a transducer from one "x:x <n>" line per lowercase code point in
# U+0100..U+017E, leaving out U+0131, U+0138, U+0140 and U+0149.  A failed
# build is reported here instead of surfacing later as a confusing diff
# (sh reports the status of the last pipeline stage only).
if ! perl -CS -e 'for(my $c=0x0100; $c < 0x017F; $c++){ my $l=chr $c; if($c != 0x0131 && $c != 0x0138 && $c != 0x0140 && $c != 0x149 && $l =~ m/\p{Lower}/){ print $l.":".$l." <n>\n"; }}' \
        | $TOOLDIR/hfst-strings2fst -j -S \
        | $TOOLDIR/hfst-minimize          \
        | $TOOLDIR/hfst-fst2fst -O -i -   \
        > proc-serial-unicode.hfstol ; then
    echo serial unicode build fail:
    exit 1
fi

# Feed every symbol of the transducer, followed by its uppercase form, back
# through $TOOL.  $TOOL is the last pipeline stage and writes straight to
# test.raw, so `if` tests the tool's own exit status rather than tr's;
# carriage returns are stripped afterwards.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.
if ! $TOOLDIR/hfst-fst2txt proc-serial-unicode.hfstol                  \
        | cut -f3                                                      \
        | perl -CS -ne 'next if /@|^$/;chomp;printf $_." ".uc $_."\n"' \
        | $TOOL proc-serial-unicode.hfstol > test.raw ; then
    echo serial unicode fail:
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-serial-unicode.strings" ; then
    echo serial unicode diffs
    exit 1
fi
rm proc-serial-unicode.hfstol
rm -f test.raw



# compounding / space handling checks
#
# In each check $TOOL writes straight to test.raw so that `if` tests the
# tool's own exit status; in `tool | tr` plain sh reports only tr's status,
# which would hide a failing tool.  Carriage returns are stripped afterwards,
# before the comparison with the expected output.
# The output redirection comes first so test.raw is truncated even when the
# input fixture cannot be opened.
# $TOOL is expanded unquoted on purpose: it may hold a command plus arguments.
if ! $TOOL compounds.hfstol > test.raw < "$srcdir/proc-compounds.strings" ; then
    echo compound fail
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-compounds-out.strings" ; then
    echo compound diffs
    exit 1
fi
rm test.strings
rm -f test.raw

# escaping
if ! $TOOL escaping.hfstol > test.raw < "$srcdir/proc-escaping.strings" ; then
    echo escaping fail
    tr -d '\r' < test.raw
    exit 1
fi
tr -d '\r' < test.raw > test.strings
if ! diff test.strings "$srcdir/proc-escaping-out.strings" ; then
    echo escaping diffs
    exit 1
fi
rm test.strings
rm -f test.raw


## Skip the new test introduced in version 3014: the script ends here with
## success, so the commented-out check below (presumably the test in
## question) never runs.
exit 0

#if ! $TOOL compounds2.hfstol < $srcdir/proc-compounds2.strings | tr -d '\r' > test.strings ; then
#    echo compound fail
#    cat test.strings
#    exit 1
#fi
#if ! diff test.strings $srcdir/proc-compounds2-out.strings ; then
#    exit 1
#fi
#rm test.strings