File: test_examples.py

# -*- coding: utf-8 -*-
# The examples given in doxygen documentation
from __future__ import print_function
import sys
if len(sys.argv) > 1:
    sys.path.insert(0, sys.argv[1])
import hfst

types = []
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.SFST_TYPE):
    types.append(hfst.ImplementationType.SFST_TYPE)
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.TROPICAL_OPENFST_TYPE):
    types.append(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
if hfst.HfstTransducer.is_implementation_type_available(hfst.ImplementationType.FOMA_TYPE):
    types.append(hfst.ImplementationType.FOMA_TYPE)

for type in types:
    if hfst.HfstTransducer.is_implementation_type_available(type):

        hfst.set_default_fst_type(type)
        
        # StreamIsClosedException
        try:
            tr = hfst.regex('foo')
            outstr = hfst.HfstOutputStream(filename='testfile')
            outstr.close()
            outstr.write(tr)
        except hfst.exceptions.StreamIsClosedException:
            print("Could not write transducer: stream to file was closed.")
            
        # TransducerIsCyclicException
        transducer = hfst.regex('[a:b]*')
        try:
            results = transducer.extract_paths(output='text')
            print("The transducer has %i paths:" % len(results))
            print(results)
        except hfst.exceptions.TransducerIsCyclicException:
            print("The transducer is cyclic and has an infinite number of paths. Some of them:")
            results = transducer.extract_paths(output='text', max_cycles=5)
            print(results)
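        # Another way to bound the output for a cyclic transducer is max_number,
        # which limits how many paths are extracted (the same parameter is used
        # in the extract_paths section further below):
        print(transducer.extract_paths(output='text', max_number=3))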
        
        # NotTransducerStreamException
        f = open('foofile', 'w')
        f.write('This is an ordinary text file.\n')
        f.close()
        try:
            instr = hfst.HfstInputStream('foofile')
            tr = instr.read()
            print(tr)
            instr.close()
        except hfst.exceptions.NotTransducerStreamException:
            print("Could not print transducer: the file does not contain binary transducers.")
        
        # NotValidAttFormatException
        f = open('testfile1.att', 'w')
        f.write('0 1 a b\n1 2 c\n2\n')
        f.close()
        f = open('testfile1.att', 'r')
        try:
            tr = hfst.read_att_transducer(f)
        except hfst.exceptions.NotValidAttFormatException:
            print('Could not read file: it is not in valid ATT format.')
        f.close()
        
        # StateIsNotFinalException
        tr = hfst.HfstBasicTransducer()
        tr.add_state(1)
        # An exception is thrown as state number 1 is not final
        try:
            w = tr.get_final_weight(1)
        except hfst.exceptions.StateIsNotFinalException:
            print("State is not final.")
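        # Once the state has been made final, its weight can be read without an error:
        tr.set_final_weight(1, 0.5)
        print('Final weight of state 1: %f' % tr.get_final_weight(1))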

        # ContextTransducersAreNotAutomataException
        # try:
        #    tr = hfst.regex('a -> b || c:c __ c:d')
        # except hfst.exceptions.ContextTransducersAreNotAutomataException:
        #    print("Context transducers must be automata.")
        
        # TransducersAreNotAutomataException
        tr1 = hfst.regex('foo:bar')
        tr2 = hfst.regex('bar:baz')
        try:
            tr1.cross_product(tr2)
        except hfst.exceptions.TransducersAreNotAutomataException:
            print('Transducers must be automata in cross product.')
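        # A small sketch of the working case: the cross product of the automata
        # {foo} and {bar} relates every string of the first language to every
        # string of the second, here just the single pair foo:bar.
        a1 = hfst.regex('{foo}')
        a2 = hfst.regex('{bar}')
        a1.cross_product(a2)
        if not a1.compare(hfst.regex('{foo}:{bar}')):
            print('NOTE: cross_product gave an unexpected result')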
        
        # StateIndexOutOfBoundsException
        tr = hfst.HfstBasicTransducer()
        tr.add_state(1)
        try:
            w = tr.get_final_weight(2)
        except hfst.exceptions.StateIndexOutOfBoundsException:
            print('State number 2 does not exist')
        
        # TransducerTypeMismatchException:
        if (hfst.ImplementationType.FOMA_TYPE in types) and (hfst.ImplementationType.TROPICAL_OPENFST_TYPE in types):
            hfst.set_default_fst_type(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
            tr1 = hfst.regex('foo')
            tr2 = hfst.regex('bar')
            tr2.convert(hfst.ImplementationType.FOMA_TYPE)
            try:
                tr1.disjunct(tr2)
            except hfst.exceptions.TransducerTypeMismatchException:
                print('The implementation types of transducers must be the same.')

            hfst.set_default_fst_type(type)

        # fst
        # One unweighted identity path:
        if not hfst.fst('foo').compare(hfst.regex('{foo}')):
            raise RuntimeError('')
        # Weighted path: a tuple of string and number, e.g.
        if not hfst.fst(('foo',1.4)).compare(hfst.regex('{foo}::1.4')):
            raise RuntimeError('')
        if not hfst.fst(('bar',-3)).compare(hfst.regex('{bar}::-3')):
            raise RuntimeError('')
        if not hfst.fst(('baz',0)).compare(hfst.regex('{baz}')):
            raise RuntimeError('')
        # Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        if not hfst.fst(['foo', 'bar']).compare(hfst.regex('{foo}|{bar}')):
            raise RuntimeError('')
        if not hfst.fst(('foo', ('bar',5.0))).compare(hfst.regex('{foo}|{bar}::5.0')):
            raise RuntimeError('')
        if not hfst.fst(('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))).compare(hfst.regex('{foo}|{bar}::5.0|{baz}|{Foo}|{Bar}::2.4')):
            raise RuntimeError('')
        if not hfst.fst([('foo',-1), ('bar',0), ('baz',3.5)]).compare(hfst.regex('{foo}::-1|{bar}|{baz}::3.5')):
            raise RuntimeError('')
        # A dictionary
        if not hfst.fst({'foo':'foo', 'bar':('foo',1.4), 'baz':(('foo',-1),'BAZ')}).compare(hfst.regex('{foo}|{bar}:{foo}::1.4|{baz}:{foo}::-1|{baz}:{BAZ}')):
            raise RuntimeError('')
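        # A dictionary value can also be a single weighted output, i.e. a
        # (string, weight) tuple, as in the 'bar' entry above:
        if not hfst.fst({'foo':('bar',0.5)}).compare(hfst.regex('{foo}:{bar}::0.5')):
            raise RuntimeError('')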
        
        # tokenized_fst
        tok = hfst.HfstTokenizer()
        tok.add_multichar_symbol('foo')
        tok.add_multichar_symbol('bar')
        tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))
        if not tr.compare(hfst.regex('[foo:foo bar:b 0:a 0:z]')):
            raise RuntimeError('')
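        # tokenize can also be called with a single string (as in QuickStart (3/3)
        # below); the result is an identity transducer over the multicharacter
        # symbols defined above:
        tr = hfst.tokenized_fst(tok.tokenize('foobar'))
        if not tr.compare(hfst.regex('[foo bar]')):
            raise RuntimeError('')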

        # HfstBasicTransducer
        # Create an empty transducer
        # The transducer has initially one start state (number zero)
        # that is not final
        fsm = hfst.HfstBasicTransducer()
        # Add two states to the transducer
        fsm.add_state(1)
        fsm.add_state(2)
        # Create a transition [foo:bar] leading to state 1 with weight 0.1
        tr = hfst.HfstBasicTransition(1, 'foo', 'bar', 0.1)
        # and add it to state zero
        fsm.add_transition(0, tr)
        # Add a transition [baz:baz] with weight 0 from state 1 to state 2
        fsm.add_transition(1, hfst.HfstBasicTransition(2, 'baz', 'baz', 0.0))
        # Set state 2 as final with weight 0.3
        fsm.set_final_weight(2, 0.3)
        # Go through all states
        for state, arcs in enumerate(fsm):
            for arc in arcs:
                print('%i ' % (state), end='')
                print(arc)
            if fsm.is_final_state(state):
                print('%i %f' % (state, fsm.get_final_weight(state)) )

        for state in fsm.states():
            for arc in fsm.transitions(state):
                print('%i ' % (state), end='')
                print(arc)
            if fsm.is_final_state(state):
                print('%i %f' % (state, fsm.get_final_weight(state)) )
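        # A small sanity sketch: the machine built above has a single path
        # [foo:bar baz] with total weight 0.4 (0.1 + 0.0 + 0.3); the weight
        # distribution may differ from the regex below, so only a NOTE is printed
        # if the comparison fails (weights are ignored altogether for unweighted
        # implementation types).
        if not hfst.HfstTransducer(fsm).compare(hfst.regex('[foo:bar baz]::0.4')):
            print('NOTE: fsm did not compare equal to [foo:bar baz]::0.4')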
        
        # HfstBasicTransducer.disjunct
        lexicon = hfst.HfstBasicTransducer()
        tok = hfst.HfstTokenizer()
        lexicon.disjunct(tok.tokenize('dog'), 0.3)
        lexicon.disjunct(tok.tokenize('cat'), 0.5)
        lexicon.disjunct(tok.tokenize('elephant'), 1.6)
        lexicon = hfst.HfstTransducer(lexicon)
        if not lexicon.compare(hfst.regex('{dog}::0.3|{cat}::0.5|{elephant}::1.6')):
            raise RuntimeError('')
        
        # HfstBasicTransducer.transitions
        for state in fsm.states():
            for arc in fsm.transitions(state):
                print('%i ' % (state), end='')
                print(arc)
            if fsm.is_final_state(state):
                print('%i %f' % (state, fsm.get_final_weight(state)) )
        
        # HfstBasicTransducer.substitute and HfstTransducer.substitute
        HFST = hfst.regex('a:a')
        basic = hfst.HfstBasicTransducer(HFST)
        HFST.substitute('a', 'A', input=True, output=False)
        basic.substitute('a', 'A', input=True, output=False)
        
        HFST = hfst.regex('a a:b b')
        basic = hfst.HfstBasicTransducer(HFST)
        HFST.substitute(('a','b'),('A','B'))
        basic.substitute(('a','b'),('A','B'))
        
        HFST = hfst.regex('a a:b b')
        basic = hfst.HfstBasicTransducer(HFST)
        HFST.substitute(('a','b'), (('A','B'),('a','B'),('A','b')))
        basic.substitute(('a','b'), (('A','B'),('a','B'),('A','b')))
        
        HFST = hfst.regex('a a:b b')
        basic = hfst.HfstBasicTransducer(HFST)
        HFST.substitute(('a','b'), hfst.regex('[a:b]+'))
        basic.substitute(('a','b'), hfst.HfstBasicTransducer(hfst.regex('[a:b]+')))
        
        HFST = hfst.regex('a b c d')
        basic = hfst.HfstBasicTransducer(HFST)
        HFST.substitute({'a':'A', 'b':'B', 'c':'C'})
        basic.substitute({'a':'A', 'b':'B', 'c':'C'})
        
        HFST = hfst.regex('a a:b b b:c c c:d d')
        basic = hfst.HfstBasicTransducer(HFST)
        HFST.substitute( {('a','a'):('A','A'), ('b','b'):('B','B'), ('c','c'):('C','C')} )
        basic.substitute( {('a','a'):('A','A'), ('b','b'):('B','B'), ('c','c'):('C','C')} )
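        # A short verification sketch for the pair-dictionary form above: only
        # the identity pairs a:a, b:b and c:c should be rewritten, while a:b,
        # b:c, c:d and d:d are left untouched.
        if not HFST.compare(hfst.regex('A a:b B b:c C c:d d')):
            print('NOTE: pair substitution gave an unexpected result')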
        
        # HfstBasicTransducer.enumerate
        for state, arcs in enumerate(fsm):
            for arc in arcs:
                print('%i ' % (state), end='')
                print(arc)
            if fsm.is_final_state(state):
                print('%i %f' % (state, fsm.get_final_weight(state)) )
        
        # HfstTransducer
        # argument handling
        transducer1 = hfst.regex('foo:bar baz')
        transducer2 = hfst.regex('FOO:BAR BAZ')
        transducer1.reverse()
        transducer1.disjunct(transducer2)
        if not transducer2.compare(hfst.regex('FOO:BAR BAZ')):
            raise RuntimeError('')
        transducer1.reverse()
        transducer1.determinize()
        transducer1.reverse()
        transducer1.determinize()
        # implementation types
        if (hfst.ImplementationType.FOMA_TYPE in types) and (hfst.ImplementationType.TROPICAL_OPENFST_TYPE in types):
            tropical_transducer = hfst.regex('foo')
            tropical_transducer.convert(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
            foma_transducer = hfst.regex('foo')
            foma_transducer.convert(hfst.ImplementationType.FOMA_TYPE)
            # TODO: segfaults
            try:
                tropical_transducer.compare(foma_transducer)
            except hfst.exceptions.TransducerTypeMismatchException:
                print('Implementation types of transducers must be the same.')

        # read_att from file
        f = open('testfile2.att', 'w')
        f.write(
"""0 1 foo bar 0.3
1 0.5
--
0 0.0
--
--
0 0.0
0 0 a <eps> 0.2
""")
        f.close()

        transducers = []
        ifile = open('testfile2.att', 'r')
        try:
            while True:
                t = hfst.read_att_transducer(ifile, '<eps>')
                transducers.append(t)
                print("read one transducer")
        except hfst.exceptions.NotValidAttFormatException:
            print("Error reading transducer: not valid AT&T format.")
        except hfst.exceptions.EndOfStreamException:
            pass
        ifile.close()
        print("Read %i transducers in total" % len(transducers))

# read_att from string
#att_str = """0 1 a b
#1 2 c d
#2
#"""
#print(att_str)
#tr = hfst.read_att(att_str, '@0@')
#print(tr)
#exit(0)

        # write_att
        tr1 = hfst.regex('[foo:bar baz:0 " "]::0.3')
        tr2 = hfst.empty_fst()
        tr3 = hfst.epsilon_fst(0.5)
        tr4 = hfst.regex('[foo]')
        tr5 = hfst.empty_fst()

        f = open('testfile3.att', 'w')
        for tr in [tr1, tr2, tr3, tr4]:
            tr.write_att(f)
            f.write('--\n')
        tr5.write_att(f)
        f.close()

        # extract_paths
        tr = hfst.regex('a:b+ (a:c+)')
        print(tr)
        print(tr.extract_paths(max_cycles=1, output='text'))
        print(tr.extract_paths(max_number=4, output='text'))
        print(tr.extract_paths(max_cycles=1, max_number=4, output='text'))
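        # extract_paths can also return a dictionary from input strings to lists
        # of (output, weight) pairs; the same output='dict' form is used in
        # QuickStart (3/3) below.
        paths = hfst.regex('{cat}:{dog}::0.5|{cat}::0.3').extract_paths(output='dict')
        for inp, outs in paths.items():
            print('%s -> %s' % (inp, outs))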

        # HfstOutputStream
        res = ['foo:bar','0','0 - 0','"?":?','a* b+']
        ostr = hfst.HfstOutputStream(filename='testfile1.hfst')
        for re in res:
            ostr.write(hfst.regex(re))
            ostr.flush()
        ostr.close()
        
        # HfstInputStream
        istr = hfst.HfstInputStream('testfile1.hfst')
        transducers = []
        while not istr.is_eof():
            transducers.append(istr.read())
        istr.close()
        if len(transducers) != len(res):
            raise RuntimeError('')
        for tr, re in zip(transducers, res):
            if not tr.compare(hfst.regex(re)):
                raise RuntimeError('')

        # push_weights
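        # A minimal sketch: push_weights_to_end moves weights toward the final
        # states without changing the total weight of any path (the same call is
        # demonstrated in QuickStart (2/3) below).
        pw = hfst.regex('[a:b]::0.3 [b:c]::0.4')
        pw.push_weights_to_end()
        print(pw)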

        # QuickStart (1/3)

        tr1 = hfst.regex('foo:bar')
        tr2 = hfst.regex('bar:baz')
        tr1.compose(tr2)
        print(tr1)
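        # The composition of foo:bar with bar:baz should be equivalent to foo:baz.
        if not tr1.compare(hfst.regex('foo:baz')):
            raise RuntimeError('')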

        # QuickStart (2/3)

        # Create an HFST basic transducer [a:b] with transition weight 0.3 and final weight 0.5.
        t = hfst.HfstBasicTransducer()
        t.add_state(1)
        t.add_transition(0, 1, 'a', 'b', 0.3)
        t.set_final_weight(1, 0.5)

        # Convert to tropical OpenFst format (the default, if not set) and push weights toward the final state.
        T = hfst.HfstTransducer(t)
        T.push_weights_to_end()

        # Convert back to an HFST basic transducer.
        tc = hfst.HfstBasicTransducer(T)
        try:
            # Rounding might affect the precision.
            if (0.79 < tc.get_final_weight(1)) and (tc.get_final_weight(1) < 0.81):
                print("TEST PASSED")
            else:
                if hfst.get_default_fst_type() == hfst.ImplementationType.TROPICAL_OPENFST_TYPE:
                    raise RuntimeError('')
                else:
                    pass # ok to fail if weights are not in use
        # If the state does not exist or is not final
        except hfst.exceptions.HfstException:
            print("TEST FAILED: An exception thrown.")
            raise RuntimeError('')

        # QuickStart (3/3)

        # Create a simple lexicon transducer [[foo bar foo] | [foo bar baz]].
        tok = hfst.HfstTokenizer()
        tok.add_multichar_symbol('foo')
        tok.add_multichar_symbol('bar')
        tok.add_multichar_symbol('baz')
        
        words = hfst.tokenized_fst(tok.tokenize('foobarfoo'))
        t = hfst.tokenized_fst(tok.tokenize('foobarbaz'))
        words.disjunct(t)

        # Create a rule transducer that optionally replaces 'bar' with 'baz' between 'foo' and 'foo'.
        rule = hfst.regex('bar (->) baz || foo _ foo')

        # Apply the rule transducer to the lexicon.
        words.compose(rule)
        words.minimize()

        # Extract all string pairs from the result and print them to standard output.
        results = {}
        try:
            # Extract paths and remove tokenization
            results = words.extract_paths(output='dict')
        except hfst.exceptions.TransducerIsCyclicException:
            # This should not happen because transducer is not cyclic.
            print("TEST FAILED")
            exit(1)

        for input_str, outputs in results.items():
            print('%s:' % input_str)
            for output in outputs:
                print('  %s\t%f' % (output[0], output[1]))
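        # A small expected-result sketch: since the rule is optional, 'foobarfoo'
        # should map both to itself and to 'foobazfoo', while 'foobarbaz' should
        # map only to itself.
        foobarfoo_outputs = sorted(out[0] for out in results.get('foobarfoo', ()))
        if foobarfoo_outputs != ['foobarfoo', 'foobazfoo']:
            print('NOTE: unexpected outputs for foobarfoo: %s' % foobarfoo_outputs)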


# Foma issue