File: lm_test.py

package info (click to toggle)
pocketsphinx 5.0.4-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,236 kB
  • sloc: ansic: 54,519; python: 2,438; sh: 566; cpp: 410; perl: 342; yacc: 93; lex: 50; makefile: 30
file content (73 lines) | stat: -rw-r--r-- 2,531 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/python

import unittest
import os
from pocketsphinx import Decoder, NGramModel

# Directory the tests read their data files from (dictionary, raw audio,
# language model), resolved relative to this file's location.
DATADIR = os.path.join(os.path.dirname(__file__), "../../test/data")


class TestLM(unittest.TestCase):
    """Check decoder hypotheses before and after loading a language model."""

    @staticmethod
    def _decode_goforward(decoder):
        """Decode ``goforward.raw`` as one utterance on *decoder*.

        The file is read in 1024-byte chunks; every non-empty chunk is handed
        to ``decoder.process_raw`` between ``start_utt`` and ``end_utt``.
        """
        decoder.start_utt()
        with open(os.path.join(DATADIR, "goforward.raw"), "rb") as audio:
            # iter() with a b"" sentinel ends the loop on the first empty read.
            for chunk in iter(lambda: audio.read(1024), b""):
                decoder.process_raw(chunk, False, False)
        decoder.end_utt()

    def test_lm(self):
        """Decode the same audio three times while changing the search setup.

        Pass 1 uses the decoder as constructed, pass 2 runs after the
        "turtle" language model has been added and activated, and pass 3
        runs after two words have been added with ``add_word``.
        """
        # Build the decoder on top of the defective dictionary file.
        dictionary = os.path.join(DATADIR, "defective.dic")
        decoder = Decoder(dict=dictionary)

        self._decode_goforward(decoder)
        print("Decoding with default settings:", decoder.hyp().hypstr)
        self.assertEqual("", decoder.hyp().hypstr)

        # Load the "turtle" language model using the decoder's own config
        # and logmath, then print the model's score for a few word sequences.
        lm = NGramModel(
            decoder.config,
            decoder.logmath,
            os.path.join(DATADIR, "turtle.lm.bin"),
        )
        probes = (
            ["you"],
            ["are", "you"],
            ["you", "are", "what"],
            ["lost", "are", "you"],
        )
        for words in probes:
            print(lm.prob(words))

        # Registering the model must not switch searches by itself; only
        # activate_search() makes "turtle" the current one.
        decoder.add_lm("turtle", lm)
        self.assertNotEqual(decoder.current_search(), "turtle")
        decoder.activate_search("turtle")
        self.assertEqual(decoder.current_search(), "turtle")

        self._decode_goforward(decoder)
        print('Decoding with "turtle" language:', decoder.hyp().hypstr)
        self.assertEqual("", decoder.hyp().hypstr)

        # 'meters' is missing from the loaded dictionary, so add it (and
        # 'foobie') by hand before the final pass.
        decoder.add_word("foobie", "F UW B IY", False)
        decoder.add_word("meters", "M IY T ER Z", True)

        self._decode_goforward(decoder)
        print("Decoding with customized language:", decoder.hyp().hypstr)
        self.assertEqual("foobie meters meters", decoder.hyp().hypstr)


# When executed directly as a script, hand control to unittest's runner.
if __name__ == "__main__":
    unittest.main()