1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
|
#!/usr/bin/python
import unittest
import os
from pocketsphinx import Decoder, NGramModel
# Directory containing the dictionary, audio and language-model fixtures
# used by the tests below, located relative to this module's own path.
DATADIR = os.path.join(
    os.path.dirname(__file__),
    "../../test/data",
)
class TestLM(unittest.TestCase):
    """Decode one recording repeatedly while changing the language model and dictionary."""

    @staticmethod
    def _decode_file(decoder, path):
        """Decode the raw audio file at *path* as a single utterance.

        The file is read in binary mode and passed to *decoder* in
        1024-byte chunks between ``start_utt()`` and ``end_utt()``.

        Returns:
            The ``hypstr`` attribute of ``decoder.hyp()`` after the
            utterance has ended.
        """
        decoder.start_utt()
        with open(path, "rb") as stream:
            # read() yields b"" at end of file, which ends the iteration.
            for buf in iter(lambda: stream.read(1024), b""):
                decoder.process_raw(buf, False, False)
        decoder.end_utt()
        return decoder.hyp().hypstr

    def test_lm(self):
        """Decode ``goforward.raw`` three times with different search setups.

        1. Decoder built with ``defective.dic`` and no further setup:
           the hypothesis is expected to be empty.
        2. After adding and activating the "turtle" n-gram model:
           the hypothesis is still expected to be empty.
        3. After adding the words "foobie" and "meters":
           the hypothesis is expected to be "foobie meters meters".
        """
        audio = os.path.join(DATADIR, "goforward.raw")

        # Create a decoder with a broken dictionary
        decoder = Decoder(dict=os.path.join(DATADIR, "defective.dic"))
        hypstr = self._decode_file(decoder, audio)
        print("Decoding with default settings:", hypstr)
        self.assertEqual("", hypstr)

        # Load "turtle" language model and decode again.
        lm = NGramModel(
            decoder.config,
            decoder.logmath,
            os.path.join(DATADIR, "turtle.lm.bin"),
        )
        # Print the model's scores for a few word sequences (informational
        # only; nothing is asserted about them).
        print(lm.prob(["you"]))
        print(lm.prob(["are", "you"]))
        print(lm.prob(["you", "are", "what"]))
        print(lm.prob(["lost", "are", "you"]))

        # Adding a model must not switch to it; activation is a separate step.
        decoder.add_lm("turtle", lm)
        self.assertNotEqual(decoder.current_search(), "turtle")
        decoder.activate_search("turtle")
        self.assertEqual(decoder.current_search(), "turtle")

        hypstr = self._decode_file(decoder, audio)
        print('Decoding with "turtle" language:', hypstr)
        self.assertEqual("", hypstr)

        # The word 'meters' isn't in the loaded dictionary.
        # Let's add it manually.
        # NOTE(review): the third argument is False for the first word and
        # True for the last one; presumably it controls whether the decoder
        # refreshes its search after the addition - confirm against the
        # add_word documentation.
        decoder.add_word("foobie", "F UW B IY", False)
        decoder.add_word("meters", "M IY T ER Z", True)

        hypstr = self._decode_file(decoder, audio)
        print("Decoding with customized language:", hypstr)
        self.assertEqual("foobie meters meters", hypstr)
if __name__ == "__main__":
    # Run this module's test cases when it is executed as a script.
    unittest.main()
|