File: ctc_decoder_utils.py

package info (click to toggle)
pytorch-audio 0.13.1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 8,592 kB
  • sloc: python: 41,137; cpp: 8,016; sh: 3,538; makefile: 24
file content (78 lines) | stat: -rw-r--r-- 2,467 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import torch
from torchaudio.models.decoder import CTCDecoderLM, CTCDecoderLMState
from torchaudio.models.decoder._ctc_decoder import _create_word_dict, _Dictionary, _KenLM, _load_words


class CustomZeroLM(CTCDecoderLM):
    """Language model that contributes a score of 0.0 at every step."""

    def __init__(self):
        # Explicit base-class call kept as in the original.
        # NOTE(review): presumably required because the base is a native
        # binding where ``super()`` is discouraged -- confirm before changing.
        CTCDecoderLM.__init__(self)

    def start(self, start_with_nothing: bool):
        """Return a fresh root state; ``start_with_nothing`` is not used."""
        root_state = CTCDecoderLMState()
        return root_state

    def score(self, state: CTCDecoderLMState, token_index: int):
        """Return ``(child state for token_index, 0.0)``."""
        next_state = state.child(token_index)
        return next_state, 0.0

    def finish(self, state: CTCDecoderLMState):
        """Return ``(state, 0.0)`` without changing the state."""
        return state, 0.0


class CustomKenLM(CTCDecoderLM):
    """Decoder LM that forwards every call to a wrapped ``_KenLM`` instance."""

    def __init__(self, kenlm_file, dict_file):
        """Build the wrapped ``_KenLM`` from a model file and a word list file.

        Args:
            kenlm_file: passed to ``_KenLM`` as its first argument.
            dict_file: passed to ``_load_words``; the result is turned into a
                word dictionary with ``_create_word_dict``.
        """
        # Explicit base-class call kept as in the original.
        CTCDecoderLM.__init__(self)
        words = _load_words(dict_file)
        word_dict = _create_word_dict(words)
        self.model = _KenLM(kenlm_file, word_dict)

    def start(self, start_with_nothing: bool):
        """Delegate to the wrapped model's ``start``."""
        initial = self.model.start(start_with_nothing)
        return initial

    def score(self, state: CTCDecoderLMState, token_index: int):
        """Delegate to the wrapped model's ``score``."""
        result = self.model.score(state, token_index)
        return result

    def finish(self, state: CTCDecoderLMState):
        """Delegate to the wrapped model's ``finish``."""
        result = self.model.finish(state)
        return result


class BiasedLM(torch.nn.Module):
    """Module that scores a token index by comparing its entry to a keyword.

    ``forward`` returns a 0-dim tensor holding 10 when the dictionary entry
    equals ``keyword``, negative infinity when it equals ``"<unk>"``, and -10
    for anything else.
    """

    def __init__(self, dict_file, keyword):
        """Load the dictionary from ``dict_file`` and remember ``keyword``."""
        super().__init__()
        self.dictionary = _Dictionary(dict_file)
        self.keyword = keyword

    def forward(self, token_idx):
        """Return the bias for ``token_idx`` as a 0-dim tensor."""
        entry = self.dictionary.get_entry(token_idx)
        # The keyword check comes first, so it wins if keyword is "<unk>".
        if entry == self.keyword:
            bias = 10
        elif entry == "<unk>":
            bias = -torch.inf
        else:
            bias = -10
        return torch.tensor(bias)


class CustomBiasedLM(CTCDecoderLM):
    """Decoder LM that scores tokens with a callable ``model`` and caches results.

    Scores are cached in ``self.states``, keyed by decoder state, so the model
    is called at most once per distinct child state.
    """

    def __init__(self, model, dict_file):
        """Store the model, load the vocabulary and switch the model to eval mode.

        Args:
            model: object called as ``model(token_index)`` to obtain a score;
                must also provide ``eval()``.
            dict_file: passed to ``_Dictionary`` to build the vocabulary.
        """
        # Explicit base-class call kept as in the original.
        CTCDecoderLM.__init__(self)
        self.model = model
        self.vocab = _Dictionary(dict_file)
        # Index of "|" in the vocabulary; used as the start and finish token.
        self.eos = self.vocab.get_index("|")
        # Maps decoder state -> score returned by the model for that state.
        self.states = {}

        model.eval()

    def start(self, start_with_nothing: bool = False):
        """Create the root state and cache the model's score for ``self.eos``.

        ``start_with_nothing`` is accepted but not used.
        """
        state = CTCDecoderLMState()
        with torch.no_grad():
            score = self.model(self.eos)

        self.states[state] = score
        return state

    def score(self, state: CTCDecoderLMState, token_index: int):
        """Return ``(child state, score)`` for extending ``state`` by a token.

        The model is only invoked the first time a given child state is seen;
        later calls reuse the cached score.
        """
        outstate = state.child(token_index)
        if outstate not in self.states:
            # Run inference without autograd, consistent with ``start``, so
            # cached scores never hold on to a computation graph.
            with torch.no_grad():
                self.states[outstate] = self.model(token_index)

        return outstate, self.states[outstate]

    def finish(self, state: CTCDecoderLMState):
        """Score the transition from ``state`` to the ``self.eos`` token."""
        return self.score(state, self.eos)