File: s2mfc.py

package info (click to toggle)
sphinxtrain 5.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 32,572 kB
  • sloc: ansic: 94,052; perl: 8,939; python: 6,702; cpp: 2,044; makefile: 6
file content (146 lines) | stat: -rw-r--r-- 3,973 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Copyright (c) 2006 Carnegie Mellon University
#
# You may copy and modify this freely under the same terms as
# Sphinx-III

"""Read and write Sphinx feature files.

This module reads and writes the acoustic feature files used by all
Sphinx speech recognition systems.
"""

__author__ = "David Huggins-Daines <dhdaines@gmail.com>"
__version__ = "$Revision$"

from struct import unpack, pack
from numpy import array, reshape
import io


def open(f, mode="rb", veclen=13):
    """Return a reader or writer object for a Sphinx-II feature file.

    f is handed unchanged to the reader/writer class, and veclen is the
    number of floats per feature vector.  mode selects the direction:
    'r' or 'rb' gives an S2Feat_read, 'w' or 'wb' gives an S2Feat_write.
    If mode is None, the 'mode' attribute of f is used when f has one,
    and 'rb' otherwise.  Any other mode raises Exception.
    """
    if mode is None:
        # No explicit mode: use the one f advertises, else read.
        mode = getattr(f, 'mode', 'rb')

    read_modes = ('r', 'rb')
    write_modes = ('w', 'wb')

    if mode in read_modes:
        return S2Feat_read(f, veclen)
    if mode in write_modes:
        return S2Feat_write(f, veclen)
    raise Exception("mode must be 'r', 'rb', 'w', or 'wb'")


class S2Feat_read(object):
    """Read Sphinx-II format feature files.

    The layout handled here is a 4-byte integer header followed by
    4-byte floats.  The header holds the number of floats that follow
    and is also used to detect the byte order of the file (see
    readheader).

    An instance is an iterator yielding one tuple of ``veclen`` floats
    per step, and a context manager that closes the file on exit.
    """

    def __init__(self, filename=None, veclen=13):
        """Create a reader, opening filename at once if one is given.

        veclen is the number of floats in each feature vector.
        """
        self.veclen = veclen
        self.fh = None
        if filename is not None:
            self.open(filename)

    def __iter__(self):
        """Rewind to the first vector and return self as the iterator."""
        self.fh.seek(4, 0)
        return self

    def open(self, filename):
        """Open filename for reading and validate its header.

        Any file this reader already holds is closed first.  If the
        header cannot be validated the new handle is closed again
        before the exception propagates, so a failed open never leaves
        a file descriptor behind.
        """
        self.close()
        self.filename = filename
        self.fh = io.open(filename, "rb")
        try:
            self.readheader()
        except Exception:
            self.close()
            raise

    def close(self):
        """Close the underlying file, if any.  Safe to call repeatedly."""
        # getattr keeps this safe when called from __del__ on an
        # instance whose __init__ never ran.
        fh, self.fh = getattr(self, "fh", None), None
        if fh is not None:
            fh.close()

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.close()
        # Never suppress an exception raised inside the with-block.
        return False

    def readheader(self):
        """Read the header and work out the byte order of the file.

        Sets ``filesize`` (number of 4-byte words after the header, as
        measured from the file length), ``hdrsize`` (the count stored
        in the header) and ``swap`` ('>' or '<', the struct byte-order
        prefix under which the two agree).  On return the file position
        is just past the header.

        Raises Exception if the file is too short to hold a header or
        if the header matches the file length under neither byte order.
        """
        self.fh.seek(0, 2)
        nbytes = self.fh.tell()
        self.filesize = nbytes // 4 - 1
        if nbytes < 4:
            # Without this check unpack() below would fail with an
            # unhelpful struct.error on the short read.
            raise Exception('file too short for a header: %d bytes'
                            % nbytes)
        self.fh.seek(0, 0)
        hdrint = self.fh.read(4)
        # Try big-endian first, then fall back to little-endian.
        self.hdrsize = unpack(">I", hdrint)[0]
        if self.hdrsize != self.filesize:
            self.hdrsize = (unpack("<I", hdrint))[0]
            self.swap = "<"
        else:
            self.swap = ">"
        if self.hdrsize != self.filesize:
            raise Exception('header/file size mismatch: %lx vs %lx'
                            % (self.hdrsize, self.filesize))

    def seek(self, idx):
        """Position the file at the start of vector number idx (0-based)."""
        self.fh.seek(4 + idx * self.veclen * 4, 0)

    def __next__(self):
        """Return the next vector as a tuple of veclen floats.

        Raises StopIteration at end of file.  A trailing read shorter
        than one full vector is passed to unpack() unchanged and so
        surfaces as that function's error.
        """
        vec = self.fh.read(self.veclen * 4)
        if vec == b"":
            raise StopIteration
        return unpack(self.swap + str(self.veclen) + "f", vec)

    def readvec(self):
        """Return the next vector; same as next(self)."""
        return next(self)

    def getall(self):
        """Read every vector and return them as a 2-D numpy array.

        The array has hdrsize // veclen rows and veclen columns.  It is
        also kept on the instance as ``_mfc``.
        """
        self.fh.seek(4, 0)
        self._mfc = reshape(
            array(unpack(self.swap + str(self.hdrsize) + "f",
                         self.fh.read(self.hdrsize * 4))),
            (self.hdrsize // self.veclen, self.veclen))
        return self._mfc


class S2Feat_write(object):
    """Write Sphinx-II format feature files.

    The file produced is a 4-byte integer header holding the number of
    floats written, followed by those floats as 4-byte values, all in
    native byte order.  The header is written as a placeholder on open
    and rewritten with the final count on close.

    An instance is a context manager that finalises and closes the
    file on exit.
    """

    def __init__(self, filename=None, veclen=13):
        """Create a writer, opening filename at once if one is given.

        veclen is the number of floats every written vector must have.
        """
        self.fh = None
        self.veclen = veclen
        self.filesize = 0
        if filename is not None:
            self.open(filename)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.close()
        # Never suppress an exception raised inside the with-block.
        return False

    def open(self, filename):
        """Create (or truncate) filename and write a placeholder header.

        Any file this writer already holds is finalised and closed
        first, and the float count restarts at zero for the new file.
        """
        self.close()
        self.filename = filename
        self.filesize = 0
        self.fh = io.open(filename, "wb")
        self.writeheader()

    def close(self):
        """Write the final header and close the file, if one is open.

        Safe to call repeatedly.  The handle is closed even when the
        header write fails.
        """
        # getattr keeps this safe when called from __del__ on an
        # instance whose __init__ never ran.
        fh = getattr(self, "fh", None)
        if fh is None:
            self.fh = None
            return
        try:
            self.writeheader()
        finally:
            self.fh = None
            fh.close()

    def writeheader(self):
        """Write the current float count into the header slot.

        The file position is moved back to the end of the file
        afterwards, so vectors written after a mid-stream call are
        appended instead of overwriting earlier data.
        """
        self.fh.seek(0, 0)
        self.fh.write(pack("=i", self.filesize))
        self.fh.seek(0, 2)

    def writevec(self, vec):
        """Append one vector of exactly veclen floats.

        Raises Exception if len(vec) differs from veclen.
        """
        if len(vec) != self.veclen:
            raise Exception("Vector length must be %d" % self.veclen)
        self.fh.write(pack("=" + str(self.veclen) + "f", *vec))
        self.filesize += self.veclen

    def writeall(self, arr):
        """Append every row of arr as one vector, in iteration order."""
        for row in arr:
            self.writevec(row)