1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
# Copyright (c) 2006 Carnegie Mellon University
#
# You may copy and modify this freely under the same terms as
# Sphinx-III
"""Read/write Sphinx-III binary parameter files.
All the various binary parameter files created by SphinxTrain and used
by Sphinx-III and PocketSphinx share a common file format. This
module contains some base classes for reading and writing these files.
"""
__author__ = "David Huggins-Daines <dhdaines@gmail.com>"
__version__ = "$Revision$"
from struct import unpack, pack
from numpy import reshape, shape, frombuffer
class S3File:
"Read Sphinx-III binary files"
def __init__(self, filename=None, mode="rb"):
self.fh = None
if filename is not None:
self.open(filename, mode)
def __del__(self):
self.close()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self.close()
return False
def getall(self):
return self._params
def __getitem__(self, key):
return self._params[key]
def __setitem__(self, key, value):
self._params[key] = value
def __delitem__(self, key):
del self._params[key]
def __iter__(self):
return iter(self._params)
def __len__(self):
return len(self._params)
def open(self, filename, mode="rb"):
self.filename = filename
self.fh = open(filename, mode)
self.readheader()
def close(self):
if self.fh is not None:
self.fh.close()
self.fh = None
def readheader(self):
"""
Read binary header. Sets the following attributes:
- fileattr (a dictionary of attribute-value pairs)
- swap (a byteswap string as used by the struct module)
- otherend (a flag indicating if the file is wrong-endian
for the current platform)
- data_start (offset of the start of data in the file)
"""
spam = self.fh.readline()
self.fileattr = {}
if spam != b"s3\n":
raise Exception("File ID not found or invalid: " + spam)
while True:
spam = self.fh.readline()
if spam == b"":
raise Exception("EOF while reading headers")
if spam.endswith(b"endhdr\n"):
break
sp = spam.find(b' ')
k = spam[0:sp].strip().decode('utf-8')
v = spam[sp:].strip().decode('utf-8')
self.fileattr[k] = v
# This is 0x11223344 in the file's byte order
spam = unpack("<i", self.fh.read(4))[0]
if spam == 0x11223344:
self.swap = "<" # little endian
elif spam == 0x44332211:
self.swap = ">" # big endian
else:
raise Exception("Invalid byte-order mark %08x" % spam)
# Now determine whether we need to swap to get to native
# byteorder (shouldn't this be easier???)
self.otherend = (unpack('=i', pack(self.swap + 'i', spam))[0] != spam)
self.data_start = self.fh.tell()
def read3d(self):
self.d1 = unpack(self.swap + "I", self.fh.read(4))[0]
self.d2 = unpack(self.swap + "I", self.fh.read(4))[0]
self.d3 = unpack(self.swap + "I", self.fh.read(4))[0]
self._nfloats = unpack(self.swap + "I", self.fh.read(4))[0]
if self._nfloats != self.d1 * self.d2 * self.d3:
raise Exception(("Number of data points %d doesn't match "
+ "total %d = %d*%d*%d")
%
(self._nfloats,
self.d1 * self.d2 * self.d3,
self.d1, self.d2, self.d3))
spam = self.fh.read(self._nfloats * 4)
params = frombuffer(spam, 'f').copy()
if self.otherend:
params = params.byteswap()
return reshape(params, (self.d1, self.d2, self.d3)).astype('d')
def read2d(self):
self.d1 = unpack(self.swap + "I", self.fh.read(4))[0]
self.d2 = unpack(self.swap + "I", self.fh.read(4))[0]
self._nfloats = unpack(self.swap + "I", self.fh.read(4))[0]
if self._nfloats != self.d1 * self.d2:
raise Exception(("Number of data points %d doesn't match "
+ "total %d = %d*%d")
%
(self._nfloats,
self.d1 * self.d2,
self.d1, self.d2))
spam = self.fh.read(self._nfloats * 4)
params = frombuffer(spam, 'f').copy()
if self.otherend:
params = params.byteswap()
return reshape(params, (self.d1, self.d2)).astype('d')
def read1d(self):
self.d1 = unpack(self.swap + "I", self.fh.read(4))[0]
self._nfloats = unpack(self.swap + "I", self.fh.read(4))[0]
if self._nfloats != self.d1:
raise Exception(("Number of data points %d doesn't match "
+ "total %d")
%
(self._nfloats, self.d1))
spam = self.fh.read(self._nfloats * 4)
params = frombuffer(spam, 'f').copy()
if self.otherend:
params = params.byteswap()
return params.astype('d')
class S3File_write:
"Write Sphinx-III binary files"
def __init__(self, filename=None, mode="wb", attr={"version": "1.0"}):
self.fh = None
self.fileattr = attr
if filename is not None:
self.open(filename)
def open(self, filename):
self.filename = filename
self.fh = open(filename, "wb")
self.writeheader()
def close(self):
if self.fh is not None:
self.fh.close()
self.fh = None
def __del__(self):
self.close()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self.close()
return False
def writeheader(self):
self.fh.write(b"s3\n")
for k, v in self.fileattr.items():
self.fh.write(("%s %s\n" % (k, v)).encode('utf-8'))
# Make sure the binary data lives on a 4-byte boundary
lsb = (self.fh.tell() + len(b"endhdr\n")) & 3
if lsb != 0:
align = 4-lsb
self.fh.write(b"%sendhdr\n" % (b" " * align))
else:
self.fh.write(b"endhdr\n")
self.fh.write(pack("=i", 0x11223344))
self.data_start = self.fh.tell()
def write3d(self, stuff):
d1, d2, d3 = shape(stuff)
self.fh.write(pack("=IIII",
d1, d2, d3,
d1 * d2 * d3))
stuff.ravel().astype('f').tofile(self.fh)
def write2d(self, stuff):
d1, d2 = shape(stuff)
self.fh.write(pack("=III",
d1, d2,
d1 * d2))
stuff.ravel().astype('f').tofile(self.fh)
def write1d(self, stuff):
d1 = len(stuff)
self.fh.write(pack("=II", d1, d1))
stuff.ravel().astype('f').tofile(self.fh)
|