File: HtkFileDataSet.h

package info (click to toggle)
torch 2-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 5,488 kB
  • ctags: 3,217
  • sloc: cpp: 14,272; makefile: 201
file content (104 lines) | stat: -rw-r--r-- 3,334 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// Copyright (C) 2002 Samy Bengio (bengio@idiap.ch)
//                
//
// This file is part of Torch. Release II.
// [The Ultimate Machine Learning Library]
//
// Torch is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// Torch is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Torch; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#ifndef HTK_FILE_DATA_SET_INC
#define HTK_FILE_DATA_SET_INC

#include "StdDataSet.h"
#include "IOHtk.h"
#include "Distribution.h"

namespace Torch {

/** Creates a #StdDataSet# from a disk file in HTK format.
    As there are no targets in HTK datasets, we decided that the targets
    point to the input vector (hence, the data set can be used for
    Diabolo type of problems for instance). Moreover, a #window_size#
    can be given such that the real input vector $x_t$ is the
    concatenation of the vectors from $x_{t-d}$ to $x_{t+d}$, where
    #window_size# is equal to $2d+1$.

    Finally, it can also be used to load multiple files at once.

    @author Samy Bengio (bengio@idiap.ch)
 */
class HtkFileDataSet : public StdDataSet
{
  public:
    
    /// the number of files to load
    int n_files;

    /** for each file, an object #IOHtk# is created.
        NOTE(review): presumably this array holds #n_files# entries and
        is released by the destructor -- confirm in the implementation.
     */
    IOHtk** htk;

    /// the size of the input window (number of frames per input)
    int window_size;

    //-----

    /** Load #file# in memory.
        \begin{itemize}
          \item #window_size_# gives the number of frames per input.
          \item if #max_load# > 0, it loads only the first #max_load# examples.
        \end{itemize}
    */
    HtkFileDataSet(char *file, int window_size_ = 1, int max_load=-1);

    /** Multiple-files variant of the constructor: takes an array
        #files# of file names and their count #n_files_#;
        #window_size_# and #max_load# have the same defaults as in the
        single-file constructor.
        NOTE(review): whether #max_load# applies to each file or to the
        total number of examples is not visible from this header --
        confirm in the implementation.
    */
    HtkFileDataSet(char **files, int n_files_, int window_size_ = 1, int max_load=-1);

		/** Save the data set in HTK binary format in the specified
		    directory #dir_to_save#.
		    NOTE(review): the behavior when #dir_to_save# is NULL (the
		    default) is not visible from this header -- confirm.
		 */
		virtual void write(char* dir_to_save=NULL);

    /** Set the pointers on the data stored in the #IOHtk# objects and
        remove unused columns.
        NOTE(review): #input_to_del# presumably flags, per input column,
        whether that column must be removed (NULL, the default,
        presumably meaning that nothing is removed) -- confirm.
     */
    virtual void prepareData(bool* input_to_del=NULL);

    /** Normalize the #DataSet# using the normalization values
        of another #DataSet#. If the other #DataSet# isn't
        normalized, nothing happens...
     */
    virtual void normalizeUsingDataSet(StdDataSet *data_norm);
    
    /// Normalize the database
    virtual void normalize();

    /** Initialization method.
        NOTE(review): its exact role, and when it has to be called, are
        not visible from this header -- see the implementation.
     */
    virtual void init();
    
		/** Remove the frames considered as unlikely and return the number
		    of removed frames. If #mask# is not NULL, the unused data are
		    removed as well.
		    NOTE(review): presumably a frame is judged unlikely by comparing
		    #likely_distr# and #unlikely_distr# on it -- confirm the exact
		    criterion in the implementation.
		 */
		virtual int removeUnlikelyFrames(Distribution* likely_distr, Distribution* unlikely_distr,bool* mask = NULL);

    /** Create a mask to remove log energy parameters.
        NOTE(review): #mask# is presumably a caller-allocated array that
        this method fills in -- confirm its expected size.
     */
    virtual void createMaskFromParam(bool* mask);

    /// Load the normalization parameters
    virtual void loadFILE(FILE *file);

    /// Save the normalization parameters
    virtual void saveFILE(FILE *file);

    /// Destructor
    virtual ~HtkFileDataSet();
};


}

#endif