File: track_example.cc

package info (click to toggle)
speech-tools 1%3A2.5.0-11
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 9,988 kB
  • sloc: cpp: 67,350; ansic: 12,174; sh: 4,055; java: 3,748; makefile: 1,111; lisp: 711; perl: 396; awk: 85; xml: 9
file content (545 lines) | stat: -rw-r--r-- 17,720 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
 /************************************************************************/
 /*                                                                      */
 /*                Centre for Speech Technology Research                 */
 /*                     University of Edinburgh, UK                      */
 /*                       Copyright (c) 1996,1997                        */
 /*                        All Rights Reserved.                          */
 /*                                                                      */
 /*  Permission is hereby granted, free of charge, to use and distribute */
 /*  this software and its documentation without restriction, including  */
 /*  without limitation the rights to use, copy, modify, merge, publish, */
 /*  distribute, sublicense, and/or sell copies of this work, and to     */
 /*  permit persons to whom this work is furnished to do so, subject to  */
 /*  the following conditions:                                           */
 /*   1. The code must retain the above copyright notice, this list of   */
 /*      conditions and the following disclaimer.                        */
 /*   2. Any modifications must be clearly marked as such.               */
 /*   3. Original authors' names are not deleted.                        */
 /*   4. The authors' names are not used to endorse or promote products  */
 /*      derived from this software without specific prior written       */
 /*      permission.                                                     */
 /*                                                                      */
 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
 /*  THIS SOFTWARE.                                                      */
 /*                                                                      */
 /*************************************************************************/
 /*                                                                       */
 /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)             */
 /*                   Date: Fri May  9 1997                               */
 /* -------------------------------------------------------------------   */
 /* Example of declaration and use of tracks.                             */
 /*                                                                       */
 /*************************************************************************/


#include <iostream>
#include <cstdlib>
#include "EST_Track.h"
#include "EST_Wave.h"
#include "EST_sigpr.h"
#include "EST_error.h"


/** @name EST_Track class example code
  * @toc
  * Some examples of track manipulations.
  *
  */
//@{

int main(void)

{
    int i, j;

    /* This program is an example to be read, not something to be run,
       so for testing purposes it simply exits straight away. Everything
       below this call is compiled but never executed. */
    exit(0);
    /**@name Initialising and Resizing a Track

       The constructor functions can be used to create a track with
       zero frames and channels or a track with a specified number of
       frames and channels
    */

    //@{
    //@{ code
    EST_Track tr;           // default track declaration
    EST_Track tra(500, 10); // allocate track with 500 frames and 10 channels
    //@} code

    /** tracks can be resized at any time:
     */
    //@{ code
    tr.resize(10, 500); // resize track to have 10 frames and 500 channels
    tr.resize(500, 10); // resize track to have 500 frames and 10 channels
    //@} code

    /** by default, resizing preserves values in the track. This
        may involve copying some information, so if the existing values
        are not needed, a flag can be set which usually results in
        quicker resizing
    */
    //@{ code
    tr.resize(250, 5, 0);  // throw away any existing values
    //@} code
    /** If only the number of channels or the number of frames needs
        to be changed, this can be done with the following functions:
    */

    //@{ code
    tr.set_num_channels(10);   // makes 10 channels, keeps same no of frames

    tr.set_num_frames(400);    // makes 400 frames, keeps same no of channels
    //@} code
    /** The preserve flag works in the same way with these functions
     */
    //@}

    /** @name Simple Access

        Values in the track can be accessed and set by frame
        number and channel number.

        The following resizes a track to have 500 frames and 10 channels
        and fills every position with -5.
    */
    //@{
    //@{ code
    tr.resize(500, 10);

    for (i = 0; i < tr.num_frames(); ++i)
        for (j = 0; j < tr.num_channels(); ++j)
            tr.a(i, j) = -5.0;

    //@} code

    /** A well formed track will have a time value, specified in seconds,
        for every frame. The time array can be filled directly:
    */
    //@{ code
    for (i = 0; i < tr.num_frames(); ++i)
        tr.t(i) = (float) (i + 1) * 0.01;
    //@} code
    /** which fills the time array with values 0.01, 0.02,
        0.03... 5.0. However, a shortcut function is provided for fixed
        frame spacing:
    */
    //@{ code
    tr.fill_time(0.01);

    //@} code
    /** which performs the same operation as above. Frames do not have
        to be evenly spaced, in pitch synchronous processing the time
        array holds the time position of each pitch period. In such
        cases each position in the time array must obviously be set
        individually.</para><para>

        Some representations have undefined values during certain
        sections of the track, for example the F0 value during
        unvoiced speech.</para><para>

        The break/value array can be used to specify if a frame has an
        undefined value.</para><para> If a frame in this array is 1,
        that means the amplitude is defined at that point. If 0, the
        amplitude is undefined. By default, every frame has a value.
        </para><para>

        Breaks (undefined values) can be set by <method>set_break()
        </method>. The following sets every frame from 50 to 99 as a
        break:
    */
    //@{ code
    for (i = 50; i < 100; ++i)
        tr.set_break(i);
    //@} code
    /** frames can be turned back to values as follows:
     */
    //@{ code
    for (i = 50; i < 100; ++i)
        tr.set_value(i);
    //@} code
    /** It is up to individual functions to decide how to interpret breaks.
        </para><para>
        A frame's status can be checked as follows:
    */
    //@{ code
    if (tr.val(60))
        std::cout << "Frame 60 is not a break\n";

    if (tr.track_break(60))
        std::cout << "Frame 60 is a break\n";
    //@} code
    //@}

    /** @name Naming Channels
        @id tr-example-naming-channels

        While channels can be accessed by their index, it is often useful
        to give them names and refer to them by those names.

        The set_channel_name() function sets the name of a single channel:
    */
    //@{
    //@{ code
    tr.set_channel_name("F0", 0);
    tr.set_channel_name("energy", 1);
    //@} code

    /** An alternative is to use a predefined set of channel names
        stored in a <emphasis>map</emphasis>. A track map
        is simply a list of strings which describes a channel name
        configuration. The <method>resize</method> function can take
        this and resize the number of channels to the number of channels
        indicated in the map, and give each channel its name from the
        map. For example:
    */
    //@{ code
    EST_StrList map;
    map.append("F0");
    map.append("energy");

    tr.resize(500, map); // this makes a 2 channel track and sets the names to F0 and energy
    //@} code

    /** A convention is used for channels which comprise
        components of a multi-dimensional analysis such as
        cepstra. In such cases the channels are named
        <replaceable>TYPE_I</replaceable>.  The last coefficient is
        always named <replaceable>TYPE_N</replaceable> regardless of
        the number of coefficients. This is very useful in extracting
        a set of related channels without needing to know the order
        of the analysis.

        For example, a track map might look like:

    */
    //@{ code

    map.clear();
    map.append("F0");
    map.append("energy");

    map.append("cep_0");
    map.append("cep_1");
    map.append("cep_2");
    map.append("cep_3");
    map.append("cep_4");
    map.append("cep_5");
    map.append("cep_6");
    map.append("cep_7");
    map.append("cep_N");

    tr.resize(500, map); // makes an 11 channel track and sets the names
    //@} code

    /** This obviously gets unwieldy quite quickly, so the mapping
        mechanism provides a short hand for multi-dimensional data.

    */

    //@{ code
    map.clear();
    map.append("F0");
    map.append("energy");

    map.append("$cep-0+8");

    tr.resize(500, map); // does exactly as above
    //@} code

    /** Here $ indicates the special status, "cep" the name of the
        coefficients, "-0" that the first is number 0 and "+8" that
        there are 8 more to follow.
    */

    //@}


    /** @name Access single frames or single channels.

        @id tr-example-frames-and-channels

        Often functions perform their operations on only a single
        frame or channel, and the track class provides a general
        mechanism for doing this.

        Single frames or channels can be accessed as EST_FVectors:
        Given a track with 500 frames and 10 channels, the 50th frame
        can be accessed as:
    */
    //@{
    //@{ code
    EST_FVector tmp_frame;

    tr.frame(tmp_frame, 50);
    //@} code
    /** now tmp_frame is a 10 element vector, which is
        a window into tr: any changes to the contents of tmp_frame will
        change tr. tmp_frame cannot be resized. (This operation can
        be thought in standard C terms as tmp_frame being a pointer
        to the 50th frame of tr).
        </para><para>
        Likewise with channels:
    */
    //@{ code
    EST_FVector tmp_channel;

    tr.channel(tmp_channel, 5);
    //@} code
    /** Again, tmp_channel is a 500 element vector, which is
        a window into tr: any changes to the contents of tmp_channel will
        change tr. tmp_channel cannot be resized.
        </para><para>
        Channels can also be extracted by name:
    */
    //@{ code
    tr.channel(tmp_channel, "energy");
    //@} code
    /** not all the channels need be put into the temporary frame.
        Imagine we have a track with an F0 channel, an energy channel and
        9 cepstrum channels. The following makes a frame from the
        50th frame, which only includes the cepstral information in
        the 9 channels starting at channel 2 (channels 2 through 10) */
    //@{ code
    tr.frame(tmp_frame, 50, 2, 9);
    //@} code
    /** Likewise, the 5th channel with only the last 100 frames can be set up
        as: */
    //@{ code
    tr.channel(tmp_channel, 5, 400, 100);
    //@} code
    //@}
    /** @name Access multiple frames or channels.
        @id tr-example-sub-tracks
        In addition to extracting single frames and channels, multiple
        frame and channel portions can be extracted in a similar
        way. In the following example, we make a sub-track sub, which
        points to the entire cepstrum portion of a track (the 9
        channels starting at channel 2, i.e. channels 2 through 10)
    */
    //@{
    //@{ code
    EST_Track sub;

    tr.sub_track(sub, 0, EST_ALL, 2, 9);

    //@} code

    /** <parameter>sub</parameter> behaves exactly like a normal
        track in every way, except that it cannot be resized. Its
        contents behave like a pointer into the designated portion of
        <parameter>tr</parameter>, so changing
        <parameter>sub</parameter> will change<parameter>
        tr</parameter>.

        </para><para> The first argument is the
        <parameter>sub</parameter> track. The second and third state
        the start frame and the total number of frames required.
        EST_ALL is a special constant that specifies that all the
        frames are required here. The next argument is the start
        channel number (remember channels are numbered from 0), and
        the last argument is the total number of channels required.
        </para><para>

        This facility is particularly useful for using standard
        signal processing functions efficiently. For example,
        the <function>melcep</function> in the signal processing library
        takes a waveform and produces a mel-scale cepstrum. It determines
        the order of the cepstral analysis by the number of channels in
        the track it is given, which has already been allocated to have
        the correct number of frames and channels.

        </para><para> The following will process the waveform
        <parameter>sig</parameter>, produce a mel cepstrum whose order
        is set by the number of channels in <parameter>sub</parameter>,
        and place the output in <parameter>sub</parameter>. (For
        explanation of the other options see
        <function>melcep</function>) */
    //@{ code
    EST_Wave sig;

    melcep(sig, sub, 1.0, 20, 22);
    //@} code

    /** because we have made<parameter>sub</parameter> a window
        into<parameter> tr</parameter>, the melcep function writes its
        output into the correct location, i.e. channels 2-10 of tr. If
        it were not for the sub_track facility, either a separate track
        of the right size would be passed into melcep and then it
        would be copied into tr (wasteful), or else tr would be passed
        in and other arguments would have to specify which channels
        should be written to (messy).  </para><para>

        Sub-tracks can also be set using channel names. The
        following example does exactly as above, but the channel
        range is given by the names of the first and last channels
        required: */
    //@{ code

    tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
    //@} code
    // NOTE(review): the original text announced a "first channel name plus
    // number of channels" form here, but the call names both ends, exactly
    // like the next example -- confirm which form was intended.
    /** and this specifies the end by a string also:
     */
    //@{ code
    tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
    //@} code
    /** sub_tracks can be any set of continuous frames and
        channels. For example if a word started at frame 47 and ended
        at frame 86, the following would set a sub track to that
        portion (39 frames starting at frame 47): */
    //@{ code

    tr.sub_track(sub, 47, 39, "cep_0", "cep_N");

    //@} code

    /** We can step through the frames of a Track using a standard
     * iterator. The frames are returned as one-frame sub-tracks.
     */

    //@{ code
    EST_Track::Entries frames;

    // print out the time of every 50th frame
    std::cout << "Times:";

    for (frames.begin(tr); frames; ++frames)
    {
        const EST_Track &frame = *frames;
        if (frames.n() % 50 == 0)
            std::cout << " " << frames.n() << "[" << frame.t() << "]";
    }
    std::cout << "\n";

    //@} code

    /** The <function>channel</function>, <function>frame</function>
        and <function>sub_track</function> functions are most commonly
        used to write into a track using a convenient
        sub-portion. Sometimes, however a simple copy is required
        whose contents can be written without affecting the original.

        The <member>copy_sub_track</member> function does this */
    //@{ code
    EST_Track tr_copy;

    // NOTE(review): this call is disabled in the original example; confirm
    // the current copy_sub_track signature before re-enabling it.
//    tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
    //@} code

    /** Individual frames and channels can be copied out into
        pre-allocated float * arrays as follows:
    */
    //@{ code
    float *channel_buf, *frame_buf;
    channel_buf = new float[tr.num_frames()];   // one value per frame
    frame_buf = new float[tr.num_channels()];   // one value per channel

    tr.copy_channel_out(5, channel_buf);   // copy channel 5 into channel_buf
    tr.copy_frame_out(43, frame_buf);      // copy frame 43 into frame_buf
    //@} code

    /** Individual frames and channels can be copied into the track
        from float * arrays as follows:
    */
    //@{ code
    tr.copy_channel_in(5, channel_buf);    // copy channel_buf into channel 5
    tr.copy_frame_in(43, frame_buf);       // copy frame_buf into frame 43
    //@} code

    /** The arrays were allocated here with new [], so they are
        released with delete [] once they are no longer needed:
    */
    //@{ code
    delete [] channel_buf;
    delete [] frame_buf;
    //@} code
    //@}


    /** @name Auxiliary Channels
        Auxiliary channels are used for storing frame information other than
        amplitude coefficients, for example voicing decisions and points of
        interest in the track.

        Auxiliary channels always have the same number of frames as the
        amplitude channels. They are resized by assigning names to the
        channels that need to be created:
    */
    //@{
    //@{ code


    EST_StrList aux_names;

    aux_names.append("voicing");
    aux_names.append("join_points");
    aux_names.append("cost");

    tr.resize_aux(aux_names);

    //@} code
    /** The following fills in these three channels with some values:
     */
    //@{ code

    for (i = 0; i < tr.num_frames(); ++i)
    {
        tr.aux(i, "voicing") = i;
        tr.aux(i, "join_points") = EST_String("stuff");
        tr.aux(i, "cost") =  0.111;
    }
    //@} code
    //@}

    /** @name File I/O
        Tracks in various formats can be saved and loaded:

        Save as a HTK file:
    */
    //@{
    //@{ code
    if (tr.save("tmp/track.htk", "htk") != write_ok)
        EST_error("can't save htk file\n");
    //@} code
    /** Save as a EST file:
     */
    //@{ code
    if (tr.save("tmp/track.est", "est") != write_ok)
        EST_error("can't save est file\n");
    //@} code
    /** Save as an ascii file:
     */
    //@{ code
    if (tr.save("tmp/track.ascii", "ascii") != write_ok)
        EST_error("can't save ascii file\n");
    //@} code
    /** The file type is automatically determined from the file's
        header during loading:
    */
    //@{ code

    EST_Track tr2;
    if (tr2.load("tmp/track.htk") != read_ok)
        EST_error("can't reload htk\n");
    //@} code

    /** If no header is found, the function assumes the
        file is ascii data, with a fixed frame shift, arranged with rows
        representing frames and columns channels. In this case, the
        frame shift must be specified as an argument to this function:
    */
    //@{ code
    if (tr.load("tmp/track.ascii", 0.01) != read_ok)
        EST_error("can't reload ascii file\n");
    //@} code
    //@}

    exit(0);
}

//@}