File: sigpr_example.cc

package info (click to toggle)
speech-tools 1%3A2.5.0-11
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 9,988 kB
  • sloc: cpp: 67,350; ansic: 12,174; sh: 4,055; java: 3,748; makefile: 1,111; lisp: 711; perl: 396; awk: 85; xml: 9
file content (641 lines) | stat: -rw-r--r-- 21,578 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
/************************************************************************/
/*                                                                      */
/*                Centre for Speech Technology Research                 */
/*                     University of Edinburgh, UK                      */
/*                       Copyright (c) 1996,1997                        */
/*                        All Rights Reserved.                          */
/*                                                                      */
/*  Permission is hereby granted, free of charge, to use and distribute */
/*  this software and its documentation without restriction, including  */
/*  without limitation the rights to use, copy, modify, merge, publish, */
/*  distribute, sublicense, and/or sell copies of this work, and to     */
/*  permit persons to whom this work is furnished to do so, subject to  */
/*  the following conditions:                                           */
/*   1. The code must retain the above copyright notice, this list of   */
/*      conditions and the following disclaimer.                        */
/*   2. Any modifications must be clearly marked as such.               */
/*   3. Original authors' names are not deleted.                        */
/*   4. The authors' names are not used to endorse or promote products  */
/*      derived from this software without specific prior written       */
/*      permission.                                                     */
/*                                                                      */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
/*  THIS SOFTWARE.                                                      */
/*                                                                      */
/************************************************************************/
/*                                                                      */
/*                 Author: Paul Taylor (pault@cstr.ed.ac.uk)            */
/*                   Date: Fri May  9 1997                              */
/* -------------------------------------------------------------------  */
/* Examples of Generation of Acoustic Feature Vectors from Waveforms    */
/*                                                                      */
/************************************************************************/

#include <cstdlib>
#include "EST_sigpr.h"
#include "EST_cmd_line.h"
#include "EST_inline_utils.h"
#include "EST_sigpr.h"

/**@name Signal processing example code
  * 
  * @toc
  */
//@{

EST_StrList empty;

void print_map(EST_TrackMap &t);
void print_track_map(EST_Track &t);

int main(void)

{
    EST_StrList base_list; // decl
    EST_StrList delta_list; // decl
    EST_StrList acc_list; // decl
    EST_Option op, al; // decl
    init_lib_ops(al, op); 
    EST_Wave sig; // decl
    EST_Track fv, part; // decl
    float shift; // decl
    int i;


    cout << "position 1\n";

    /**@name Producing a single type of feature vector for an utterance

       A number of types of signal processing can be performed by the
       sig2coef function. The following code demonstrates a simple
       case of calculating the linear prediction (LP) coefficients for
       a waveform.

       First set the order of the lpc analysis to 16 (this entails 17 actual
       coefficients) and then load in the waveform to be analysed.

      */
    //@{

    
    //@{ code
    
    int lpc_order = 16;
    sig.load(DATA "/kdt_001.wav");

    //@} code

    /**  Now allocate enough space in the track to hold the analysis.
	 The following command resizes fv to have enough frames for
	 analysis frames at 0.01 intervals up to the end of the waveform,
	 (sig.end()), and enough channels to store lpc_order + 1 coefficients.
	 The channels are named so as to take lpc coefficients.
    */
    //@{ code
    int num_frames;
    num_frames = (int)ceil(sig.end() / 0.01);
    fv.resize(num_frames, lpc_order + 1);
    //@} code

    /** The positions of the frames, corresponding to the middel of their
	analysis window also needs to be set. For fixed frame analysis, this
	can be done with the fill_time() function: */

    //@{ code
    fv.fill_time(0.01);
    //@} code

    /** The simplest way to do the actual analysis is as follows, which
	will fill the track with the values from the LP analysis using the
	default processing controls.
    */

    //@{ code
    sig2coef(sig, fv, "lpc");
    //@} code

    /** In this style of analysis, default values are used to control the
	windowing mechanisms which split the whole signal into frames.

	Specifically, each frame is defined to start a certain distance 
	before the time interval, and extending the same distance after.
	This distance is calculated as a function of the local window
	spacing and can be adjusted as follows:
    
	Extending one time period before and one time period after the
	current time mark:
    */
    //@{ code
    sig2coef(sig, fv, "lpc", 2.0);
    //@} code
    /** Extending 1.5 time periods before and  after the
    current time mark, etc;
    */
    //@{ code
    sig2coef(sig, fv, "lpc", 3.0);
    //@} code

    /** The type of windowing function may be changed also as this
	can be passed in as an optional argument. First we 
	create a window function (This is explained more in \Ref{Windowing}).
    */
    //@{ code
    EST_WindowFunc *wf =  EST_Window::creator("hamming");
    //@} code
    /** and then pass it in as the last argument
     */
    //@{ code
    sig2coef(sig, fv, "lpc", 3.0, wf);
    //@} code
    //@}

    /**@name Pitch-Synchronous vs fixed frame analysis.

      Most of the core signal processing functions operate on individual
      frames of speech and are oblivious as to how these frames were
      extracted from the original speech. This allows us to take the frames
      from anywhere in the signal: specifically, this facilitates two
      common forms of analysis:

      <formalpara><title>fixed frame</title><para>
      The time points are space at even intervals throughout the signal.
      </para></formalpara>
      <formalpara><title>pitch-synchronous</title><para> 
      The time points represent <emphasis>pitchmarks</emphasis>
      and correspond to a specific position in each pitch period,
      e.g. the instant of glottal closure.</para></formalpara>
      <para>

      It is a simple matter to fill the time array, but normally 
      pitchmarks are read from a file or taken from another signal
      processing algorithm (see \Ref{Pitchmark functions.}).
      </para>
      <para>


      There are many ways to fill the time array for fixed frame analysis.

      manually:      

      */
    //@{

    //@{ code
    int num_frames = 300;
    fv.resize(num_frames, lpc_order + 1);
    shift = 0.01; // time interval in seconds

    for (i = 0; i < num_frames; ++i)
	fv.t(i) = shift * (float) i;
    //@} code
    /** or by use of the  member function \Ref{EST_Track::fill_time} 
     */

    //@{ code
    fv.fill_time(0.01);
    //@} code

    /** Pitch synchronous values can simply be read from pitchmark
     files:
    */
    //@{ code
    fv.load(DATA "/kdt_001.pm");
    make_track(fv, "lpc", lpc_order + 1);
    //@} code

    /** Regardless of how the time points where obtain, the analysis
     function call is just the same:
    */
    //@{ code
    sig2coef(sig, fv, "lpc");
    //@} code
    //@}

    cout << "position 3\n";

    /**@name Naming Channels
      @id sigpr-example-naming-channels
      Multiple types of feature vector can be stored in the same Track.
      Imagine that we want lpc, cepstrum and power
      coefficients in that order in a track. This can be achieved by using
      the \Ref{sig2coef} function multiple times, or by the wrap
      around \Ref{sigpr_base} function.
      </para><para>

      It is vitally important here to ensure that before passing the
      track to the signal processing functions that it has the correct
      number of channels and that these are appropriately named. This is
      most easily done using the track map facility, explained
      in <LINK LINKEND="proan129">Naming Channels</LINK>
      </para><para>

      For each call, we only us the part of track that is relevant.
      The sub_track member function of \Ref{EST_Track} is used to get
      this. In the following example, we are assuming here that 
      fv has sufficient space for 17
      lpc coefficients, 8 cepstrum  coefficients and power and that
      they are stored in that order.

      */
    //@{
    //@{ code

    int cep_order = 16;
    EST_StrList map;

    map.append("$lpc-0+" Stringtoi(lpc_order));
    map.append("$cepc-0+" Stringtoi(cep_order));
    map.append("power");

    fv.resize(EST_CURRENT, map);
    //@} code

    /** An alternative is to use <function>add_channels_to_map()</function>
	which takes a list of coefficient types and makes a map.
	The order of each type of processing is extracted from
	op. 
	*/

    //@{ code

    EST_StrList coef_types;

    coef_types.append("lpc");
    coef_types.append("cep");
    coef_types.append("power");

    map.clear();

    add_channels_to_map(map, coef_types, op);
    fv.resize(EST_CURRENT, map);

    //@} code

    /** After allocating the right number of frames and channels 
	 in {\tt fv}, we extract a sub_track, which has all the frames
	 (i.e. between 0 and EST_ALL) and all the lpc channels
    */
    //@{ code
    fv.sub_track(part, 0, EST_ALL, 0, "lpc_0", "lpc_N");
    //@} code
    /** now call the signal processing function on this part:
     */
    //@{ code
    sig2coef(sig, part, "lpc");
    //@} code

    /** We repeat the procedure for the cepstral coefficients, but this
	time take the next 8 channels (17-24 inclusive)  and calculate the coefficients:
    */
    //@{ code
    fv.sub_track(part, 0, EST_ALL, "cep_0", "cep_N");

    sig2coef(sig, part, "cep");
    //@} code
    /** Extract the last channel for power and call the power function:
     */
    //@{ code
    fv.sub_track(part, 0, EST_ALL, "power", 1);
    power(sig, part, 0.01);

    //@} code

     /** While the above technique is adequate for our needs and is
	a useful demonstration of sub_track extraction, the
	\Ref{sigpr_base} function is normally easier to use as it does
	all the sub track extraction itself. To perform the lpc, cepstrum
	and power analysis, we put these names into a StrList and
	call \Ref{sigpr_base}.
     */
    //@{ code
    base_list.clear(); // empty the list, just in case
    base_list.append("lpc");
    base_list.append("cep");
    base_list.append("power");

    sigpr_base(sig, fv, op, base_list);
    //@} code
    /** This will call \Ref{sigpr_track} as many times as is necessary. 
     */
    //@}

    /**@name Producing delta and acceleration coefficients

      Delta coefficients represent the numerical differentiation of a
      track, and acceleration coefficients represent the second
      order numerical differentiation.

      By convention, delta coefficients have a "_d" suffix and acceleration
      coefficients "_a". If the coefficient is multi-dimensional, the
      numbers go after the "_d" or "_a".

      */
    //@{
    //@{ code

    map.append("$cep_d-0+" Stringtoi(cep_order)); // add deltas
    map.append("$cep_a-0+" Stringtoi(cep_order)); // add accs

    fv.resize(EST_CURRENT, map); // resize the track.
    //@} code
    /**
	Given a EST_Track of coefficients {\tt fv}, the \Ref{delta}
	function is used to produce the delta equivalents {\tt
	del}. The following uses the track allocated above and
	generates a set of cepstral coefficients and then makes their
	delta and acc:

    */
    //@{ code

    EST_Track del, acc;

    fv.sub_track(part, 0, EST_ALL, 0, "cep_0", "cep_N"); // make subtrack of coefs
    sig2coef(sig, part, "cep");  // fill with cepstra

    // make subtrack of deltas
    fv.sub_track(del, 0, EST_ALL, 0, "cep_d_0", "cep_d_N"); 
    delta(part, del);  // calculate deltas of part, and place answer in del

    // make subtrack of accs
    fv.sub_track(acc, 0, EST_ALL, 0, "cep_a_0", "cep_a_N"); 
    delta(del, acc);  // calculate deltas of del, and place answer in acc
    //@} code
    /** It is possible to directly calculate the delta coefficients of
	a type of coefficient, even if we don't have the base type.
	\Ref{sigpr_delta} will process the waveform, make a temporary
	track of the required type "lpc" and calculate the delta of this.
	</para><para>
	The following makes a set of delta reflection coefficients:
	
    */
    //@{ code
    map.append("$ref_d-0+" Stringtoi(lpc_order)); // add to map 
    fv.resize(EST_CURRENT, map); // resize the track.

    sigpr_delta(sig, fv, op, "ref");
    //@} code
    /** an equivalent function exists for acceleration coefficients:
     */
    //@{ code
    map.append("$lsf_a-0+" Stringtoi(lpc_order)); // add acc lsf
    fv.resize(EST_CURRENT, map); // resize the track.

    sigpr_acc(sig, fv, op, "ref");

    //@} code
    //@}

    /**@name Windowing

      The \Ref{EST_Window} class provides a variety of means to
      divide speech into frames using windowing mechanisms.

      </para><para>
      A window function can be created from a window name using the
      \Ref{EST_Window::creator} function:
     */
    //@{
    //@{ code

    EST_WindowFunc *hamm =  EST_Window::creator("hamming");
    EST_WindowFunc *rect =  EST_Window::creator("rectangular");
    //@} code
    /** This function can then be used to create a EST_TBuffer of 
	window values. In the following example the values from a
	256 point hamming window are stored in the buffer win_vals:
    */
    //@{ code
    EST_FVector frame;
    EST_FVector win_vals;

    hamm(256, win_vals);
    //@} code

    /** The make_window function also creates a window:
     */
    //@{ code
    EST_Window::make_window(win_vals, 256, "hamming",-1);
    //@} code

    /** this can then be used to make a frame of speech from the main EST_Wave
	sig. The following example extracts speech starting at sample 1000:
    */
    //@} code
    for (i = 0; i < 256; ++i)
	frame[i] = (float)sig.a(i + 1000) * win_vals[i];
    //@} code

    /** Alternatively, exactly the same operation can be performed in a
	single step by passing the window function to the
	\Ref{EST_Window::window_signal} function which takes a
	\Ref{EST_Wave} and performs windowing on a section of it,
	storing the output in the \Ref{EST_FVector} {\tt frame}.
    */
    //@{ code
    EST_Window::window_signal(sig, hamm, 1000, 256, frame, 1);
    //@} code
    /** The window function need not be explicitly created, the window
	signal can work on just the name of the window type:
    */

    //@{ code
    EST_Window::window_signal(sig, "hamming", 1000, 256, frame, 1);
    //@} code

    //@}
    /**@name Frame based signal processing
      @id sigpr-example-frames
      The signal processing library provides an extensive set of functions
      which operate on a single frame of coefficients.
      The following example shows one method of splitting the signal
      into frames and calling a signal processing algorithm.

      First set up the track for 16 order LP analysis:

      */
    //@{
    //@{ code

    map.clear();
    map.append("$lpc-0+16");
    
    fv.resize(EST_CURRENT, map);

    //@} code
    /** In this example, we take the analysis frame length to be 256 samples
	long, and the shift in samples is just the shift in seconds times the
	sampling frequency.
    */
    //@{ code
    int s_length = 256;
    int s_shift =  int(shift * float(sig.sample_rate()));
    EST_FVector coefs;
    //@} code

    /** Now we set up a loop which calculates the frames one at a time.
	 {\tt start} is the start position in samples of each frame.
	 The \Ref{EST_Window::window_signal} function is called which
	 makes a \Ref{EST_FVector} frame of the speech via a hamming window. 

	 Using the \Ref{EST_Track::frame} function, the EST_FVector 
	 {\tt coefs} is set to frame {\tt k} in the track. It is important
	 to understand that this operation involves setting an internal
	 smart pointer in {\tt coefs} to the memory of frame {\tt k}. This
	 allows the signal processing function \Ref{sig2lpc} to operate
	 on an input and output \Ref{EST_FVector}, without any copying to or
	 from the main track. After the \Ref{sig2lpc} call, the kth frame
	 of {\tt fv} is now filled with the LP coefficients.
    */
    //@{ code
    for (int k1 = 0; k1 < fv.num_frames(); ++k1)
    {
	int start = (k1 * s_shift) - (s_length/2);
	EST_Window::window_signal(sig, "hamming", start, s_length, frame, 1);

	fv.frame(coefs, k1); 	// Extract a single frame
	sig2lpc(frame, coefs); 	// Pass this to actual algorithm
    }
    //@} code

    /** A slightly different tack can be taken for pitch-synchronous analysis.
	Setting up fv with the pitchmarks and channels:
    */
    //@{ code
    fv.load(DATA "/kd1_001.pm");
    fv.resize(EST_CURRENT, map);
    //@} code
    /** Set up as before, but this time calculate the window starts and 
	lengths from the time points. In this example, the length is a 
	{\tt factor} (twice) the local frame shift.
	Note that the only difference between this function and the fixed
	frame one is in the calculation of the start and end points - the

	windowing, frame extraction and call to \Ref{sig2lpc} are exactly
	the same.
    */
    //@{ code
    float factor = 2.0;

    for (int k2 = 0; k2 < fv.num_frames(); ++k2)
    {
	s_length = irint(get_frame_size(fv, k2, sig.sample_rate())* factor);
	int start = (irint(fv.t(k2) * sig.sample_rate()) - (s_length/2));

	EST_Window::window_signal(sig, wf, start, s_length, frame, 1);

	fv.frame(coefs, k2);
	sig2lpc(frame, coefs);
    }
    //@} code
    //@}

    /**@name Filtering 

       In the EST library we so far have two main types of filter,
       {\bf finite impulse response (FIR)} filters and {\bf linear
       prediction (LP)} filters. {\bf infinite impulse response (IIR)}
       filters are not yet implemented, though LP filters are a
       special case of these.
       </para><para>
       Filtering involves 2 stages: the design of the filter and the
       use of this filter on the waveform.  
       </para><para>
       First we examine a simple low-pass filter which attempts to suppress
       all frequencies about a cut-off. Imagine we want to low pass filter
       a signal at 400Hz. First we design the filter:
    */
    //@{
    //@{ code

    EST_FVector filter;
    int freq = 400;
    int filter_order = 99;

    filter = design_lowpass_FIR_filter(sig.sample_rate(), 400, 99);
    //@} code
    /** And now use this filter on the signal:
     */
    //@{ code
    FIRfilter(sig, filter);
    //@} code
    /** For one-off filtering operations, the filter design can be
	done in the filter function itself. The \Ref{FIRlowpass_filter}
	function takes the signal, cut-off frequency and order as
	arguments and designs the filter on the fly. Because of the
	overhead of filter design, this function is expensive and
	should only be used for one-off operations.
    */
    //@{ code
    FIRlowpass_filter(sig, 400, 99);
    //@} code
    /** The equivalent operations exist for high-pass filtering:
     */
    //@{ code
    filter = design_highpass_FIR_filter(sig.sample_rate(), 50, 99);
    FIRfilter(sig, filter);
    FIRhighpass_filter(sig, 50, 99);
    //@} code
    /** Filters of arbitrary frequency response can also be designed using
	the \Ref{design_FIR_filter} function. This function takes a
	EST_FVector of order $2^{N}$ which specifies the desired frequency
	response up to 1/2 the sampling frequency. The function returns
	a set of filter coefficients that attempt to match the desired
	reponse.
    */
    //@{ code
    EST_FVector response(16);
    response[0] = 1;
    response[1] = 1;
    response[2] = 1;
    response[3] = 1;
    response[4] = 0;
    response[5] = 0;
    response[6] = 0;
    response[7] = 0;
    response[8] = 1;
    response[9] = 1;
    response[10] = 1;
    response[11] = 1;
    response[12] = 0;
    response[13] = 0;
    response[14] = 0;
    response[15] = 0;

    filter = design_FIR_filter(response, 15);

    FIRfilter(sig, response);
    //@} code
    /**The normal filtering functions can cause a time delay in the
       filtered waveform. To attempt to eliminate this, a set of
       double filter function functions are provided which guarantees
       zero phase differences between the original and filtered waveform.
    */
    //@{ code
    FIRlowpass_double_filter(sig, 400);
    FIRhighpass_double_filter(sig, 40);
    //@} code

    /** Sometimes it is undesirable to have the input signal overwritten.
	For these cases, a set of parallel functions exist which take
	a input waveform for reading and a output waveform for writing to.
    */
    //@{ code
    EST_Wave sig_out;

    FIRfilter(sig, sig_out, response);
    FIRlowpass_filter(sig, sig_out, 400);
    FIRhighpass_filter(sig, sig_out, 40);
    //@} code
    //@}

}

//@}