File: Sound_and_Spectrogram_extensions.cpp

package info (click to toggle)
praat 6.4.27%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 206,060 kB
  • sloc: cpp: 1,409,811; ansic: 286,305; makefile: 946; python: 340; sh: 35
file content (375 lines) | stat: -rw-r--r-- 14,176 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
/* Sound_and_Spectrogram_extensions.cpp
 *
 * Copyright (C) 1993-2024 David Weenink
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This code is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this work. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 djmw 20010718
 djmw 20020813 GPL header.
 djmw 20041124 Changed call to Sound_to_Spectrum.
 djmw 20070103 Sound interface changes
 djmw 20071107 Errors/warnings text changes
 djmw 20071202 Melder_warning<n>
*/

#include "Sound_and_Spectrogram_extensions.h"
#include "Sound_extensions.h"
#include "Sound_and_Spectrum.h"
#include "Sound_to_Pitch.h"
#include "Vector.h"
#include "NUM2.h"

autoSound BandFilterSpectrogram_as_Sound (BandFilterSpectrogram me, int to_dB);

/*
	The gaussian(x) = (exp(-48*((i-(n+1)/2)/(n+1))^2)-exp(-12))/(1-exp(-12));
	For power we need the area under the square of this window:
	Integrate (gaussian(i)^2,i=1..n) =
		(sqrt(Pi)*sqrt(3)*sqrt(2)*erf(2*(n-1)*sqrt(3)*sqrt(2)/(n+1))*(n+1) + 24*exp(-24)*(n-1)+
		-4*sqrt(Pi)*sqrt(3)*exp(-12)*erf(2*(n-1)*sqrt(3)/(n+1))*(n+1))/ (24 * (-1+exp(-12))^2),
	where erf(x) = 1 - erfc(x) and n is the windowLength in samples.
	To compare with the rectangular window we need to divide this by the window width (n -1) x 1^2.
*/
static void _Spectrogram_windowCorrection (Spectrogram me, integer numberOfSamples_window) {
	double windowFactor = 1.0;
	if (numberOfSamples_window > 1) {
		const double e12 = exp (-12);
		const double denum = (e12 - 1) * (e12 - 1.0) * 24 * (numberOfSamples_window - 1);
		const double arg1 = 2.0 * NUMsqrt3 * (numberOfSamples_window - 1) / (numberOfSamples_window + 1);
		const double arg2 = arg1 * NUMsqrt2;
		const double p2 = NUMsqrtpi * NUMsqrt3 * NUMsqrt2 * (1 - NUMerfcc (arg2)) * (numberOfSamples_window + 1);
		const double p1 = 4 * NUMsqrtpi * NUMsqrt3 * e12 * (1 - NUMerfcc (arg1)) * (numberOfSamples_window + 1);
		windowFactor =  (p2 - p1 + 24 * (numberOfSamples_window - 1) * e12 * e12) / denum;
	}
	my z.get()  /=  windowFactor;
}

static autoSpectrum Sound_to_Spectrum_power (Sound me) {
	try {
		autoSpectrum thee = Sound_to_Spectrum (me, true);
		double scale = 2.0 * thy dx / (my xmax - my xmin);
		/*
			factor '2' because we combine positive and negative frequencies
			thy dx : width of frequency bin
			my xmax - my xmin : duration of sound
		*/
		VEC re = thy z.row (1), im = thy z.row (2);
		for (integer i = 1; i <= thy nx; i ++) {
			const double power = scale * (re [i] * re [i] + im [i] * im [i]);
			re [i] = power;
			im [i] = 0.0;
		}
		/*
			Correction of frequency bins at 0 Hz and nyquist: don't count for two.
		*/
		re [1] *= 0.5;
		re [thy nx] *= 0.5;
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrum with spectral power created.");
	}
}

static void Sound_into_BarkSpectrogram_frame (Sound me, BarkSpectrogram thee, integer frame) {
	autoSpectrum him = Sound_to_Spectrum_power (me);
	integer numberOfFrequencies = his nx;

	autoVEC z = raw_VEC (numberOfFrequencies);
	for (integer ifreq = 1; ifreq <= numberOfFrequencies; ifreq ++) {
		const double frequency_Hz = his x1 + (ifreq - 1) * his dx;
		z [ifreq] = thy v_hertzToFrequency (frequency_Hz);
	}

	for (integer i = 1; i <= thy ny; i ++) {
		const double z0 = thy y1 + (i - 1) * thy dy;
		constVEC pow = his z.row (1); // TODO ??
		longdouble p = 0.0;
		for (integer ifreq = 1; ifreq <= numberOfFrequencies; ifreq ++) {
			/*
				Sekey & Hanson filter is defined in the power domain.
				We therefore multiply the power with a (and not a^2).
				integral (F(z),z=0..25) = 1.58/9
			*/
			const double a = NUMsekeyhansonfilter_amplitude (z0, z [ifreq]);
			p += a * pow [ifreq] ;
		}
		thy z [i] [frame] = double (p);
	}
}

autoBarkSpectrogram Sound_to_BarkSpectrogram (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) {
	try {
		const double samplingFrequency = 1.0 / my dx, nyquist = 0.5 * samplingFrequency;
		const double windowDuration = 2.0 * analysisWidth; /* gaussian window */
		const double zmax = NUMhertzToBark (nyquist);
		double fmin_bark = 0.0;

		// Check defaults.

		if (f1_bark <= 0.0)
			f1_bark = 1.0;
		if (fmax_bark <= 0.0)
			fmax_bark = zmax;
		if (df_bark <= 0.0)
			df_bark = 1.0;

		fmax_bark = std::min (fmax_bark, zmax);
		const integer numberOfFilters = Melder_iround ( (fmax_bark - f1_bark) / df_bark);
		Melder_require (numberOfFilters > 0,
			U"The combination of filter parameters is not valid.");

		integer numberOfFrames;
		double t1;
		Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoBarkSpectrogram thee = BarkSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_bark, fmax_bark, numberOfFilters, df_bark, f1_bark);

		autoMelderProgress progess (U"BarkSpectrogram analysis");

		for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
			const double t = Sampled_indexToX (thee.get(), iframe);

			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());
			Sound_into_BarkSpectrogram_frame (sframe.get(), thee.get(), iframe);

			if (iframe % 10 == 1)
				Melder_progress ( (double) iframe / numberOfFrames,  U"BarkSpectrogram analysis: frame ",
					iframe, U" from ", numberOfFrames, U".");
		}
		
		_Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);

		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no BarkSpectrogram created.");
	}
}

static void Sound_into_MelSpectrogram_frame (Sound me, MelSpectrogram thee, integer frame) {
	autoSpectrum him = Sound_to_Spectrum_power (me);

	for (integer ifilter = 1; ifilter <= thy ny; ifilter ++) {
		longdouble power = 0.0;
		const double fc_mel = thy y1 + (ifilter - 1) * thy dy;
		const double fc_hz = thy v_frequencyToHertz (fc_mel);
		const double fl_hz = thy v_frequencyToHertz (std::max (fc_mel - thy dy, 0.0));
		const double fh_hz =  thy v_frequencyToHertz (std::min (fc_mel + thy dy, his xmax));
		integer ifrom, ito;
		Sampled_getWindowSamples (him.get(), fl_hz, fh_hz, & ifrom, & ito);
		for (integer i = ifrom; i <= ito; i ++) {
			/*
				Bin with a triangular filter the power (= amplitude-squared)
			*/
			const double f = his x1 + (i - 1) * his dx;
			const double a = NUMtriangularfilter_amplitude (fl_hz, fc_hz, fh_hz, f);
			power += a * his z [1] [i];
		}
		thy z [ifilter] [frame] = double (power);
	}
}

autoMelSpectrogram Sound_to_MelSpectrogram (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) {
	try {
		const double samplingFrequency = 1.0 / my dx, nyquist = 0.5 * samplingFrequency;
		const double windowDuration = 2.0 * analysisWidth;   // Gaussian window
		double fmin_mel = 0.0;
		const double fbottom = NUMhertzToMel2 (100.0), fceiling = NUMhertzToMel2 (nyquist);

		// Check defaults.

		if (fmax_mel <= 0.0 || fmax_mel > fceiling)
			fmax_mel = fceiling;
		if (fmax_mel <= f1_mel) {
			f1_mel = fbottom;
			fmax_mel = fceiling;
		}
		if (f1_mel <= 0.0)
			f1_mel = fbottom;
		if (df_mel <= 0.0)
			df_mel = 100.0;

		// Determine the number of filters.

		const integer numberOfFilters = Melder_iround ((fmax_mel - f1_mel) / df_mel);


		integer numberOfFrames;
		double t1;
		Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoMelSpectrogram thee = MelSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_mel, fmax_mel, numberOfFilters, df_mel, f1_mel);

		autoMelderProgress progress (U"MelSpectrogram analysis");

		for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
			const double t = Sampled_indexToX (thee.get(), iframe);
			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());
			Sound_into_MelSpectrogram_frame (sframe.get(), thee.get(), iframe);
			
			if (iframe % 10 == 1)
				Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ", numberOfFrames, U".");
		}
		
		_Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);

		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": No MelSpectrogram created.");
	}
}

/*
	Analog formant filter response :
	H(f) = i f B / (f1^2 - f^2 + i f B)
*/
static int Sound_into_Spectrogram_frame (Sound me, Spectrogram thee, integer frame, double bw) {
	Melder_assert (bw > 0.0);
	autoSpectrum him = Sound_to_Spectrum_power (me);

	for (integer ifilter = 1; ifilter <= thy ny; ifilter ++) {
		const double fc = thy y1 + (ifilter - 1) * thy dy;
		constVEC pow = his z.row (1);
		double p = 0.0;
		for (integer ifreq = 1; ifreq <= his nx; ifreq ++) {
			/*
				H(f) = ifB / (fc^2 - f^2 + ifB)
				H(f)| = fB / sqrt ((fc^2 - f^2)^2 + f^2B^2)
				|H(f)|^2 = f^2B^2 / ((fc^2 - f^2)^2 + f^2B^2)
						 = 1 / (((fc^2 - f^2) /fB)^2 + 1)
			*/
			const double f = his x1 + (ifreq - 1) * his dx;
			const double a = NUMformantfilter_amplitude (fc, bw, f);
			p += a * pow [ifreq];
		}
		thy z [ifilter] [frame] = p;
	}
	return 1;
}

autoSpectrogram Sound_to_Spectrogram_pitchDependent (Sound me, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw,
	double pitchFloor, double pitchCeiling)
{
	try {
		autoPitch thee = Sound_to_Pitch (me, dt, pitchFloor, pitchCeiling);
		return Sound_Pitch_to_Spectrogram (me, thee.get(), analysisWidth, dt, f1_hz, fmax_hz, df_hz, relative_bw);
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrogram created.");
	}
}

autoSpectrogram Sound_Pitch_to_Spectrogram (Sound me, Pitch thee, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw) {
	try {
		const double windowDuration = 2.0 * analysisWidth; /* gaussian window */
		const double nyquist = 0.5 / my dx, samplingFrequency = 1.0 / my dx, fmin_hz = 0.0;

		Melder_require (my xmin >= thy xmin && my xmax <= thy xmax,
			U"The domain of the Sound should be included in the domain of the Pitch.");

		double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit::HERTZ);

		if (isundef (f0_median) || f0_median == 0.0) {
			f0_median = 100.0;
			Melder_warning (U"Pitch values undefined. Bandwidth fixed to 100 Hz. ");
		}

		if (f1_hz <= 0.0)
			f1_hz = 100.0;
		if (fmax_hz <= 0.0)
			fmax_hz = nyquist;
		if (df_hz <= 0.0)
			df_hz = f0_median / 2.0;
		if (relative_bw <= 0.0)
			relative_bw = 1.1;

		fmax_hz = std::min (fmax_hz, nyquist);
		const integer numberOfFilters = Melder_iround ( (fmax_hz - f1_hz) / df_hz);

		double t1;
		integer numberOfFrames, numberOfUndefinedPitchFrames = 0;
		Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
		autoSpectrogram him = Spectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_hz, fmax_hz, numberOfFilters, df_hz, f1_hz);

		// Temporary objects

		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoMelderProgress progress (U"Sound & Pitch: To FormantFilter");
		for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
			const double t = Sampled_indexToX (him.get(), iframe);
			double f0 = Pitch_getValueAtTime (thee, t, kPitch_unit::HERTZ, 0);

			if (isundef (f0) || f0 == 0.0) {
				numberOfUndefinedPitchFrames ++;
				f0 = f0_median;
			}
			const double b = relative_bw * f0;
			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());

			Sound_into_Spectrogram_frame (sframe.get(), him.get(), iframe, b);

			if (iframe % 10 == 1)
				Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ",
					numberOfFrames, U".");
		}
		
		_Spectrogram_windowCorrection (him.get(), window -> nx);

		return him;
	} catch (MelderError) {
		Melder_throw (U"FormantFilter not created from Pitch & FormantFilter.");
	}
}

autoSound BandFilterSpectrogram_as_Sound (BandFilterSpectrogram me, int unit) {
	try {
		autoSound thee = Sound_create (my ny, my xmin, my xmax, my nx, my dx, my x1);
		for (integer i = 1; i <= my ny; i ++)
			for (integer j = 1; j <= my nx; j ++)
				thy z [i] [j] = my v_getValueAtSample (j, i, unit);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Sound created.");
	}
}

autoSound BandFilterSpectrograms_crossCorrelate (BandFilterSpectrogram me, BandFilterSpectrogram thee, enum kSounds_convolve_scaling scaling, enum kSounds_convolve_signalOutsideTimeDomain signalOutsideTimeDomain) {
	try {
		autoSound sme = BandFilterSpectrogram_as_Sound (me, 1);   // to dB
		autoSound sthee = BandFilterSpectrogram_as_Sound (thee, 1);
		autoSound cc = Sounds_crossCorrelate (sme.get(), sthee.get(), scaling, signalOutsideTimeDomain);
		return cc;
	} catch (MelderError) {
		Melder_throw (me, U" and ", thee, U" not cross-correlated.");
	}
}

autoSound BandFilterSpectrograms_convolve (BandFilterSpectrogram me, BandFilterSpectrogram thee, enum kSounds_convolve_scaling scaling, enum kSounds_convolve_signalOutsideTimeDomain signalOutsideTimeDomain) {
	try {
		autoSound sme = BandFilterSpectrogram_as_Sound (me, 1);   // to dB
		autoSound sthee = BandFilterSpectrogram_as_Sound (thee, 1);
		autoSound cc = Sounds_convolve (sme.get(), sthee.get(), scaling, signalOutsideTimeDomain);
		return cc;
	} catch (MelderError) {
		Melder_throw (me, U" and ", thee, U" not convolved.");
	}
}

/* End of file Sound_and_Spectrogram_extensions.cpp */