File: phasevocoder.h

package info (click to toggle)
mixviews 1.20-10.1
  • links: PTS
  • area: main
  • in suites: potato
  • size: 2,928 kB
  • ctags: 5,960
  • sloc: cpp: 32,879; ansic: 2,110; makefile: 445; sh: 17
file content (243 lines) | stat: -rw-r--r-- 9,292 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
// phasevocoder.h

/******************************************************************************
 *
 *  MiXViews - an X window system based sound & data editor/processor
 *
 *  Copyright (c) 1993, 1994 Regents of the University of California
 *
 *  Author:     Douglas Scott
 *  Date:       December 13, 1994
 *
 *  Permission to use, copy and modify this software and its documentation
 *  for research and/or educational purposes and without fee is hereby granted,
 *  provided that the above copyright notice appear in all copies and that
 *  both that copyright notice and this permission notice appear in
 *  supporting documentation. The author reserves the right to distribute this
 *  software and its documentation.  The University of California and the author
 *  make no representations about the suitability of this software for any 
 *  purpose, and in no event shall University of California be liable for any
 *  damage, loss of data, or profits resulting from its use.
 *  It is provided "as is" without express or implied warranty.
 *
 ******************************************************************************/


// This class is the engine which performs the actual analysis and resynthesis
// of Phase Vocoder data and sound.  It is used as a member class for the
// PVAnalyzer and PVSynthesizer classes.

// Included here is the header comment from the original C source code file:

/*------------------------------------------------------------------

PROGRAM:	Phase Vocoder 

AUTHOR: 	Mark Dolson
		Center for Music Experiment Q-037
		University of California, San Diego
		La Jolla, Ca. 92093

DATE:		November 1, 1984


	This is a second release of a phase vocoder being
developed at the Computer Audio Research Lab of the Center for 
Music Experiment at U.C.S.D.  It performs both analysis and synthesis
efficiently using a Weighted Overlap-Add algorithm.  Whenever
possible, the minimum mean-squared-error formulation of Griffin
and Lim is used ("Signal Estimation from Modified Short-Time
Fourier Transform", I.E.E.E. Trans. ASSP-32, No. 2, April, 1984);
otherwise, the technique is that described in "Non-Uniform
Time-Scale Modification of Speech" by Samuel Holtzman Dantus
(M.S. and E.E. Thesis, M.I.T., 1980) and in "A Weighted 
Overlap-Add Method of Short-Time Fourier Analysis/Synthesis" by
R. E. Crochiere (I.E.E.E. Trans. ASSP-28, No. 1, February, 1980).
The code is written entirely in the C programming language except
for standard FFT subroutines written in FORTRAN which are taken
from the I.E.E.E. Programs for Digital Signal Processing package.
This code runs at U.C.S.D. on a VAX 11-780 under Berkeley UNIX;
with some modification, it should run on nearly any machine
supporting FORTRAN and C.
------------------------------------------------------------------

		"R = input sample rate (automatically read from stdin)\n",
		"F = fundamental frequency (sampRate/256) DON'T USE -F AND -N\n",
		"N = # of bandpass filters (256 unless -F is specified)\n",
		"W = filter overlap factor: {1,2,(4),8} DON'T USE -W AND -M\n",
		(note -- these were {0, 1, (2), 3} originally)
		"M = analysis window length (fftSize unless -W is specified)\n",
		"L = synthesis window length (M) \n",
		"D = decimation factor (min((M/(8*timeScaleFactor)),(M/8))\n",
		"I = interpolation factor (=timeScaleFactor*inputFrameOffset) \n",
		"T = time-scale factor (1.)\n",
		"P = pitch-scale factor (1.) DON'T USE -T AND -P\n",
		"C = resynthesize odd (1) or even (2) channels only\n",
		"i = resynthesize bandpass filters i thru j only\n",
		"j = resynthesize bandpass filters i thru j only\n",
		"b = starting sample (0)\n",
		"e = final sample (end of input)\n",
		"w = warp factor for spectral envelope (1.)\n",
		"A:  analysis only: output will be analysis data\n",
		"E:  analysis only: output will be spectral envelope\n",
		"X:  analysis only: output will be magnitude values\n",
		"S:  synthesis only: input must be analysis data\n",
		"K:  use Kaiser filter instead of hamming\n",
		"V [filename]:  verbose (summarize on pvoc.stat or file)\n",
*/

#ifndef PHASEVOCODER_H
#ifdef __GNUG__
#pragma interface
#endif
#define PHASEVOCODER_H

#include "localdefs.h"

// Forward declarations: this header only uses these types through pointers
// (Data *, InPipeAction *, Envelope *), so their full definitions are not
// required here.
class InPipeAction;
class Data;
class Envelope;

// PhaseVocoder: the analysis/resynthesis engine for phase vocoder data.
//
// The constructor, destructor and the run/query methods are protected, and
// PVAnalyzer, PVSynthesizer and PvocRequester are friends, so in practice the
// engine is created and driven only by those classes (or by subclasses).
// The only public names are the Mode enum and the Info parameter struct.
class PhaseVocoder {
	friend class PVAnalyzer;
	friend class PVSynthesizer;
	friend class PvocRequester;
public:
	// Operating mode of the engine; stored in Info::runMode.
	enum Mode { Analysis, Synthesis, Resynthesis, Spectrum, Magnitudes };

	// Info: the set of user-level parameters that configures a PhaseVocoder.
	// An Info is passed by reference to the PhaseVocoder constructor and the
	// engine keeps one as its member 'I'.
	//
	// NOTE(review): the short constructor parameter names (n, f, m, d, l, i,
	// t, p, ii, jj, warp, kais) appear to follow the option letters of the
	// original pvoc program listed in the file header comment; the actual
	// mapping onto the fields below lives in the implementation file --
	// confirm there.
	//
	// NOTE(review): both constructors accept (double, float) with every
	// remaining argument defaulted, so a call that supplies only those two
	// arguments cannot choose between them (ambiguous overload).  Callers
	// must pass at least a third argument (a Mode or an int) to disambiguate.
	struct Info {
		// ctor for analysis
		Info(double sr, float scale, Mode=PhaseVocoder::Analysis,
			 int n=0, int f=0, int m=0, int d=0,
		     float t=1, float p=1, boolean kais=false);
		// ctor for synthesis
		Info(double sr, float scale, int n=0, int f=0, int d=0, int l=0,
		     int i=0, float t=1, int ii=0, int jj=0, float warp=0,
			 boolean kais=false);

		int	fftSize,		// number of phase vocoder channels (bands)
			fundFreq,		// fundamental frequency (determines fftSize)
			inputFrameSize,	// length of analWindow impulse response
			inputFrameOffset, // decimation factor (default inputFrameSize/8)
			outputFrameSize,	// length of synWindow impulse response
			outputFrameOffset,	// interpolation factor
								// (default is outputFrameOffset=inputFrameOffset)
			firstBand,		// flag for resynthesizing chans i to j only
			lastBand;		// flag for resynthesizing chans i to j only

		boolean K;			// flag for Kaiser window

		float
			timeScaleFactor,	// time scale factor ( >1 to expand)
			pchScaleFactor,		// pitch scale factor 
			warp,				// spectral envelope warp factor 
			inputScalingFactor;	// to scale short int samps between 1 and -1
				
		double samplingRate;		// sample rate of sound

		Mode runMode;			// which of the Mode values the engine runs in
	};
protected:
	// Construction and lifetime.  Protected: only friends and subclasses
	// can create or destroy an engine.
	PhaseVocoder(Info &);
	~PhaseVocoder();
	void initialize();
	void reset();
	boolean isGood();

	// Mode queries.  Each tests for exactly one Mode value, so both return
	// false when runMode is Resynthesis, Spectrum or Magnitudes.
	boolean analyzing() { return I.runMode == Analysis; }
	boolean synthesizing() { return I.runMode == Synthesis; }

	// Entry points used by the friend classes to process data.
	// NOTE(review): the meaning of the int return values is defined in the
	// implementation file, not visible here.
	int runAnalysis(double *, Data *);
	int runSynthesis(Data *, InPipeAction *);

	// Size/offset accessors.
	int analysisChannels() { return I.fftSize + 2; }	// fftSize plus two
	int freqBands() { return fftPoints + 1; }		// fftPoints is fftSize / 2
	int getStartingOffset() { return startingOffset; }
	int calculateAnalysisLength(int inputLen);
	int getInputFrameOffset() { return I.inputFrameOffset; }
	// Note: returns the pitch-scaled offset (scaledOutputOffset), not the
	// raw I.outputFrameOffset.
	int getOutputFrameOffset() { return scaledOutputOffset; }
	// Input frame size plus the inputFrameEven flag value.
	int roundedInputFrameSize() { return I.inputFrameSize + inputFrameEven; }
	// True when a time-scale envelope has been attached (pointer is non-nil).
	boolean variableTimeScaling() { return timeScaleEnvelope != nil; }
private:
	// Setup helpers.
	// NOTE(review): presumably called from the constructor / initialize();
	// confirm the call order in the implementation file.
	void zeroPointers();
	void setDefaults();
	void setUpLog();
	boolean checkAndSetValues();
	void createAnalysisWindow();
	void createSynthesisWindow();
	void createBuffers();
	// Static: operates only on the window buffer passed in, not on members.
	static void hamming(float *win, int winLen, int even);
	void updateAmps();
	void printToLog();
	
	// Analysis-side steps (presumably the stages of runAnalysis() -- confirm).
	void applyInputWindow(double *);
	void analyze();
	void convertToReal();
	void detectAndWarpEnvelope();
	void outputAnalysis(Data *);

	// Synthesis-side steps (presumably the stages of runSynthesis() -- confirm).
	void loadAnalysis(Data *);
	void limitBands();
	void convertFromReal();
	void synthesize();
	void applyOutputWindow();
	int shiftOut(InPipeAction *);
	void getTimeScaleFactor();
	void calculateOffsetsAndIncrement();
private:	
	// Data members.  Declaration order determines object layout and
	// constructor initialization order, so do not reorder casually.
    Info I;						// this engine's own Info parameter block

	int
		startingOffset,		// initial number of samps for starting frame
		analWinLen,			// half-length of analysis window
		synWinLen,			// half-length of synthesis window
		outCount,			// number of samples written to output
		obuflen,			// length of output buffer
		inSamp,				// current input (analysis) sample
		outSamp;			// current output (synthesis) sample

	float *output,		// pointer to start of output buffer
		*anal,			// pointer to start of analysis buffer
		*syn,			// pointer to start of synthesis buffer
		*nextOut,		// pointer to next empty word in output
		*analWindowBuf,	// analysis window buffer
		*analWindow,	// pointer to center of analysis window
		*synWindowBuf,	// synthesis window buffer
		*synWindow,		// pointer to center of synthesis window
		*maxAmp,		// pointer to start of max amp buffer
		*avgAmp,		// pointer to start of avg amp buffer
		*avgFrq,		// pointer to start of avg frq buffer
		*env,			// pointer to start of spectral envelope
		*oldInPhase,	// pointer to start of input phase buffer
		*oldOutPhase;	// pointer to start of output phase buffer

	// NOTE(review): FILE needs <stdio.h>; this header does not include it
	// directly, so it presumably arrives via "localdefs.h" -- confirm.
	FILE	*fp;			// auxiliary output file (-V option)
	
	// Class-wide constants; declared here, defined in the implementation file.
	static float Pi;
	static float TwoPi;
	static float HalfPi;
	static float beta;				// for Kaiser window
	
	float
		RoverTwoPi,			// sampRate/inputFrameOffset divided by 2*Pi 
		TwoPioverR,			// 2*Pi divided by sampRate/outputFrameOffset 
		sum,				// scale factor for renormalizing windows 
		ftot;				// scale factor for calculating statistics 

	int
		sampsIn, // no. of new inputs to read (sampsIn <= inputFrameOffset)
		fftPoints,			// fftSize / 2
		invFftSize,			// synthesis:  fftSize / pchScaleFactor
		invFftPoints,		// invFftSize / 2
		scaledOutputOffset,	// synthesis:  outputFrameOffset/pchScaleFactor
		sampsOut,			// synthesis:  number of new outputs to write
		inputFrameEven,		// flag for even inputFrameSize
		outputFrameEven,	// flag for even outputFrameSize
		C,					// flag for resynthesizing even or odd chans
		bandsLimited,		// flag for selected channel resynthesis
		verbose,			// verbose (summarize analysis) output flag
		X,		/* flag for magnitude output */
		E;		/* flag for spectral envelope output */
	boolean initialized;
	// Optional envelope; variableTimeScaling() reports whether it is non-nil.
	Envelope* timeScaleEnvelope;
};

#endif