1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
|
////////////////////////////////////////////////////////////////////////////////
///
/// Beats-per-minute (BPM) detection routine.
///
/// The beat detection algorithm works as follows:
/// - Use function 'inputSamples' to input a chunks of samples to the class for
/// analysis. It's a good idea to enter a large sound file or stream in smallish
/// chunks of around few kilosamples in order not to extinguish too much RAM memory.
/// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden,
/// which is basically ok as low (bass) frequencies mostly determine the beat rate.
/// Simple averaging is used for anti-alias filtering because the resulting signal
/// quality isn't of that high importance.
/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
/// taking absolute value that's smoothed by sliding average. Signal levels that
/// are below a couple of times the general RMS amplitude level are cut away to
/// leave only notable peaks there.
/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
/// autocorrelation function of the enveloped signal.
/// - After whole sound data file has been analyzed as above, the bpm level is
/// detected by function 'getBpm' that finds the highest peak of the autocorrelation
/// function, calculates it's precise location and converts this reading to bpm's.
///
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed : $Date: 2012-08-30 22:45:25 +0300 (Thu, 30 Aug 2012) $
// File revision : $Revision: 4 $
//
// $Id: BPMDetect.cpp 149 2012-08-30 19:45:25Z oparviai $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////////////////////
#include <math.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "FIFOSampleBuffer.h"
#include "PeakFinder.h"
#include "BPMDetect.h"
using namespace soundtouch;
#define INPUT_BLOCK_SAMPLES 2048
#define DECIMATED_BLOCK_SAMPLES 256
/// decay constant for calculating RMS volume sliding average approximation
/// (time constant is about 10 sec)
const float avgdecay = 0.99986f;
/// Normalization coefficient for calculating RMS sliding average approximation.
const float avgnorm = (1 - avgdecay);
////////////////////////////////////////////////////////////////////////////////
// Enable following define to create bpm analysis file:
// #define _CREATE_BPM_DEBUG_FILE
#ifdef _CREATE_BPM_DEBUG_FILE
#define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt"
static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
{
FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
int i;
if (fptr)
{
printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
for (i = minpos; i < maxpos; i ++)
{
fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
}
fclose(fptr);
}
}
#else
#define _SaveDebugData(a,b,c,d)
#endif
////////////////////////////////////////////////////////////////////////////////
BPMDetect::BPMDetect(int numChannels, int aSampleRate)
{
this->sampleRate = aSampleRate;
this->channels = numChannels;
decimateSum = 0;
decimateCount = 0;
envelopeAccu = 0;
// Initialize RMS volume accumulator to RMS level of 1500 (out of 32768) that's
// safe initial RMS signal level value for song data. This value is then adapted
// to the actual level during processing.
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
// integer samples
RMSVolumeAccu = (1500 * 1500) / avgnorm;
#else
// float samples, scaled to range [-1..+1[
RMSVolumeAccu = (0.045f * 0.045f) / avgnorm;
#endif
// choose decimation factor so that result is approx. 1000 Hz
decimateBy = sampleRate / 1000;
assert(decimateBy > 0);
assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
// Calculate window length & starting item according to desired min & max bpms
windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
assert(windowLen > windowStart);
// allocate new working objects
xcorr = new float[windowLen];
memset(xcorr, 0, windowLen * sizeof(float));
// allocate processing buffer
buffer = new FIFOSampleBuffer();
// we do processing in mono mode
buffer->setChannels(1);
buffer->clear();
}
BPMDetect::~BPMDetect()
{
delete[] xcorr;
delete buffer;
}
/// convert to mono, low-pass filter & decimate to about 500 Hz.
/// return number of outputted samples.
///
/// Decimation is used to remove the unnecessary frequencies and thus to reduce
/// the amount of data needed to be processed as calculating autocorrelation
/// function is a very-very heavy operation.
///
/// Anti-alias filtering is done simply by averaging the samples. This is really a
/// poor-man's anti-alias filtering, but it's not so critical in this kind of application
/// (it'd also be difficult to design a high-quality filter with steep cut-off at very
/// narrow band)
int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
{
int count, outcount;
LONG_SAMPLETYPE out;
assert(channels > 0);
assert(decimateBy > 0);
outcount = 0;
for (count = 0; count < numsamples; count ++)
{
int j;
// convert to mono and accumulate
for (j = 0; j < channels; j ++)
{
decimateSum += src[j];
}
src += j;
decimateCount ++;
if (decimateCount >= decimateBy)
{
// Store every Nth sample only
out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
decimateSum = 0;
decimateCount = 0;
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
// check ranges for sure (shouldn't actually be necessary)
if (out > 32767)
{
out = 32767;
}
else if (out < -32768)
{
out = -32768;
}
#endif // SOUNDTOUCH_INTEGER_SAMPLES
dest[outcount] = (SAMPLETYPE)out;
outcount ++;
}
}
return outcount;
}
// Calculates autocorrelation function of the sample history buffer
void BPMDetect::updateXCorr(int process_samples)
{
int offs;
SAMPLETYPE *pBuffer;
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
pBuffer = buffer->ptrBegin();
for (offs = windowStart; offs < windowLen; offs ++)
{
LONG_SAMPLETYPE sum;
int i;
sum = 0;
for (i = 0; i < process_samples; i ++)
{
sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
}
// xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable coefficients
// if it's desired that the system adapts automatically to
// various bpms, e.g. in processing continouos music stream.
// The 'xcorr_decay' should be a value that's smaller than but
// close to one, and should also depend on 'process_samples' value.
xcorr[offs] += (float)sum;
}
}
// Calculates envelope of the sample data
void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples)
{
const static double decay = 0.7f; // decay constant for smoothing the envelope
const static double norm = (1 - decay);
int i;
LONG_SAMPLETYPE out;
double val;
for (i = 0; i < numsamples; i ++)
{
// calc average RMS volume
RMSVolumeAccu *= avgdecay;
val = (float)fabs((float)samples[i]);
RMSVolumeAccu += val * val;
// cut amplitudes that are below cutoff ~2 times RMS volume
// (we're interested in peak values, not the silent moments)
if (val < 0.5 * sqrt(RMSVolumeAccu * avgnorm))
{
val = 0;
}
// smooth amplitude envelope
envelopeAccu *= decay;
envelopeAccu += val;
out = (LONG_SAMPLETYPE)(envelopeAccu * norm);
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
// cut peaks (shouldn't be necessary though)
if (out > 32767) out = 32767;
#endif // SOUNDTOUCH_INTEGER_SAMPLES
samples[i] = (SAMPLETYPE)out;
}
}
void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
{
SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
// iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
while (numSamples > 0)
{
int block;
int decSamples;
block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
// decimate. note that converts to mono at the same time
decSamples = decimate(decimated, samples, block);
samples += block * channels;
numSamples -= block;
// envelope new samples and add them to buffer
calcEnvelope(decimated, decSamples);
buffer->putSamples(decimated, decSamples);
}
// when the buffer has enought samples for processing...
if ((int)buffer->numSamples() > windowLen)
{
int processLength;
// how many samples are processed
processLength = (int)buffer->numSamples() - windowLen;
// ... calculate autocorrelations for oldest samples...
updateXCorr(processLength);
// ... and remove them from the buffer
buffer->receiveSamples(processLength);
}
}
void BPMDetect::removeBias()
{
int i;
float minval = 1e12f; // arbitrary large number
for (i = windowStart; i < windowLen; i ++)
{
if (xcorr[i] < minval)
{
minval = xcorr[i];
}
}
for (i = windowStart; i < windowLen; i ++)
{
xcorr[i] -= minval;
}
}
float BPMDetect::getBpm()
{
double peakPos;
double coeff;
PeakFinder peakFinder;
coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
// save bpm debug analysis data if debug data enabled
_SaveDebugData(xcorr, windowStart, windowLen, coeff);
// remove bias from xcorr data
removeBias();
// find peak position
peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
assert(decimateBy != 0);
if (peakPos < 1e-9) return 0.0; // detection failed.
// calculate BPM
return (float) (coeff / peakPos);
}
|