1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <numeric>
#include "rtc_base/checks.h"
namespace webrtc {
namespace rnn_vad {
namespace {
// Fills |x_corr| with the cross-correlation of |x| and |y| for every lag in
// {0, ..., max_lag - 1}, where max_lag is the size of |x_corr|.
// For a lag l, the coefficient is the dot product of the first
// "size of |x| - l" samples of |x| with the samples of |y| starting at index
// l - i.e., the longest overlapping sub-arrays are used. |x| and |y| must have
// the same size, and that size must be greater than max_lag.
void ComputeCrossCorrelation(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<const float> y,
    rtc::ArrayView<float, kNumLpcCoefficients> x_corr) {
  constexpr size_t max_lag = x_corr.size();
  RTC_DCHECK_EQ(x.size(), y.size());
  RTC_DCHECK_LT(max_lag, x.size());
  const size_t num_samples = x.size();
  for (size_t shift = 0; shift < max_lag; ++shift) {
    // Number of sample pairs that still overlap once |y| is advanced by
    // |shift| positions.
    const size_t overlap = num_samples - shift;
    x_corr[shift] = std::inner_product(x.begin(), x.begin() + overlap,
                                       y.begin() + shift, 0.f);
  }
}
// Applies denoising to the auto-correlation coefficients, modifying
// |auto_corr| in place.
void DenoiseAutoCorrelation(
    rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) {
  // Raise the zero-lag term by 0.01%, i.e., assume a -40 dB white noise floor.
  auto_corr[0] *= 1.0001f;
  // Reduce every other coefficient by an amount that grows with the square of
  // its lag.
  for (size_t lag = 1; lag < kNumLpcCoefficients; ++lag) {
    const float scaled_lag = 0.008f * lag;
    auto_corr[lag] -= auto_corr[lag] * scaled_lag * scaled_lag;
  }
}
// Derives the initial inverse filter coefficients from the auto-correlation
// coefficients of an input frame, adding one coefficient per iteration.
// The recursion stops early once the residual error drops below 0.1% of
// |auto_corr[0]|; entries of |lpc_coeffs| that have not been reached by then
// are left untouched, so the caller is expected to pass a zero-initialized
// buffer.
void ComputeInitialInverseFilterCoefficients(
    rtc::ArrayView<const float, kNumLpcCoefficients> auto_corr,
    rtc::ArrayView<float, kNumLpcCoefficients - 1> lpc_coeffs) {
  // Smallest error magnitude allowed as a divisor.
  constexpr float kMinErrorMagnitude = 1e-6f;
  constexpr size_t kNumCoeffs = kNumLpcCoefficients - 1;

  float residual_error = auto_corr[0];
  for (size_t order = 0; order < kNumCoeffs; ++order) {
    // Reflection coefficient for the current order.
    float reflection = 0.f;
    for (size_t k = 0; k < order; ++k) {
      reflection += lpc_coeffs[k] * auto_corr[order - k];
    }
    reflection += auto_corr[order + 1];
    // Avoid division by numbers close to zero (the sign is preserved).
    if (std::fabs(residual_error) < kMinErrorMagnitude) {
      residual_error = std::copysign(kMinErrorMagnitude, residual_error);
    }
    reflection /= -residual_error;

    // Store the new coefficient, then update the previously computed ones in
    // mirrored pairs (front/back) so that each pair is combined using the
    // values it held before this iteration.
    lpc_coeffs[order] = reflection;
    const size_t num_pairs = (order + 1) >> 1;
    for (size_t front = 0; front < num_pairs; ++front) {
      const size_t back = order - 1 - front;
      const float front_value = lpc_coeffs[front];
      const float back_value = lpc_coeffs[back];
      lpc_coeffs[front] = front_value + reflection * back_value;
      lpc_coeffs[back] = back_value + reflection * front_value;
    }

    // Update the total error and stop when it is small enough relative to
    // the zero-lag auto-correlation.
    residual_error -= reflection * reflection * residual_error;
    if (residual_error < 0.001f * auto_corr[0]) {
      break;
    }
  }
}
} // namespace
// Computes the LPC coefficients for the frame |x| and writes them, after
// post-processing, into |lpc_coeffs|. If the zero-lag auto-correlation of |x|
// is exactly zero, all the output coefficients are set to zero.
void ComputeAndPostProcessLpcCoefficients(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float, kNumLpcCoefficients> lpc_coeffs) {
  static_assert(kNumLpcCoefficients >= 2,
                "At least one inverse filter coefficient is required.");
  std::array<float, kNumLpcCoefficients> auto_corr;
  ComputeCrossCorrelation(x, x, {auto_corr.data(), auto_corr.size()});
  if (auto_corr[0] == 0.f) {  // Empty frame.
    std::fill(lpc_coeffs.begin(), lpc_coeffs.end(), 0.f);
    return;
  }
  DenoiseAutoCorrelation({auto_corr.data(), auto_corr.size()});
  // Zero-initialized since the computation below may stop early and leave
  // the trailing coefficients unwritten.
  std::array<float, kNumLpcCoefficients - 1> lpc_coeffs_pre{};
  ComputeInitialInverseFilterCoefficients(auto_corr, lpc_coeffs_pre);
  // LPC coefficients post-processing.
  // TODO(bugs.webrtc.org/9076): Consider removing these steps.
  // Step 1: scale the i-th coefficient by 0.9^(i + 1).
  float c1 = 1.f;
  for (float& coeff : lpc_coeffs_pre) {
    c1 *= 0.9f;
    coeff *= c1;
  }
  // Step 2: combine each coefficient with c2 times its predecessor, treating
  // the implicit leading coefficient as 1 and the one past the end as 0. The
  // loop is written in terms of kNumLpcCoefficients so that every output
  // element is written regardless of the value of that constant.
  constexpr float c2 = 0.8f;
  constexpr size_t kLast = kNumLpcCoefficients - 1;
  lpc_coeffs[0] = lpc_coeffs_pre[0] + c2;
  for (size_t i = 1; i < kLast; ++i) {
    lpc_coeffs[i] = lpc_coeffs_pre[i] + c2 * lpc_coeffs_pre[i - 1];
  }
  lpc_coeffs[kLast] = c2 * lpc_coeffs_pre[kLast - 1];
}
// Computes the linear prediction residual of |x| given |lpc_coeffs| and
// writes it into |y|. Each output sample is the current input sample plus the
// dot product between |lpc_coeffs| and the kNumLpcCoefficients most recent
// past input samples (samples before the start of |x| are taken as zero).
// |x| and |y| must have the same size.
void ComputeLpResidual(
    rtc::ArrayView<const float, kNumLpcCoefficients> lpc_coeffs,
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> y) {
  RTC_DCHECK_LT(kNumLpcCoefficients, x.size());
  RTC_DCHECK_EQ(x.size(), y.size());
  // Past input samples, most recent first; starts out as all zeros.
  std::array<float, kNumLpcCoefficients> history{};
  const size_t num_samples = y.size();
  for (size_t n = 0; n < num_samples; ++n) {
    // Read the input before writing the output for this index.
    const float current = x[n];
    const float residual = std::inner_product(history.begin(), history.end(),
                                              lpc_coeffs.begin(), current);
    // Shift the history by one position (the oldest sample is dropped) and
    // put the new sample at the front.
    std::copy_backward(history.begin(), history.end() - 1, history.end());
    history[0] = current;
    y[n] = residual;
  }
}
} // namespace rnn_vad
} // namespace webrtc
|