File: lp_residual.cc

package info (click to toggle)
webrtc-audio-processing 1.3-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,112 kB
  • sloc: cpp: 50,766; ansic: 19,793; asm: 236; makefile: 4
file content (138 lines) | stat: -rw-r--r-- 4,938 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <numeric>

#include "rtc_base/checks.h"

namespace webrtc {
namespace rnn_vad {
namespace {

// Writes into |x_corr| the cross-correlation coefficients between |x| and |y|
// for the lags {0, ..., max_lag - 1}, where max_lag is the size of |x_corr|.
// The coefficient for a lag l is the dot product between the first
// "size of |x| - l" samples of |x| and the samples of |y| starting at index l,
// i.e., the longest overlapping sub-arrays are used. |x| and |y| must have the
// same size, which must be greater than max_lag.
void ComputeCrossCorrelation(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<const float> y,
    rtc::ArrayView<float, kNumLpcCoefficients> x_corr) {
  constexpr size_t kNumLags = x_corr.size();
  RTC_DCHECK_EQ(x.size(), y.size());
  RTC_DCHECK_LT(kNumLags, x.size());
  // Visit the output coefficients in order while tracking the matching lag.
  size_t current_lag = 0;
  for (float& coefficient : x_corr) {
    coefficient = std::inner_product(x.begin(), x.end() - current_lag,
                                     y.begin() + current_lag, 0.f);
    ++current_lag;
  }
}

// Denoises the auto-correlation coefficients in place: the lag-0 coefficient
// is slightly boosted and every other coefficient is attenuated by an amount
// that grows with the square of its lag.
void DenoiseAutoCorrelation(
    rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) {
  // Assume -40 dB white noise floor.
  auto_corr[0] *= 1.0001f;
  for (size_t lag = 1; lag < kNumLpcCoefficients; ++lag) {
    const float lag_factor = 0.008f * lag;
    // Keep the left-to-right multiplication order of the attenuation term.
    auto_corr[lag] -= auto_corr[lag] * lag_factor * lag_factor;
  }
}

// Given the auto-correlation coefficients |auto_corr| of an input frame,
// computes the initial inverse filter coefficients and writes them into
// |lpc_coeffs|. The coefficients are built one order at a time (the recursion
// has the shape of a Levinson-Durbin iteration); it stops early once the
// prediction error drops below 0.1% of |auto_corr[0]|, in which case the
// remaining entries of |lpc_coeffs| are left untouched.
void ComputeInitialInverseFilterCoefficients(
    rtc::ArrayView<const float, kNumLpcCoefficients> auto_corr,
    rtc::ArrayView<float, kNumLpcCoefficients - 1> lpc_coeffs) {
  // Smallest magnitude allowed for the prediction error when it is used as a
  // divisor.
  constexpr float kMinErrorMagnitude = 1e-6f;

  float prediction_error = auto_corr[0];
  for (size_t order = 0; order < kNumLpcCoefficients - 1; ++order) {
    // Accumulate the numerator of the reflection coefficient from the
    // coefficients computed so far.
    float numerator = 0.f;
    for (size_t k = 0; k < order; ++k) {
      numerator += lpc_coeffs[k] * auto_corr[order - k];
    }
    numerator += auto_corr[order + 1];

    // Avoid division by numbers close to zero (the sign is preserved).
    if (std::fabs(prediction_error) < kMinErrorMagnitude) {
      prediction_error = std::copysign(kMinErrorMagnitude, prediction_error);
    }
    const float reflection = numerator / -prediction_error;

    // Store the new coefficient, then update the previous ones in symmetric
    // pairs (first with last, second with second to last, and so on).
    lpc_coeffs[order] = reflection;
    const size_t num_pairs = (order + 1) >> 1;
    for (size_t k = 0; k < num_pairs; ++k) {
      const size_t mirrored = order - 1 - k;
      const float front = lpc_coeffs[k];
      const float back = lpc_coeffs[mirrored];
      lpc_coeffs[k] = front + reflection * back;
      lpc_coeffs[mirrored] = back + reflection * front;
    }

    // Update the total error and stop once it is small enough.
    prediction_error -= reflection * reflection * prediction_error;
    if (prediction_error < 0.001f * auto_corr[0]) {
      break;
    }
  }
}

}  // namespace

// Computes the LPC coefficients for the frame |x| and writes them, after
// post-processing, into |lpc_coeffs|. The steps are: auto-correlation of |x|,
// denoising of the auto-correlation, computation of the initial inverse filter
// coefficients and post-processing. If the lag-0 auto-correlation coefficient
// is exactly zero, |lpc_coeffs| is filled with zeros.
void ComputeAndPostProcessLpcCoefficients(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float, kNumLpcCoefficients> lpc_coeffs) {
  // The post-processing below reads |lpc_coeffs_pre[0]| and writes both the
  // first and the last element of |lpc_coeffs|.
  static_assert(kNumLpcCoefficients >= 2,
                "At least two LPC coefficients are required.");
  std::array<float, kNumLpcCoefficients> auto_corr;
  ComputeCrossCorrelation(x, x, {auto_corr.data(), auto_corr.size()});
  // The exact comparison is intentional: only a frame with zero energy is
  // treated as empty.
  if (auto_corr[0] == 0.f) {  // Empty frame.
    std::fill(lpc_coeffs.begin(), lpc_coeffs.end(), 0);
    return;
  }
  DenoiseAutoCorrelation({auto_corr.data(), auto_corr.size()});
  // Zero-initialized since the computation below may stop early and leave the
  // trailing coefficients unwritten.
  std::array<float, kNumLpcCoefficients - 1> lpc_coeffs_pre{};
  ComputeInitialInverseFilterCoefficients(auto_corr, lpc_coeffs_pre);
  // LPC coefficients post-processing.
  // TODO(bugs.webrtc.org/9076): Consider removing these steps.
  // Step 1: scale the i-th coefficient by 0.9^(i + 1).
  float c1 = 1.f;
  for (float& coeff : lpc_coeffs_pre) {
    c1 *= 0.9f;
    coeff *= c1;
  }
  // Step 2: combine each coefficient with its predecessor weighted by |c2|.
  // The first output uses an implicit leading 1 and the last output only has
  // a predecessor. Written as a loop so that every element of |lpc_coeffs| is
  // set for any value of kNumLpcCoefficients.
  const float c2 = 0.8f;
  constexpr size_t kLastIndex = kNumLpcCoefficients - 1;
  lpc_coeffs[0] = lpc_coeffs_pre[0] + c2;
  for (size_t i = 1; i < kLastIndex; ++i) {
    lpc_coeffs[i] = lpc_coeffs_pre[i] + c2 * lpc_coeffs_pre[i - 1];
  }
  lpc_coeffs[kLastIndex] = c2 * lpc_coeffs_pre[kLastIndex - 1];
}

// Computes the linear prediction residual of |x| given |lpc_coeffs| and writes
// it into |y|. Each output sample is the input sample plus the dot product
// between |lpc_coeffs| and the kNumLpcCoefficients previous input samples
// (most recent first); samples preceding the start of |x| are taken as zero.
// |x| and |y| must have the same size.
void ComputeLpResidual(
    rtc::ArrayView<const float, kNumLpcCoefficients> lpc_coeffs,
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> y) {
  RTC_DCHECK_LT(kNumLpcCoefficients, x.size());
  RTC_DCHECK_EQ(x.size(), y.size());
  // Most recent input samples, newest first; initially all zeros.
  std::array<float, kNumLpcCoefficients> history{};
  const size_t num_samples = y.size();
  for (size_t n = 0; n < num_samples; ++n) {
    const float sample = x[n];
    const float residual = std::inner_product(history.begin(), history.end(),
                                              lpc_coeffs.begin(), sample);
    // Shift the history by one position (dropping the oldest sample) and
    // store the new sample in front.
    std::copy_backward(history.begin(), history.end() - 1, history.end());
    history[0] = sample;
    // Copy result.
    y[n] = residual;
  }
}

}  // namespace rnn_vad
}  // namespace webrtc