1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
|
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/saturation_protector.h"
#include <memory>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
// Time threshold in ms: once the time since the last push exceeds this value,
// `UpdateSaturationProtectorState()` pushes the running max peak into the peak
// delay buffer and starts a new accumulation.
constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
// Lower and upper bounds to which the headroom estimate is clamped.
constexpr float kMinMarginDb = 12.0f;
constexpr float kMaxMarginDb = 25.0f;
// Smoothing coefficients of the headroom update. `kAttack` is applied when the
// difference between the delayed peak and the speech level is greater than the
// current headroom, `kDecay` otherwise.
constexpr float kAttack = 0.9988493699365052f;
constexpr float kDecay = 0.9997697679981565f;
// State of the saturation protector. It is declared outside of
// `SaturationProtectorImpl` so that a whole state can be copied as a unit,
// which is how the check-point and restore operations are implemented.
struct SaturationProtectorState {
  // Field-by-field equality; returns false at the first field that differs.
  bool operator==(const SaturationProtectorState& other) const {
    if (!(headroom_db == other.headroom_db)) {
      return false;
    }
    if (!(peak_delay_buffer == other.peak_delay_buffer)) {
      return false;
    }
    if (!(max_peaks_dbfs == other.max_peaks_dbfs)) {
      return false;
    }
    return time_since_push_ms == other.time_since_push_ms;
  }
  // Negation of `operator==`.
  bool operator!=(const SaturationProtectorState& other) const {
    return !operator==(other);
  }

  // Current headroom estimate.
  float headroom_db;
  // Buffer holding the delayed max peaks.
  SaturationProtectorBuffer peak_delay_buffer;
  // Max peak observed since the last push into `peak_delay_buffer`.
  float max_peaks_dbfs;
  // Time since the last ring buffer push operation.
  int time_since_push_ms;
};
// Brings `state` back to its initial condition: the headroom is set to
// `initial_headroom_db`, the running max peak to `kMinLevelDbfs`, the time
// since the last push to zero, and the peak delay buffer is reset.
void ResetSaturationProtectorState(float initial_headroom_db,
                                   SaturationProtectorState& state) {
  // Scalar fields.
  state.time_since_push_ms = 0;
  state.max_peaks_dbfs = kMinLevelDbfs;
  state.headroom_db = initial_headroom_db;
  // Delayed peaks.
  state.peak_delay_buffer.Reset();
}
// Processes one observed frame given its peak level `peak_dbfs` and the
// estimated speech level `speech_level_dbfs`:
//  1. folds `peak_dbfs` into the running max peak held by `state`;
//  2. once the time since the last push exceeds
//     `kPeakEnveloperSuperFrameLengthMs`, pushes that max into the peak delay
//     buffer and restarts the accumulation;
//  3. smooths the headroom towards the difference between the delayed peak
//     and the speech level, then clamps it to [`kMinMarginDb`, `kMaxMarginDb`].
// `state` must not be modified without calling this function.
void UpdateSaturationProtectorState(float peak_dbfs,
                                    float speech_level_dbfs,
                                    SaturationProtectorState& state) {
  // Track the max peak seen since the last push.
  if (state.max_peaks_dbfs < peak_dbfs) {
    state.max_peaks_dbfs = peak_dbfs;
  }
  state.time_since_push_ms += kFrameDurationMs;

  // NOTE(review): the comparison is strict, so the push happens on the first
  // frame for which the accumulated time is greater than (not equal to)
  // `kPeakEnveloperSuperFrameLengthMs` - confirm that this is intended.
  const bool push_due =
      SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs);
  if (push_due) {
    // Store the accumulated max in the ring buffer and start over.
    state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
    state.max_peaks_dbfs = kMinLevelDbfs;
    state.time_since_push_ms = 0;
  }

  // Use the front of the delay buffer when available; otherwise fall back to
  // the running max peak.
  const auto front_peak_dbfs = state.peak_delay_buffer.Front();
  const float delayed_peak_dbfs = front_peak_dbfs.has_value()
                                      ? *front_peak_dbfs
                                      : state.max_peaks_dbfs;
  const float difference_db = delayed_peak_dbfs - speech_level_dbfs;

  // Attack when the observed difference is above the current headroom, decay
  // otherwise.
  const float smoothed_headroom_db =
      difference_db > state.headroom_db
          ? state.headroom_db * kAttack + difference_db * (1.0f - kAttack)
          : state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
  state.headroom_db =
      SafeClamp<float>(smoothed_headroom_db, kMinMarginDb, kMaxMarginDb);
}
// `SaturationProtector` implementation that recommends a headroom based on the
// recent peaks. Two states are kept: a preliminary one, updated on every speech
// frame, and a reliable one, which is a copy of the preliminary state taken
// after a long enough sequence of adjacent speech frames.
class SaturationProtectorImpl : public SaturationProtector {
 public:
  // Stores the arguments and resets the instance. `apm_data_dumper` is
  // dereferenced on every `Analyze()` call.
  explicit SaturationProtectorImpl(float initial_headroom_db,
                                   int adjacent_speech_frames_threshold,
                                   ApmDataDumper* apm_data_dumper)
      : apm_data_dumper_(apm_data_dumper),
        initial_headroom_db_(initial_headroom_db),
        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
    Reset();
  }
  SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
  SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
  ~SaturationProtectorImpl() = default;

  // Returns the currently recommended headroom.
  float HeadroomDb() override { return headroom_db_; }

  // Analyzes one frame. A frame whose `speech_probability` is below
  // `kVadConfidenceThreshold` is handled as non-speech; any other frame is
  // handled as speech. Debug data is dumped in both cases.
  void Analyze(float speech_probability,
               float peak_dbfs,
               float speech_level_dbfs) override {
    const bool is_non_speech = speech_probability < kVadConfidenceThreshold;
    if (is_non_speech) {
      HandleNonSpeechFrame();
    } else {
      HandleSpeechFrame(peak_dbfs, speech_level_dbfs);
    }
    DumpDebugData();
  }

  // Restores the initial headroom, clears the adjacent speech frame counter
  // and resets both the preliminary and the reliable state.
  void Reset() override {
    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
    headroom_db_ = initial_headroom_db_;
    num_adjacent_speech_frames_ = 0;
  }

 private:
  // Ends the current sequence of speech frames. When two or more adjacent
  // speech frames are required, the preliminary updates are either confirmed
  // or discarded depending on the length of the sequence that just ended.
  void HandleNonSpeechFrame() {
    const int speech_sequence_length = num_adjacent_speech_frames_;
    num_adjacent_speech_frames_ = 0;
    if (adjacent_speech_frames_threshold_ <= 1) {
      // A single speech frame is enough; nothing to confirm or discard.
      return;
    }
    if (speech_sequence_length >= adjacent_speech_frames_threshold_) {
      // The sequence was long enough: the preliminary state becomes the
      // reliable one.
      reliable_state_ = preliminary_state_;
    } else if (speech_sequence_length > 0) {
      // The sequence was too short: roll back to the last reliable state.
      preliminary_state_ = reliable_state_;
    }
  }

  // Extends the current sequence of speech frames, updates the preliminary
  // state and, once the sequence is long enough, publishes the preliminary
  // headroom.
  void HandleSpeechFrame(float peak_dbfs, float speech_level_dbfs) {
    ++num_adjacent_speech_frames_;
    UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
                                   preliminary_state_);
    if (num_adjacent_speech_frames_ < adjacent_speech_frames_threshold_) {
      return;
    }
    // `preliminary_state_` is now reliable.
    headroom_db_ = preliminary_state_.headroom_db;
  }

  // Dumps the running max peak of the preliminary and of the reliable state.
  void DumpDebugData() {
    apm_data_dumper_->DumpRaw(
        "agc2_saturation_protector_preliminary_max_peak_dbfs",
        preliminary_state_.max_peaks_dbfs);
    apm_data_dumper_->DumpRaw(
        "agc2_saturation_protector_reliable_max_peak_dbfs",
        reliable_state_.max_peaks_dbfs);
  }

  ApmDataDumper* const apm_data_dumper_;
  const float initial_headroom_db_;
  const int adjacent_speech_frames_threshold_;
  // Length of the ongoing sequence of adjacent speech frames.
  int num_adjacent_speech_frames_;
  // Value returned by `HeadroomDb()`.
  float headroom_db_;
  SaturationProtectorState preliminary_state_;
  SaturationProtectorState reliable_state_;
};
} // namespace
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper) {
return std::make_unique<SaturationProtectorImpl>(
initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
}
} // namespace webrtc
|