/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
#include <algorithm>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
// Shorthand for the adaptive digital controller configuration type used by the
// helpers in this anonymous namespace.
using AdaptiveDigitalConfig =
AudioProcessing::Config::GainController2::AdaptiveDigital;
// Lower and upper bounds of the "WebRTC.Audio.Agc2.Headroom" linear histogram;
// the bucket count passed to the histogram macro is derived from them
// (max - min + 1).
constexpr int kHeadroomHistogramMin = 0;
constexpr int kHeadroomHistogramMax = 50;
// Upper bound of the "WebRTC.Audio.Agc2.DigitalGainApplied" linear histogram
// (its lower bound is 0 and its bucket count is this value + 1).
constexpr int kGainDbHistogramMax = 30;
// Returns the gain, in dB, needed to lift `input_level_dbfs` up to the target
// level `-config.headroom_db`. The gain is capped at `config.max_gain_db` for
// very quiet input and is 0 when the input is already at or above the target.
// `config.headroom_db` acts as a safety margin that lets transient peaks go
// above the target peak level without clipping.
float ComputeGainDb(float input_level_dbfs,
                    const AdaptiveDigitalConfig& config) {
  // Level that the gain tries to reach.
  const auto target_level_dbfs = -config.headroom_db;
  // Below this level even the maximum gain cannot reach the target.
  const auto max_gain_level_dbfs = -(config.headroom_db + config.max_gain_db);

  if (input_level_dbfs < max_gain_level_dbfs) {
    // Very low level: give it everything we are allowed to.
    return config.max_gain_db;
  } else if (input_level_dbfs < target_level_dbfs) {
    // The common case: the level is under the target and the gap can be
    // closed completely.
    return target_level_dbfs - input_level_dbfs;
  }

  // The level is already at or above the target, so no boost is applied.
  RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
  return 0.0f;
}
// Caps `target_gain_db` so that the noise floor, once amplified, does not go
// above `max_output_noise_level_dbfs`. The largest gain that satisfies this is
// `max_output_noise_level_dbfs - input_noise_level_dbfs`; that value is dumped
// via `apm_data_dumper` and is floored at 0 dB before being used as the cap.
// Returns `target_gain_db` unchanged when it is already within the cap.
float LimitGainByNoise(float target_gain_db,
                       float input_noise_level_dbfs,
                       float max_output_noise_level_dbfs,
                       ApmDataDumper& apm_data_dumper) {
  const float noise_headroom_db =
      max_output_noise_level_dbfs - input_noise_level_dbfs;
  apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
                          noise_headroom_db);
  // The noise limit alone never pushes the cap below 0 dB.
  const float gain_cap_db = std::max(noise_headroom_db, 0.0f);
  return std::min(target_gain_db, gain_cap_db);
}
// Caps `target_gain_db` while the level estimate is not confident and the
// limiter level is above `kLimiterThresholdForAgcGainDbfs`. In that case the
// returned gain is chosen so that the limiter level, with `last_gain_db`
// removed and the new gain added, is not greater than the threshold; the cap
// itself is never negative. In every other case `target_gain_db` is returned
// unchanged.
float LimitGainByLowConfidence(float target_gain_db,
                               float last_gain_db,
                               float limiter_audio_level_dbfs,
                               bool estimate_is_confident) {
  const bool keep_target =
      estimate_is_confident ||
      limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs;
  if (keep_target) {
    return target_gain_db;
  }

  // Limiter level with the previously applied gain taken out.
  const float pre_gain_limiter_level_dbfs =
      limiter_audio_level_dbfs - last_gain_db;
  // Gain that would bring that level exactly to the threshold.
  const float gain_to_threshold_db =
      kLimiterThresholdForAgcGainDbfs - pre_gain_limiter_level_dbfs;
  const float gain_cap_db = std::max(gain_to_threshold_db, 0.0f);
  return std::min(gain_cap_db, target_gain_db);
}
// Returns by how many dB the gain should move during this frame, relative to
// `last_gain_db`, on its way to `target_gain_db`. When `gain_increase_allowed`
// is false only non-positive changes are let through. The result is clamped to
// [-`max_gain_decrease_db`, `max_gain_increase_db`]; both limits must be
// strictly positive.
float ComputeGainChangeThisFrameDb(float target_gain_db,
                                   float last_gain_db,
                                   bool gain_increase_allowed,
                                   float max_gain_decrease_db,
                                   float max_gain_increase_db) {
  RTC_DCHECK_GT(max_gain_decrease_db, 0);
  RTC_DCHECK_GT(max_gain_increase_db, 0);
  const float wanted_change_db = target_gain_db - last_gain_db;
  // Suppress any upward step while increases are forbidden.
  const float permitted_change_db =
      gain_increase_allowed ? wanted_change_db
                            : std::min(wanted_change_db, 0.0f);
  return SafeClamp(permitted_change_db, -max_gain_decrease_db,
                   max_gain_increase_db);
}
} // namespace
// Constructs the controller.
// - `apm_data_dumper`: stored as-is and dereferenced without a null check in
//   `Process()`, so it must be non-null by the time `Process()` is called.
// - `config`: used to initialize `config_`; also provides the initial gain.
// - `adjacent_speech_frames_threshold`: number of adjacent speech frames that
//   `Process()` must observe before it allows the gain to increase; must be at
//   least 1.
AdaptiveDigitalGainController::AdaptiveDigitalGainController(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
int adjacent_speech_frames_threshold)
: apm_data_dumper_(apm_data_dumper),
// The gain applier starts at the linear factor equivalent to
// `config.initial_gain_db` and is created with hard clipping disabled.
gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
config_(config),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
// Scales the per-second rate by `kFrameDurationMs / 1000` to get a per-frame
// step (presumably `kFrameDurationMs` is 10, given the member name).
// NOTE(review): this and `last_gain_db_` read the member `config_`, not the
// parameter `config`; that relies on `config_` being declared before them in
// the class, since members are initialized in declaration order -- confirm
// against the header.
max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
kFrameDurationMs / 1000.0f),
calls_since_last_gain_log_(0),
// Start with the full count, i.e. gain increases are initially not allowed
// (`Process()` allows them only when this counter is 0).
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
last_gain_db_(config_.initial_gain_db) {
// Sanity checks on the derived step size, the threshold and the configured
// maximum output noise level (expected in [-90, 0] dBFS).
RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
}
// Updates the adaptive digital gain from `info` and applies it to `frame`.
//
// The steps are:
// 1. Derive the wanted gain from the speech level plus headroom, then cap it
//    by the noise level and by the limiter level when the speech level
//    estimate is not reliable.
// 2. Track adjacent speech frames; the gain may only go up once
//    `adjacent_speech_frames_threshold_` of them have been seen in a row.
// 3. Move the gain towards the wanted value by a rate-limited step and apply
//    it to `frame`.
// 4. Every 1000 calls, report histograms and write an info log line.
//
// `frame` is expected to hold 10 ms of audio (80, 160, 320 or 480 samples per
// channel) with at least one channel.
void AdaptiveDigitalGainController::Process(const FrameInfo& info,
                                            DeinterleavedView<float> frame) {
  RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
  RTC_DCHECK_GE(frame.num_channels(), 1);
  RTC_DCHECK(
      frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
      frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
      << "`frame` does not look like a 10 ms frame for an APM supported sample "
         "rate";
  RTC_DCHECK_GT(info.headroom_db, 0.0f);

  // Step 1: wanted gain. The level used to pick the gain is the speech level
  // raised by the headroom; the result then goes through the noise-based and
  // the low-confidence caps, in that order.
  const float level_with_headroom_dbfs =
      info.speech_level_dbfs + info.headroom_db;
  const float level_based_gain_db =
      ComputeGainDb(level_with_headroom_dbfs, config_);
  const float noise_limited_gain_db = LimitGainByNoise(
      level_based_gain_db, info.noise_rms_dbfs,
      config_.max_output_noise_level_dbfs, *apm_data_dumper_);
  const float wanted_gain_db = LimitGainByLowConfidence(
      noise_limited_gain_db, last_gain_db_, info.limiter_envelope_dbfs,
      info.speech_level_reliable);

  // Step 2: gain increases stay forbidden until enough adjacent speech frames
  // have been observed. A non-speech frame restarts the countdown.
  bool speech_run_just_completed = false;
  if (info.speech_probability < kVadConfidenceThreshold) {
    frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
  } else if (frames_to_gain_increase_allowed_ > 0) {
    --frames_to_gain_increase_allowed_;
    speech_run_just_completed = (frames_to_gain_increase_allowed_ == 0);
  }
  apm_data_dumper_->DumpRaw(
      "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
      frames_to_gain_increase_allowed_);
  const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;

  // Step 3: rate-limited gain change.
  float max_step_up_db = max_gain_change_db_per_10ms_;
  if (speech_run_just_completed) {
    // The gain could not go up while the speech run was being confirmed, so
    // allow a proportionally larger step on the frame that completes the run.
    RTC_DCHECK(gain_increase_allowed);
    max_step_up_db =
        max_gain_change_db_per_10ms_ * adjacent_speech_frames_threshold_;
  }
  const float applied_change_db = ComputeGainChangeThisFrameDb(
      wanted_gain_db, last_gain_db_, gain_increase_allowed,
      /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
      /*max_gain_increase_db=*/max_step_up_db);
  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
                            wanted_gain_db - last_gain_db_);
  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
                            applied_change_db);

  const auto updated_gain_db = last_gain_db_ + applied_change_db;
  // Skip the dB-to-ratio conversion when the gain stays the same.
  if (applied_change_db != 0.f) {
    gain_applier_.SetGainFactor(DbToRatio(updated_gain_db));
  }
  gain_applier_.ApplyGain(frame);

  // Keep the applied gain for the next call.
  last_gain_db_ = updated_gain_db;
  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
                            last_gain_db_);

  // Step 4: periodic stats, once every 1000 calls (10 seconds of audio at one
  // 10 ms frame per call).
  ++calls_since_last_gain_log_;
  if (calls_since_last_gain_log_ != 1000) {
    return;
  }
  calls_since_last_gain_log_ = 0;
  RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
                              -info.speech_level_dbfs, 0, 100, 101);
  RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
                              -info.noise_rms_dbfs, 0, 100, 101);
  RTC_HISTOGRAM_COUNTS_LINEAR(
      "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
      kHeadroomHistogramMax,
      kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
  RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
                              last_gain_db_, 0, kGainDbHistogramMax,
                              kGainDbHistogramMax + 1);
  RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
                   << " | speech_dbfs: " << info.speech_level_dbfs
                   << " | noise_dbfs: " << info.noise_rms_dbfs
                   << " | headroom_db: " << info.headroom_db
                   << " | gain_db: " << last_gain_db_;
}
} // namespace webrtc