File: intelligibility_enhancer.h

package info (click to toggle)
chromium-browser 57.0.2987.98-1~deb8u1
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 2,637,852 kB
  • ctags: 2,544,394
  • sloc: cpp: 12,815,961; ansic: 3,676,222; python: 1,147,112; asm: 526,608; java: 523,212; xml: 286,794; perl: 92,654; sh: 86,408; objc: 73,271; makefile: 27,698; cs: 18,487; yacc: 13,031; tcl: 12,957; pascal: 4,875; ml: 4,716; lex: 3,904; sql: 3,862; ruby: 1,982; lisp: 1,508; php: 1,368; exp: 404; awk: 325; csh: 117; jsp: 39; sed: 37
file content (137 lines) | stat: -rw-r--r-- 5,282 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

#include <complex>
#include <memory>
#include <vector>

#include "webrtc/base/swap_queue.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
#include "webrtc/modules/audio_processing/render_queue_item_verifier.h"
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

namespace webrtc {

// Speech intelligibility enhancement module. Reads render and capture
// audio streams and modifies the render stream with a set of gains per
// frequency bin to enhance speech against the noise background.
// Details of the model and algorithm can be found in the original paper:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
//
// As far as this header shows, noise estimates are supplied through
// SetCaptureNoiseEstimate() and render audio is modified in place through
// ProcessRenderAudio(). The class derives from LappedTransform::Callback, so
// the frequency-domain work is done in the ProcessAudioBlock() override.
// NOTE(review): the threading contract between the two public entry points is
// not visible in this header; the SwapQueue member suggests they may run on
// different threads -- confirm against the implementation.
class IntelligibilityEnhancer : public LappedTransform::Callback {
 public:
  // Constructs an enhancer. The parameter meanings below are inferred from
  // their names only and should be confirmed against the implementation:
  //   |sample_rate_hz|      presumably the sample rate of the render stream.
  //   |num_render_channels| presumably the channel count of the render stream.
  //   |num_bands|           presumably the number of frequency bands carried
  //                         by the AudioBuffer passed to ProcessRenderAudio().
  //   |num_noise_bins|      presumably the length of the noise spectrum passed
  //                         to SetCaptureNoiseEstimate().
  IntelligibilityEnhancer(int sample_rate_hz,
                          size_t num_render_channels,
                          size_t num_bands,
                          size_t num_noise_bins);

  ~IntelligibilityEnhancer() override;

  // Sets the capture noise magnitude spectrum estimate.
  // |noise| is taken by value. NOTE(review): how |gain| is combined with
  // |noise| is not visible in this header -- confirm in the implementation.
  void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);

  // Reads chunk of speech in time domain and updates with modified signal.
  // |audio| is modified in place.
  void ProcessRenderAudio(AudioBuffer* audio);

  // Returns whether the enhancer is active. Presumably this reports the state
  // that SnrBasedEffectActivation() switches on and off -- TODO confirm.
  bool active() const;

 protected:
  // All in frequency domain, receives input |in_block|, applies
  // intelligibility enhancement, and writes result to |out_block|.
  // Overrides the LappedTransform::Callback hook. |in_block| and |out_block|
  // are arrays of per-channel pointers; NOTE(review): |frames| presumably is
  // the number of frequency bins per channel -- confirm against
  // LappedTransform.
  void ProcessAudioBlock(const std::complex<float>* const* in_block,
                         size_t in_channels,
                         size_t frames,
                         size_t out_channels,
                         std::complex<float>* const* out_block) override;

 private:
  // Friend declarations for the listed IntelligibilityEnhancerTest cases so
  // that they can reach private members. The macro itself is defined outside
  // this header.
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate);
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
                           TestNoiseGainHasExpectedResult);
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
                           TestAllBandsHaveSameDelay);

  // Updates the SNR estimation and enables or disables this component using a
  // hysteresis.
  void SnrBasedEffectActivation();

  // Bisection search for optimal |lambda|.
  // NOTE(review): |power_target| presumably is the power the solution has to
  // reach; where the result is stored is not visible here -- confirm.
  void SolveForLambda(float power_target);

  // Transforms freq gains to ERB gains.
  void UpdateErbGains();

  // Returns number of ERB filters.
  static size_t GetBankSize(int sample_rate, size_t erb_resolution);

  // Initializes ERB filterbank.
  // Returns the bank as a vector of per-filter weight vectors.
  // NOTE(review): presumably one row per ERB filter with |num_freqs| entries
  // each -- confirm against the implementation.
  std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);

  // Analytically solves quadratic for optimal gains given |lambda|.
  // Negative gains are set to 0. Stores the results in |sols|.
  // NOTE(review): |start_freq| presumably is the first index that is solved
  // for, and |sols| must be large enough for every written entry -- confirm.
  void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

  // Returns true if the audio is speech.
  bool IsSpeech(const float* audio);

  // Delays the high bands to compensate for the processing delay in the low
  // band.
  void DelayHighBands(AudioBuffer* audio);

  // NOTE(review): presumably the capacity of |noise_estimation_queue_| --
  // confirm in the constructor.
  static const size_t kMaxNumNoiseEstimatesToBuffer = 5;

  // Data members. C++ initializes non-static members in declaration order, so
  // do not reorder them without checking the constructor's initializer list
  // (defined outside this header).

  const size_t freqs_;         // Num frequencies in frequency domain.
  const size_t num_noise_bins_;
  const size_t chunk_length_;  // Chunk size in samples.
  const size_t bank_size_;     // Num ERB filters.
  const int sample_rate_hz_;
  const size_t num_render_channels_;

  // Power estimators and their outputs. NOTE(review): "clear" presumably
  // refers to the render (speech) signal and "noise" to the capture noise
  // estimate -- confirm.
  intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
  intelligibility::PowerEstimator<float> noise_power_estimator_;
  std::vector<float> filtered_clear_pow_;
  std::vector<float> filtered_noise_pow_;
  std::vector<float> center_freqs_;
  // NOTE(review): presumably ERB filter banks as produced by CreateErbBank(),
  // one for each of the two streams -- confirm.
  std::vector<std::vector<float>> capture_filter_bank_;
  std::vector<std::vector<float>> render_filter_bank_;
  size_t start_freq_;

  std::vector<float> gains_eq_;  // Pre-filter modified gains.
  intelligibility::GainApplier gain_applier_;

  // Lapped transform that this object is registered with as a callback
  // (presumably; the wiring happens outside this header).
  std::unique_ptr<LappedTransform> render_mangler_;

  // Voice activity state. NOTE(review): |audio_s16_| presumably is a 16-bit
  // scratch copy of the audio fed to |vad_| -- confirm.
  VoiceActivityDetector vad_;
  std::vector<int16_t> audio_s16_;
  size_t chunks_since_voice_;
  bool is_speech_;
  float snr_;
  bool is_active_;

  // Chunk counters. NOTE(review): presumably the total number of processed
  // chunks and the number processed while active -- confirm.
  unsigned long int num_chunks_;
  unsigned long int num_active_chunks_;

  // Noise estimates are exchanged through a SwapQueue.
  // NOTE(review): |noise_estimation_buffer_| presumably is the local element
  // swapped in and out of the queue -- confirm.
  std::vector<float> noise_estimation_buffer_;
  SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
      noise_estimation_queue_;

  // Delay buffers used by DelayHighBands() (presumably one per delayed
  // band/channel -- confirm).
  std::vector<std::unique_ptr<intelligibility::DelayBuffer>>
      high_bands_buffers_;
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_