File: beamformer.h

package info (click to toggle)
chromium-browser 41.0.2272.118-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 2,189,132 kB
  • sloc: cpp: 9,691,462; ansic: 3,341,451; python: 712,689; asm: 518,779; xml: 208,926; java: 169,820; sh: 119,353; perl: 68,907; makefile: 28,311; yacc: 13,305; objc: 11,385; tcl: 3,186; cs: 2,225; sql: 2,217; lex: 2,215; lisp: 1,349; pascal: 1,256; awk: 407; ruby: 155; sed: 53; php: 14; exp: 11
file content (155 lines) | stat: -rw-r--r-- 6,277 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_

#include <vector>

#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"

namespace webrtc {

// Enhances sound sources coming directly in front of a uniform linear array
// and suppresses sound sources coming from all other directions. Operates on
// multichannel signals and produces single-channel output.
//
// The implemented nonlinear postfilter algorithm is taken from "A Robust
// Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn. Equation numbers
// cited in the comments below presumably refer to that paper.
//
// The class derives from LappedTransform::Callback and owns a LappedTransform
// (|lapped_transform_|); frequency-domain work is done in ProcessAudioBlock().
//
// TODO: Target angle assumed to be 0. Parameterize target angle.
class Beamformer : public LappedTransform::Callback {
 public:
  // At the moment it only accepts uniform linear microphone arrays. Using the
  // first microphone as a reference position [0, 0, 0] is a natural choice.
  //
  // |chunk_size_ms| and |sample_rate_hz| are, going by their names, the chunk
  // duration in milliseconds and the sample rate in Hz. |array_geometry| holds
  // one Point per microphone (presumably; Point is declared outside this
  // header).
  Beamformer(int chunk_size_ms,
             // Sample rate corresponds to the lower band.
             int sample_rate_hz,
             const std::vector<Point>& array_geometry);

  // Process one time-domain chunk of audio. The audio can be separated into
  // two signals by frequency, with the higher half passed in as the second
  // parameter. Use NULL for |high_pass_split_input| if you only have one
  // audio signal. The number of frames and channels must correspond to the
  // ctor parameters. The same signal can be passed in as |input| and |output|.
  //
  // |input| and |high_pass_split_input| are arrays of pointers to float
  // buffers (presumably one buffer per channel, each holding
  // |num_frames_per_band| samples -- TODO confirm against the .cc file).
  // |output| and |high_pass_split_output| receive the processed lower and
  // higher band respectively.
  void ProcessChunk(const float* const* input,
                    const float* const* high_pass_split_input,
                    int num_input_channels,
                    int num_frames_per_band,
                    float* const* output,
                    float* const* high_pass_split_output);

 protected:
  // Process one frequency-domain block of audio. This is where the fun
  // happens. Implements LappedTransform::Callback.
  //
  // |input| and |output| are arrays of pointers to complex frequency-domain
  // buffers; the channel and bin counts are passed alongside them.
  void ProcessAudioBlock(const complex<float>* const* input,
                         int num_input_channels,
                         int num_freq_bins,
                         int num_output_channels,
                         complex<float>* const* output);

 private:
  // Shorthand for the matrix and scalar types used throughout the class.
  typedef Matrix<float> MatrixF;
  typedef ComplexMatrix<float> ComplexMatrixF;
  typedef complex<float> complex_f;

  // One-time setup of the per-frequency-bin tables declared below
  // (|delay_sum_masks_|, |target_cov_mats_|, |interf_cov_mats_|).
  void InitDelaySumMasks();
  void InitTargetCovMats();  // TODO: Make this depend on target angle.
  void InitInterfCovMats();

  // An implementation of equation 18, which calculates postfilter masks that,
  // when applied, minimize the mean-square error of our estimation of the
  // desired signal. A sub-task is to calculate lambda, which is solved via
  // equation 13.
  //
  // NOTE(review): the scalar parameters appear to be named after quantities
  // from the referenced paper; their definitions are not visible in this
  // header. Returns the computed mask value as a float.
  float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
                                float rpsiw,
                                float ratio_rxiw_rxim,
                                float rmxi_r,
                                float mask_threshold);

  // Prevents the postfilter masks from degenerating too quickly (a cause of
  // musical noise).
  void ApplyDecay();

  // The postfilter masks are unreliable at low frequencies. Calculates a better
  // mask by averaging mid-low frequency values.
  void ApplyLowFrequencyCorrection();

  // Postfilter masks are also unreliable at high frequencies. Average mid-high
  // frequency masks to calculate a single mask per block which can be applied
  // in the time-domain. Further, we average these block-masks over a chunk,
  // resulting in one postfilter mask per audio chunk. This allows us to skip
  // both transforming and blocking the high-frequency signal.
  void CalculateHighFrequencyMask();

  // Applies both sets of masks to |input| and stores the result in |output|.
  void ApplyMasks(const complex_f* const* input, complex_f* const* output);

  // Derives the microphone spacing from |array_geometry|. Presumably used to
  // initialize |mic_spacing_| -- TODO confirm against the constructor.
  float MicSpacingFromGeometry(const std::vector<Point>& array_geometry);

  // Deals with the fft transform and blocking.
  const int chunk_length_;
  scoped_ptr<LappedTransform> lapped_transform_;
  scoped_ptr<float[]> window_;

  // Parameters exposed to the user.
  // NOTE(review): |num_input_channels_| and |mic_spacing_| are not direct
  // constructor arguments; presumably both are derived from |array_geometry|.
  const int num_input_channels_;
  const int sample_rate_hz_;
  const float mic_spacing_;

  // Calculated based on user-input and constants in the .cc file.
  const float decay_threshold_;
  const int mid_frequency_lower_bin_bound_;
  const int mid_frequency_upper_bin_bound_;
  const int high_frequency_lower_bin_bound_;
  const int high_frequency_upper_bin_bound_;

  // Indices into |postfilter_masks_|.
  int current_block_ix_;
  int previous_block_ix_;

  // Old masks are saved in this ring buffer for smoothing. Array of length
  // |kNumberSavedMasks|, each element a matrix of size 1 x |kNumFreqBins|.
  // (Both constants are defined outside this header.)
  scoped_ptr<MatrixF[]> postfilter_masks_;

  // Array of length |kNumFreqBins|, Matrix of size |1| x
  // |num_input_channels_|.
  scoped_ptr<ComplexMatrixF[]> delay_sum_masks_;

  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|.
  scoped_ptr<ComplexMatrixF[]> target_cov_mats_;

  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|. The same shape presumably applies to
  // |reflected_interf_cov_mats_| -- TODO confirm.
  scoped_ptr<ComplexMatrixF[]> interf_cov_mats_;
  scoped_ptr<ComplexMatrixF[]> reflected_interf_cov_mats_;

  // Of length |kNumFreqBins|.
  scoped_ptr<float[]> mask_thresholds_;
  scoped_ptr<float[]> wave_numbers_;

  // Preallocated for ProcessAudioBlock()
  // Of length |kNumFreqBins|.
  scoped_ptr<float[]> rxiws_;
  scoped_ptr<float[]> rpsiws_;
  scoped_ptr<float[]> reflected_rpsiws_;

  // The microphone normalization factor.
  ComplexMatrixF eig_m_;

  // For processing the high-frequency input signal.
  // NOTE(review): |high_pass_exists_| presumably records whether a non-NULL
  // |high_pass_split_input| was passed to ProcessChunk() -- TODO confirm.
  bool high_pass_exists_;
  int num_blocks_in_this_chunk_;
  float high_pass_postfilter_mask_;
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_