File: transient_suppressor.h

package info (click to toggle)

chromium-browser 41.0.2272.118-1

links: PTS, VCS
area: main
in suites: jessie-kfreebsd
size: 2,189,132 kB
sloc: cpp: 9,691,462; ansic: 3,341,451; python: 712,689; asm: 518,779; xml: 208,926; java: 169,820; sh: 119,353; perl: 68,907; makefile: 28,311; yacc: 13,305; objc: 11,385; tcl: 3,186; cs: 2,225; sql: 2,217; lex: 2,215; lisp: 1,349; pascal: 1,256; awk: 407; ruby: 155; sed: 53; php: 14; exp: 11

file content (120 lines) | stat: -rw-r--r-- 4,036 bytes

/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

#include <deque>
#include <set>

#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/gtest_prod_util.h"
#include "webrtc/typedefs.h"

namespace webrtc {

class TransientDetector;

// Detects transients in an audio stream and suppress them using a simple
// restoration algorithm that attenuates unexpected spikes in the spectrum.
class TransientSuppressor {
 public:
  TransientSuppressor();
  ~TransientSuppressor();

  int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);

  // Processes a |data| chunk, and returns it with keystrokes suppressed from
  // it. The float format is assumed to be int16 ranged. If there are more than
  // one channel, the chunks are concatenated one after the other in |data|.
  // |data_length| must be equal to |data_length_|.
  // |num_channels| must be equal to |num_channels_|.
  // A sub-band, ideally the higher, can be used as |detection_data|. If it is
  // NULL, |data| is used for the detection too. The |detection_data| is always
  // assumed mono.
  // If a reference signal (e.g. keyboard microphone) is available, it can be
  // passed in as |reference_data|. It is assumed mono and must have the same
  // length as |data|. NULL is accepted if unavailable.
  // This suppressor performs better if voice information is available.
  // |voice_probability| is the probability of voice being present in this chunk
  // of audio. If voice information is not available, |voice_probability| must
  // always be set to 1.
  // |key_pressed| determines if a key was pressed on this audio chunk.
  // Returns 0 on success and -1 otherwise.
  int Suppress(float* data,
               size_t data_length,
               int num_channels,
               const float* detection_data,
               size_t detection_length,
               const float* reference_data,
               size_t reference_length,
               float voice_probability,
               bool key_pressed);

 private:
  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
                           TypingDetectionLogicWorksAsExpectedForMono);
  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);

  void UpdateKeypress(bool key_pressed);
  void UpdateRestoration(float voice_probability);

  void UpdateBuffers(float* data);

  void HardRestoration(float* spectral_mean);
  void SoftRestoration(float* spectral_mean);

  scoped_ptr<TransientDetector> detector_;

  size_t data_length_;
  size_t detection_length_;
  size_t analysis_length_;
  size_t buffer_delay_;
  size_t complex_analysis_length_;
  int num_channels_;
  // Input buffer where the original samples are stored.
  scoped_ptr<float[]> in_buffer_;
  scoped_ptr<float[]> detection_buffer_;
  // Output buffer where the restored samples are stored.
  scoped_ptr<float[]> out_buffer_;

  // Arrays for fft.
  scoped_ptr<int[]> ip_;
  scoped_ptr<float[]> wfft_;

  scoped_ptr<float[]> spectral_mean_;

  // Stores the data for the fft.
  scoped_ptr<float[]> fft_buffer_;

  scoped_ptr<float[]> magnitudes_;

  const float* window_;

  scoped_ptr<float[]> mean_factor_;

  float detector_smoothed_;

  int keypress_counter_;
  int chunks_since_keypress_;
  bool detection_enabled_;
  bool suppression_enabled_;

  bool use_hard_restoration_;
  int chunks_since_voice_change_;

  uint32_t seed_;

  bool using_reference_;
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_