File: vad_core.h

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (123 lines) | stat: -rw-r--r-- 4,067 bytes parent folder | download | duplicates (29)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This header file includes the descriptions of the core VAD calls.
 */

#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
#define COMMON_AUDIO_VAD_VAD_CORE_H_

#include "common_audio/signal_processing/include/signal_processing_library.h"

// Declares a compile-time integer constant that works in both C and C++.
// `CONSTEXPR_INT(name = value);` becomes a `constexpr int` when compiled as
// C++ and an enumerator of an anonymous enum when compiled as C.
// TODO(https://bugs.webrtc.org/14476): When converted to C++, remove the macro.
#ifdef __cplusplus
#define CONSTEXPR_INT(x) constexpr int x
#else
#define CONSTEXPR_INT(x) enum { x }
#endif

// Number of frequency bands (named channels).
CONSTEXPR_INT(kNumChannels = 6);
// Number of Gaussians per channel in the GMM.
CONSTEXPR_INT(kNumGaussians = 2);
// Number of (channel, Gaussian) pairs; used as the length of the per-model
// tables such as `noise_means` in VadInstT.
CONSTEXPR_INT(kTableSize = kNumChannels * kNumGaussians);
// Minimum energy required to trigger audio signal.
CONSTEXPR_INT(kMinEnergy = 10);

// State of one core VAD instance. Every function declared in this header
// takes a pointer to this struct as its first argument.
typedef struct VadInstT_ {
  int vad;
  int32_t downsampling_filter_states[4];
  // NOTE(review): judging by the type name, resampler state for converting
  // 48 kHz input to 8 kHz - confirm against signal_processing_library.h.
  WebRtcSpl_State48khzTo8khz state_48_to_8;
  // The four tables below each hold kTableSize = kNumChannels * kNumGaussians
  // entries, i.e. one entry per (channel, Gaussian) pair.
  // NOTE(review): presumably the means and standard deviations of the noise
  // and speech GMMs - confirm against vad_core.c.
  int16_t noise_means[kTableSize];
  int16_t speech_means[kTableSize];
  int16_t noise_stds[kTableSize];
  int16_t speech_stds[kTableSize];
  // TODO(bjornv): Change to `frame_count`.
  int32_t frame_counter;
  int16_t over_hang;  // Over Hang
  int16_t num_of_speech;
  // The next two arrays hold 16 * kNumChannels entries each, i.e. 16 values
  // for every frequency band.
  // TODO(bjornv): Change to `age_vector`.
  int16_t index_vector[16 * kNumChannels];
  int16_t low_value_vector[16 * kNumChannels];
  // One value per frequency band.
  // TODO(bjornv): Change to `median`.
  int16_t mean_value[kNumChannels];
  int16_t upper_state[5];
  int16_t lower_state[5];
  int16_t hp_filter_state[4];
  // NOTE(review): the four 3-element arrays below look like per-setting
  // tuning values selected by the aggressiveness mode - confirm against
  // WebRtcVad_set_mode_core() in vad_core.c.
  int16_t over_hang_max_1[3];
  int16_t over_hang_max_2[3];
  int16_t individual[3];
  int16_t total[3];

  // NOTE(review): presumably marks the instance as initialized (see
  // WebRtcVad_InitCore) - confirm against vad_core.c.
  int init_flag;
} VadInstT;

// Initializes the core VAD component. The default aggressiveness mode is
// controlled by `kDefaultMode` in vad_core.c.
//
// - self [i/o] : Instance that should be initialized
//
// returns      : 0 (OK), -1 (null pointer passed in, or the default mode
//                can't be set)
int WebRtcVad_InitCore(VadInstT* self);

/****************************************************************************
 * WebRtcVad_set_mode_core(...)
 *
 * This function changes the VAD settings.
 *
 * Input:
 *      - self      : VAD instance
 *      - mode      : Aggressiveness degree
 *                    0 (High quality) - 3 (Highly aggressive)
 *
 * Output:
 *      - self      : Changed instance
 *
 * Return value     :  0 - Ok
 *                    -1 - Error
 */

int WebRtcVad_set_mode_core(VadInstT* self, int mode);

/****************************************************************************
 * WebRtcVad_CalcVad48khz(...)
 * WebRtcVad_CalcVad32khz(...)
 * WebRtcVad_CalcVad16khz(...)
 * WebRtcVad_CalcVad8khz(...)
 *
 * Calculate probability for active speech and make VAD decision. There is
 * one function per input sampling rate, as indicated by the name suffix.
 *
 * Input:
 *      - inst          : VAD instance (presumably must already have been
 *                        initialized, e.g. with WebRtcVad_InitCore() -
 *                        TODO confirm against vad_core.c)
 *      - speech_frame  : Input speech frame
 *      - frame_length  : Number of input samples
 *
 * Output:
 *      - inst          : Updated filter states etc.
 *
 * Return value         : VAD decision
 *                        0 - No active speech
 *                        1-6 - Active speech
 */
int WebRtcVad_CalcVad48khz(VadInstT* inst,
                           const int16_t* speech_frame,
                           size_t frame_length);
int WebRtcVad_CalcVad32khz(VadInstT* inst,
                           const int16_t* speech_frame,
                           size_t frame_length);
int WebRtcVad_CalcVad16khz(VadInstT* inst,
                           const int16_t* speech_frame,
                           size_t frame_length);
int WebRtcVad_CalcVad8khz(VadInstT* inst,
                          const int16_t* speech_frame,
                          size_t frame_length);

#endif  // COMMON_AUDIO_VAD_VAD_CORE_H_