/* vad.h */
/* (code-viewer label, not part of the source) Font size: */
#ifndef VAD_H#define VAD_H#include "typedef.h"#define VAD1 /* VAD Algorithm Selection Compile Switch */#define FRAME_LEN 160 /* Length (samples) of the input frame */#define COMPLEN 9 /* Number of sub-bands used by VAD */#define INV_COMPLEN 3641 /* 1.0/COMPLEN*2^15 */#define LOOKAHEAD 40 /* length of the lookahead used by speech coder */#define UNITY 512 /* Scaling used with SNR calculation */#define UNIRSHFT 6 /* = log2(MAX_16/UNITY) */#define TONE_THR (Word16)(0.65*MAX_16) /* Threshold for tone detection *//* Constants for background spectrum update */#define ALPHA_UP1 (Word16)((1.0 - 0.95)*MAX_16) /* Normal update, upwards: */#define ALPHA_DOWN1 (Word16)((1.0 - 0.936)*MAX_16) /* Normal update, downwards */#define ALPHA_UP2 (Word16)((1.0 - 0.985)*MAX_16) /* Forced update, upwards */#define ALPHA_DOWN2 (Word16)((1.0 - 0.943)*MAX_16) /* Forced update, downwards */#define ALPHA3 (Word16)((1.0 - 0.95)*MAX_16) /* Update downwards */#define ALPHA4 (Word16)((1.0 - 0.9)*MAX_16) /* For stationary estimation */#define ALPHA5 (Word16)((1.0 - 0.5)*MAX_16) /* For stationary estimation *//* Constants for VAD threshold */#define VAD_THR_HIGH 1260 /* Highest threshold */#define VAD_THR_LOW 720 /* Lowest threshold */#define VAD_P1 0 /* Noise level for highest threshold */#define VAD_P2 6300 /* Noise level for lowest threshold */#define VAD_SLOPE (Word16)(MAX_16*(float)(VAD_THR_LOW-VAD_THR_HIGH)/(float)(VAD_P2-VAD_P1))/* Parameters for background spectrum recovery function */#define STAT_COUNT 20 /* threshold of stationary detection counter */#define STAT_COUNT_BY_2 10 /* threshold of stationary detection counter */#define CAD_MIN_STAT_COUNT 5 /* threshold of stationary detection counter */#define STAT_THR_LEVEL 184 /* Threshold level for stationarity detection */#define STAT_THR 1000 /* Threshold for stationarity detection *//* Limits for background noise estimate */#define NOISE_MIN 40 /* minimum */#define NOISE_MAX 16000 /* maximum */#define NOISE_INIT 150 /* initial *//* 
Constants for VAD hangover addition */#define HANG_NOISE_THR 100#define BURST_LEN_HIGH_NOISE 4#define HANG_LEN_HIGH_NOISE 7#define BURST_LEN_LOW_NOISE 5#define HANG_LEN_LOW_NOISE 4/* Thresholds for signal power */#define VAD_POW_LOW (Word32)15000 /* If input power is lower, . VAD is set to 0 */#define POW_PITCH_THR (Word32)343040 /* If input power is lower, pitch . detection is ignored */#define POW_COMPLEX_THR (Word32)15000 /* If input power is lower, complex . flags value for previous frame is un-set */ /* Constants for the filter bank */#define LEVEL_SHIFT 0 /* scaling */#define COEFF3 13363 /* coefficient for the 3rd order filter */#define COEFF5_1 21955 /* 1st coefficient the for 5th order filter */#define COEFF5_2 6390 /* 2nd coefficient the for 5th order filter *//* Constants for pitch detection */#define LTHRESH 4#define NTHRESH 4/* Constants for complex signal VAD */#define CVAD_THRESH_ADAPT_HIGH (Word16)(0.6 * MAX_16) /* threshold for adapt stopping high */#define CVAD_THRESH_ADAPT_LOW (Word16)(0.5 * MAX_16) /* threshold for adapt stopping low */#define CVAD_THRESH_IN_NOISE (Word16)(0.65 * MAX_16) /* threshold going into speech on a short term basis */#define CVAD_THRESH_HANG (Word16)(0.70 * MAX_16) /* threshold */#define CVAD_HANG_LIMIT (Word16)(100) /* 2 second estimation time */#define CVAD_HANG_LENGTH (Word16)(250) /* 5 second hangover */#define CVAD_LOWPOW_RESET (Word16) (0.40 * MAX_16) /* init in low power segment */#define CVAD_MIN_CORR (Word16) (0.40 * MAX_16) /* lowest adaptation value */#define CVAD_BURST 20 /* speech burst length for speech reset */#define CVAD_ADAPT_SLOW (Word16)(( 1.0 - 0.98) * MAX_16) /* threshold for slow adaption */#define CVAD_ADAPT_FAST (Word16)((1.0 - 0.92) * MAX_16) /* threshold for fast adaption */#define CVAD_ADAPT_REALLY_FAST (Word16)((1.0 - 0.80) * MAX_16) /* threshold for really fastadaption *//* state variable */typedef struct { Word16 bckr_est[COMPLEN]; /* background noise estimate */ Word16 ave_level[COMPLEN]; 
/* averaged input components for stationary . estimation */ Word16 old_level[COMPLEN]; /* input levels of the previous frame */ Word16 sub_level[COMPLEN]; /* input levels calculated at the end of a frame (lookahead) */ Word16 a_data5[3][2]; /* memory for the filter bank */ Word16 a_data3[5]; /* memory for the filter bank */ Word16 burst_count; /* counts length of a speech burst */ Word16 hang_count; /* hangover counter */ Word16 stat_count; /* stationary counter */ /* Note that each of the following three variables (vadreg, pitch and tone) holds 15 flags. Each flag reserves 1 bit of the variable. The newest flag is in the bit 15 (assuming that LSB is bit 1 and MSB is bit 16). */ Word16 vadreg; /* flags for intermediate VAD decisions */ Word16 pitch; /* flags for pitch detection */ Word16 tone; /* flags for tone detection */ Word16 complex_high; /* flags for complex detection */ Word16 complex_low; /* flags for complex detection */ Word16 oldlag_count, oldlag; /* variables for pitch detection */ Word16 complex_hang_count; /* complex hangover counter, used by VAD */ Word16 complex_hang_timer; /* hangover initiator, used by CAD */ Word16 best_corr_hp; /* FIP filtered value Q15 */ Word16 speech_vad_decision; /* final decision */ Word16 complex_warning; /* complex background warning */ Word16 sp_burst_count; /* counts length of a speech burst incl HO addition */ Word16 corr_hp_fast; /* filtered value */ } vadState1;/***** Defines ****/#define YES 1#define NO 0#define ON 1#define OFF 0#define TRUE 1#define FALSE 0#define FRM_LEN 80#define DELAY 24#define FFT_LEN 128#define NUM_CHAN 16#define LO_CHAN 0#define HI_CHAN 15#define UPDATE_THLD 35#define HYSTER_CNT_THLD 6#define UPDATE_CNT_THLD 50#define SHIFT_STATE_0 0 /* channel energy scaled as 22,9 */#define SHIFT_STATE_1 1 /* channel energy scaled as 27,4 */#define NOISE_FLOOR_CHAN_0 512 /* 1.0 scaled as 22,9 */#define MIN_CHAN_ENRG_0 32 /* 0.0625 scaled as 22,9 */#define MIN_NOISE_ENRG_0 32 /* 0.0625 scaled as 22,9 
*/#define INE_NOISE_0 8192 /* 16.0 scaled as 22,9 */#define FRACTIONAL_BITS_0 9 /* used as input to fn10Log10() */#define NOISE_FLOOR_CHAN_1 16 /* 1.0 scaled as 27,4 */#define MIN_CHAN_ENRG_1 1 /* 0.0625 scaled as 27,4 */#define MIN_NOISE_ENRG_1 1 /* 0.0625 scaled as 27,4 */#define INE_NOISE_1 256 /* 16.0 scaled as 27,4 */#define FRACTIONAL_BITS_1 4 /* used as input to fn10Log10() */#define STATE_1_TO_0_SHIFT_R (FRACTIONAL_BITS_1-FRACTIONAL_BITS_0) /* state correction factor */#define STATE_0_TO_1_SHIFT_R (FRACTIONAL_BITS_0-FRACTIONAL_BITS_1) /* state correction factor */#define HIGH_ALPHA 29491 /* 0.9 scaled as 0,15 */#define LOW_ALPHA 22938 /* 0.7 scaled as 0,15 */#define ALPHA_RANGE (HIGH_ALPHA - LOW_ALPHA)#define DEV_THLD 7168 /* 28.0 scaled as 7,8 */#define PRE_EMP_FAC (-26214) /* -0.8 scaled as 0,15 */#define CEE_SM_FAC 18022 /* 0.55 scaled as 0,15 */#define ONE_MINUS_CEE_SM_FAC 14746 /* 0.45 scaled as 0,15 */#define CNE_SM_FAC 3277 /* 0.1 scaled as 0,15 */#define ONE_MINUS_CNE_SM_FAC 29491 /* 0.9 scaled as 0,15 */#define FFT_HEADROOM 2typedef struct{ Word16 pre_emp_mem; Word16 update_cnt; Word16 hyster_cnt; Word16 last_update_cnt; Word16 ch_enrg_long_db[NUM_CHAN]; /* scaled as 7,8 */ Word32 Lframe_cnt; Word32 Lch_enrg[NUM_CHAN]; /* scaled as 22,9 or 27,4 */ Word32 Lch_noise[NUM_CHAN]; /* scaled as 22,9 */ Word16 last_normb_shift; /* last block norm shift count */ Word16 tsnr; /* total signal-to-noise ratio in dB (scaled as 7,8) */ Word16 hangover; Word16 burstcount; Word16 fupdate_flag; /* forced update flag from previous frame */ Word16 negSNRvar; /* Negative SNR variance (scaled as 7,8) */ Word16 negSNRbias; /* sensitivity bias from negative SNR variance (scaled as 15,0) */ Word16 shift_state; /* use 22,9 or 27,4 scaling for ch_enrg[] */ Word32 L_R0; Word32 L_Rmax; Flag LTP_flag; /* Use to indicate the the LTP gain is > LTP_THRESH */} vadState2;/********************************************************************************** DECLARATION OF 
PROTOTYPES*********************************************************************************/int vad1_init (vadState1 **st); int vad1_reset (vadState1 *st);void vad1_exit (vadState1 **st);Word16 vad1 (vadState1 *st, Word16 in_buf[] );Word16 vad2 (Word16 *farray_ptr, vadState2 *st);int vad2_init (vadState2 **st);void vad2_exit (vadState2 **state);void r_fft (Word16 *farray_ptr);#ifndef VAD2#define vadState vadState1#else #define vadState vadState2#endif#endif
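/*
 * Code-viewer keyboard shortcuts (web page residue; not part of vad.h):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */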