📄 vad2.c
字号:
/******************************************************************************* * GSM AMR-NB speech codec R98 Version 7.6.0 December 12, 2001* R99 Version 3.3.0 * REL-4 Version 4.1.0 ******************************************************************************** File : vad2.c* Purpose : Voice Activity Detection (VAD) for AMR (option 2)*******************************************************************************//******************************************************************************* VERSION ID******************************************************************************/const char vad2_id[] = "@(#)$Id $";/*************************************************************************** * * FUNCTION NAME: vad2() * * PURPOSE: * This function provides the Voice Activity Detection function option 2 * for the Adaptive Multi-rate (AMR) codec. * * INPUTS: * * farray_ptr * pointer to Word16[80] input array * vadState2 * pointer to vadState2 state structure * * OUTPUTS: * * state variables are updated * * RETURN VALUE: * * Word16 * VAD(m) - two successive calls to vad2() yield * the VAD decision for the 20 ms frame: * VAD_flag = VAD(m-1) || VAD(m) * * *************************************************************************//* Includes */#include <stdio.h>#include <stdlib.h>#include "typedef.h"#include "cnst.h"#include "basic_op.h"#include "oper_32b.h"#include "count.h"#include "log2.h"#include "pow2.h"#include "vad2.h"/* Local functions *//*************************************************************************** * * FUNCTION NAME: fn10Log10 * * PURPOSE: * The purpose of this function is to take the 10*log base 10 of input and * divide by 128 and return; i.e. output = 10*log10(input)/128 (scaled as 7,8) * * INPUTS: * * L_Input * input (scaled as 31-fbits,fbits) * fbits * number of fractional bits on input * * OUTPUTS: * * none * * RETURN VALUE: * * Word16 * output (scaled as 7,8) * * DESCRIPTION: * * 10*log10(x)/128 = 10*(log10(2) * (log2(x<<fbits)-log2(1<<fbits)) >> 7 * = 3.0103 * (log2(x<<fbits) - fbits) >> 7 * = ((3.0103/4.0 * (log2(x<<fbits) - fbits) << 2) >> 7 * = (3.0103/4.0 * (log2(x<<fbits) - fbits) >> 5 * *************************************************************************/Word16 fn10Log10 (Word32 L_Input, Word16 fbits){ Word16 integer; /* Integer part of Log2. (range: 0<=val<=30) */ Word16 fraction; /* Fractional part of Log2. (range: 0<=val<1) */ Word32 Ltmp; Word16 tmp; Log2(L_Input, &integer, &fraction); integer = sub(integer, fbits); Ltmp = Mpy_32_16 (integer, fraction, 24660); /* 24660 = 10*log10(2)/4 scaled 0,15 */ Ltmp = L_shr_r(Ltmp, 5+1); /* extra shift for 30,1 => 15,0 extract correction */ tmp = extract_l(Ltmp); return (tmp);}/*************************************************************************** * * FUNCTION NAME: block_norm * * PURPOSE: * The purpose of this function is block normalise the input data sequence * * INPUTS: * * &in[0] * pointer to data sequence to be normalised * length * number of elements in data sequence * headroom * number of headroom bits (i.e., * * OUTPUTS: * * &out[0] * normalised output data sequence pointed to by &out[0] * * RETURN VALUE: * * Word16 * number of bits sequence was left shifted * * DESCRIPTION: * * 1) Search for maximum absolute valued data element * 2) Normalise the max element with "headroom" * 3) Transfer/shift the input sequence to the output buffer * 4) Return the number of left shifts * * CAVEATS: * An input sequence of all zeros will return the maximum * number of left shifts allowed, NOT the value returned * by a norm_s(0) call, since it desired to associate an * all zeros sequence with low energy. * *************************************************************************/Word16 block_norm (Word16 * in, Word16 * out, Word16 length, Word16 headroom){ Word16 i, max, scnt, adata; max = abs_s(in[0]); for (i = 1; i < length; i++) { adata = abs_s(in[i]); test(); if (sub(adata, max) > 0) { max = adata; move16(); } } test(); if (max != 0) { scnt = sub(norm_s(max), headroom); for (i = 0; i < length; i++) { out[i] = shl(in[i], scnt); move16(); } } else { scnt = sub(16, headroom); for (i = 0; i < length; i++) { out[i] = 0; move16(); } } return (scnt);}/********************************************* The VAD function ***************************************************/Word16 vad2 (Word16 * farray_ptr, vadState2 * st){ /* * The channel table is defined below. In this table, the * lower and higher frequency coefficients for each of the 16 * channels are specified. The table excludes the coefficients * with numbers 0 (DC), 1, and 64 (Foldover frequency). */ static Word16 ch_tbl[NUM_CHAN][2] = { {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}, {12, 13}, {14, 16}, {17, 19}, {20, 22}, {23, 26}, {27, 30}, {31, 35}, {36, 41}, {42, 48}, {49, 55}, {56, 63} }; /* channel energy scaling table - allows efficient division by number * of DFT bins in the channel: 1/2, 1/3, 1/4, etc. */ static Word16 ch_tbl_sh[NUM_CHAN] = { 16384, 16384, 16384, 16384, 16384, 16384, 10923, 10923, 10923, 8192, 8192, 6554, 5461, 4681, 4681, 4096 }; /* * The voice metric table is defined below. It is a non- * linear table with a deadband near zero. It maps the SNR * index (quantized SNR value) to a number that is a measure * of voice quality. */ static Word16 vm_tbl[90] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23, 24, 24, 25, 26, 27, 28, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50 }; /* hangover as a function of peak SNR (3 dB steps) */ static Word16 hangover_table[20] = { 30, 30, 30, 30, 30, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 8, 8, 8 }; /* burst sensitivity as a function of peak SNR (3 dB steps) */ static Word16 burstcount_table[20] = { 8, 8, 8, 8, 8, 8, 8, 8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; /* voice metric sensitivity as a function of peak SNR (3 dB steps) */ static Word16 vm_threshold_table[20] = { 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 40, 51, 71, 100, 139, 191, 257, 337, 432 }; /* State tables that use 22,9 or 27,4 scaling for ch_enrg[] */ static Word16 noise_floor_chan[2] = {NOISE_FLOOR_CHAN_0, NOISE_FLOOR_CHAN_1}; static Word16 min_chan_enrg[2] = {MIN_CHAN_ENRG_0, MIN_CHAN_ENRG_1}; static Word16 ine_noise[2] = {INE_NOISE_0, INE_NOISE_1}; static Word16 fbits[2] = {FRACTIONAL_BITS_0, FRACTIONAL_BITS_1}; static Word16 state_change_shift_r[2] = {STATE_1_TO_0_SHIFT_R, STATE_0_TO_1_SHIFT_R}; /* Energy scale table given 30,1 input scaling (also account for -6 dB shift on input) */ static Word16 enrg_norm_shift[2] = {(FRACTIONAL_BITS_0-1+2), (FRACTIONAL_BITS_1-1+2)}; /* Automatic variables */ Word32 Lenrg; /* scaled as 30,1 */ Word32 Ltne; /* scaled as 22,9 */ Word32 Ltce; /* scaled as 22,9 or 27,4 */ Word16 tne_db; /* scaled as 7,8 */ Word16 tce_db; /* scaled as 7,8 */ Word16 input_buffer[FRM_LEN]; /* used for block normalising input data */ Word16 data_buffer[FFT_LEN]; /* used for in-place FFT */ Word16 ch_snr[NUM_CHAN]; /* scaled as 7,8 */ Word16 ch_snrq; /* scaled as 15,0 (in 0.375 dB steps) */ Word16 vm_sum; /* scaled as 15,0 */ Word16 ch_enrg_dev; /* scaled as 7,8 */ Word32 Lpeak; /* maximum channel energy */ Word16 p2a_flag; /* flag to indicate spectral peak-to-average ratio > 10 dB */ Word16 ch_enrg_db[NUM_CHAN]; /* scaled as 7,8 */ Word16 ch_noise_db; /* scaled as 7,8 */ Word16 alpha; /* scaled as 0,15 */ Word16 one_m_alpha; /* scaled as 0,15 */ Word16 update_flag; /* set to indicate a background noise estimate update */ Word16 i, j, j1, j2; /* Scratch variables */ Word16 hi1, lo1; Word32 Ltmp, Ltmp1, Ltmp2; Word16 tmp; Word16 normb_shift; /* block norm shift count */ Word16 ivad; /* intermediate VAD decision (return value) */ Word16 tsnrq; /* total signal-to-noise ratio (quantized 3 dB steps) scaled as 15,0 */ Word16 xt; /* instantaneous frame SNR in dB, scaled as 7,8 */ Word16 state_change; /* Increment frame counter */ st->Lframe_cnt = L_add(st->Lframe_cnt, 1); /* Block normalize the input */ normb_shift = block_norm(farray_ptr, input_buffer, FRM_LEN, FFT_HEADROOM); /* Pre-emphasize the input data and store in the data buffer with the appropriate offset */ for (i = 0; i < DELAY; i++) { data_buffer[i] = 0; move16(); } st->pre_emp_mem = shr_r(st->pre_emp_mem, sub(st->last_normb_shift, normb_shift)); st->last_normb_shift = normb_shift; move16(); data_buffer[DELAY] = add(input_buffer[0], mult(PRE_EMP_FAC, st->pre_emp_mem)); move16(); for (i = DELAY + 1, j = 1; i < DELAY + FRM_LEN; i++, j++) { data_buffer[i] = add(input_buffer[j], mult(PRE_EMP_FAC, input_buffer[j-1])); move16(); } st->pre_emp_mem = input_buffer[FRM_LEN-1]; move16(); for (i = DELAY + FRM_LEN; i < FFT_LEN; i++) { data_buffer[i] = 0; move16(); } /* Perform FFT on the data buffer */ r_fft(data_buffer); /* Use normb_shift factor to determine the scaling of the energy estimates */ state_change = 0; move16(); test(); if (st->shift_state == 0) { test(); if (sub(normb_shift, -FFT_HEADROOM+2) <= 0) { state_change = 1; move16(); st->shift_state = 1; move16(); } } else { test(); if (sub(normb_shift, -FFT_HEADROOM+5) >= 0) { state_change = 1; move16(); st->shift_state = 0; move16(); } } /* Scale channel energy estimate */ test(); if (state_change) { for (i = LO_CHAN; i <= HI_CHAN; i++) { st->Lch_enrg[i] = L_shr(st->Lch_enrg[i], state_change_shift_r[st->shift_state]); move32(); } } /* Estimate the energy in each channel */ test(); if (L_sub(st->Lframe_cnt, 1) == 0) { alpha = 32767; move16(); one_m_alpha = 0; move16(); } else { alpha = CEE_SM_FAC; move16(); one_m_alpha = ONE_MINUS_CEE_SM_FAC; move16(); } for (i = LO_CHAN; i <= HI_CHAN; i++) { Lenrg = 0; move16(); j1 = ch_tbl[i][0]; move16(); j2 = ch_tbl[i][1]; move16(); for (j = j1; j <= j2; j++) { Lenrg = L_mac(Lenrg, data_buffer[2 * j], data_buffer[2 * j]); Lenrg = L_mac(Lenrg, data_buffer[2 * j + 1], data_buffer[2 * j + 1]); } /* Denorm energy & scale 30,1 according to the state */ Lenrg = L_shr_r(Lenrg, sub(shl(normb_shift, 1), enrg_norm_shift[st->shift_state])); /* integrate over time: e[i] = (1-alpha)*e[i] + alpha*enrg/num_bins_in_chan */ tmp = mult(alpha, ch_tbl_sh[i]); L_Extract (Lenrg, &hi1, &lo1); Ltmp = Mpy_32_16(hi1, lo1, tmp); L_Extract (st->Lch_enrg[i], &hi1, &lo1); st->Lch_enrg[i] = L_add(Ltmp, Mpy_32_16(hi1, lo1, one_m_alpha)); move32(); test(); if (L_sub(st->Lch_enrg[i], min_chan_enrg[st->shift_state]) < 0) { st->Lch_enrg[i] = min_chan_enrg[st->shift_state]; move32(); } } /* Compute the total channel energy estimate (Ltce) */ Ltce = 0; move16(); for (i = LO_CHAN; i <= HI_CHAN; i++) { Ltce = L_add(Ltce, st->Lch_enrg[i]); } /* Calculate spectral peak-to-average ratio, set flag if p2a > 10 dB */ Lpeak = 0; move32(); for (i = LO_CHAN+2; i <= HI_CHAN; i++) /* Sine waves not valid for low frequencies */ { test(); if (L_sub(st->Lch_enrg [i], Lpeak) > 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -