/* vad2.c */
#include "AmrCode_Comm.h"
#include "basic_op.h"
#include "vad.h"
/*
 * fn10Log10 - fixed-point logarithm conversion built on Log2().
 *
 * L_Input : 32-bit value handed to Log2().
 * fbits   : amount subtracted from the integer part that Log2() returns
 *           (callers in this file pass the fractional-bit count of L_Input).
 *
 * Returns the low 16 bits of the rounded result; callers in this file store
 * it in variables commented as "scaled as 7,8".
 *
 * NOTE(review): 24660 looks like 10*log10(2)/4 in Q15 (0.7526 * 32768), and
 * 49320 is exactly 2*24660 - confirm against the codec specification.
 */
Word16 fn10Log10 (Word32 L_Input, Word16 fbits)
{
    Word16 log2_int;    /* Integer part of Log2.    (range: 0<=val<=30) */
    Word16 log2_frac;   /* Fractional part of Log2. (range: 0<=val<1)   */
    Word32 Lhi;         /* contribution of the integer part             */
    Word32 Llo;         /* contribution of the fractional part          */

    Log2(L_Input, &log2_int, &log2_frac);

    /* Compensate for the fractional bits carried by the input. */
    log2_int -= fbits;

    /* 32x16 multiply split into a high (integer) and low (fraction) term. */
    Lhi = log2_int * 49320;
    Llo = ((log2_frac * 24660) >> 15) << 1;

    /* Rounded right shift by 6, then keep the low 16 bits. */
    return (Word16)L_shr_r(Lhi + Llo, 6);
}
/*
 * Channel table.
 *
 * Each row gives the lowest and highest frequency coefficient (inclusive)
 * belonging to one of the 16 channels.  Coefficients 0 (DC), 1, and
 * 64 (foldover frequency) are not part of any channel.
 */
static Word16 ch_tbl[NUM_CHAN][2] =
{
    /* low, high */
    {  2,  3 },     /* channel  0 */
    {  4,  5 },     /* channel  1 */
    {  6,  7 },     /* channel  2 */
    {  8,  9 },     /* channel  3 */
    { 10, 11 },     /* channel  4 */
    { 12, 13 },     /* channel  5 */
    { 14, 16 },     /* channel  6 */
    { 17, 19 },     /* channel  7 */
    { 20, 22 },     /* channel  8 */
    { 23, 26 },     /* channel  9 */
    { 27, 30 },     /* channel 10 */
    { 31, 35 },     /* channel 11 */
    { 36, 41 },     /* channel 12 */
    { 42, 48 },     /* channel 13 */
    { 49, 55 },     /* channel 14 */
    { 56, 63 }      /* channel 15 */
};
/*
 * Channel energy scaling table.
 *
 * Holds 32768/N (rounded) for each channel, where N is the number of DFT
 * bins in that channel, so that dividing by the bin count (1/2, 1/3, 1/4,
 * etc.) becomes a fractional multiply.
 */
static Word16 ch_tbl_sh[NUM_CHAN] =
{
    16384, 16384, 16384, 16384, 16384, 16384,   /* channels  0-5 : 1/2 */
    10923, 10923, 10923,                        /* channels  6-8 : 1/3 */
    8192, 8192,                                 /* channels  9-10: 1/4 */
    6554,                                       /* channel  11   : 1/5 */
    5461,                                       /* channel  12   : 1/6 */
    4681, 4681,                                 /* channels 13-14: 1/7 */
    4096                                        /* channel  15   : 1/8 */
};
/*
 * Voice metric table.
 *
 * Non-linear mapping, with a deadband near zero, from the SNR index
 * (quantized SNR value) to a number that is a measure of voice quality.
 * The table has 90 entries, listed here ten per row.
 */
static Word16 vm_tbl[90] =
{
    /*  0- 9 */   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    /* 10-19 */   2,  3,  3,  3,  3,  3,  4,  4,  4,  5,
    /* 20-29 */   5,  5,  6,  6,  7,  7,  7,  8,  8,  9,
    /* 30-39 */   9, 10, 10, 11, 12, 12, 13, 13, 14, 15,
    /* 40-49 */  15, 16, 17, 17, 18, 19, 20, 20, 21, 22,
    /* 50-59 */  23, 24, 24, 25, 26, 27, 28, 28, 29, 30,
    /* 60-69 */  31, 32, 33, 34, 35, 36, 37, 37, 38, 39,
    /* 70-79 */  40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    /* 80-89 */  50, 50, 50, 50, 50, 50, 50, 50, 50, 50
};
/*
 * Hangover as a function of peak SNR (3 dB steps); 20 entries,
 * listed here ten per row.
 */
static Word16 hangover_table[20] =
{
    /*  0- 9 */  30, 30, 30, 30, 30, 30, 28, 26, 24, 22,
    /* 10-19 */  20, 18, 16, 14, 12, 10,  8,  8,  8,  8
};
/*
 * Burst sensitivity as a function of peak SNR (3 dB steps); 20 entries,
 * listed here ten per row.
 */
static Word16 burstcount_table[20] =
{
    /*  0- 9 */  8, 8, 8, 8, 8, 8, 8, 8, 7, 6,
    /* 10-19 */  5, 4, 4, 4, 4, 4, 4, 4, 4, 4
};
/*
 * Voice metric sensitivity as a function of peak SNR (3 dB steps);
 * 20 entries, listed here ten per row.
 */
static Word16 vm_threshold_table[20] =
{
    /*  0- 9 */   34,  34,  34,  34,  34,  34,  34,  34,  34,  34,
    /* 10-19 */   34,  40,  51,  71, 100, 139, 191, 257, 337, 432
};
/*
 * Per-state tables: entry 0 is used with 22,9 scaling of ch_enrg[],
 * entry 1 with 27,4 scaling.
 */
static Word16 noise_floor_chan[2] =
{
    NOISE_FLOOR_CHAN_0,
    NOISE_FLOOR_CHAN_1
};

static Word16 min_chan_enrg[2] =
{
    MIN_CHAN_ENRG_0,
    MIN_CHAN_ENRG_1
};

static Word16 ine_noise[2] =
{
    INE_NOISE_0,
    INE_NOISE_1
};

static Word16 fbits[2] =
{
    FRACTIONAL_BITS_0,
    FRACTIONAL_BITS_1
};

/* Right-shift count applied on a state change, indexed by the new state. */
static Word16 state_change_shift_r[2] =
{
    STATE_1_TO_0_SHIFT_R,
    STATE_0_TO_1_SHIFT_R
};

/*
 * Energy scale table given 30,1 input scaling (also accounts for the
 * -6 dB shift on input).
 */
static Word16 enrg_norm_shift[2] =
{
    (FRACTIONAL_BITS_0-1+2),
    (FRACTIONAL_BITS_1-1+2)
};
Word16 vad2 (Word16 * farray_ptr, vadState2 * st){ /* Automatic variables */
Word32 Lenrg; /* scaled as 30,1 */
Word32 Ltne; /* scaled as 22,9 */
Word32 Ltce; /* scaled as 22,9 or 27,4 */
Word16 tne_db; /* scaled as 7,8 */ Word16 tce_db; /* scaled as 7,8 */ Word16 input_buffer[FRM_LEN]; /* used for block normalising input data */ Word16 data_buffer[FFT_LEN]; /* used for in-place FFT */ Word16 ch_snr[NUM_CHAN]; /* scaled as 7,8 */ Word16 ch_snrq; /* scaled as 15,0 (in 0.375 dB steps) */ Word16 vm_sum; /* scaled as 15,0 */ Word16 ch_enrg_dev; /* scaled as 7,8 */ Word32 Lpeak; /* maximum channel energy */ Word16 p2a_flag; /* flag to indicate spectral peak-to-average ratio > 10 dB */
Word16 ch_enrg_db[NUM_CHAN]; /* scaled as 7,8 */ Word16 ch_noise_db; /* scaled as 7,8 */ Word16 alpha; /* scaled as 0,15 */ Word16 one_m_alpha; /* scaled as 0,15 */ Word16 update_flag; /* set to indicate a background noise estimate update */ Word16 i, j, j1, j2; /* Scratch variables */ Word16 hi1, lo1,hi2,lo2,hi3,lo3,hi4,lo4;
Word16 tmp,max,tmp1,tmp2,tmp3;
Word16 normb_shift; /* block norm shift count */
Word16 ivad; /* intermediate VAD decision (return value) */
Word16 tsnrq; /* total signal-to-noise ratio (quantized 3 dB steps) scaled as 15,0 */
Word16 xt; /* instantaneous frame SNR in dB, scaled as 7,8 */
Word16 state_change;
Word32 tmp32, Ltmp, Ltmp0,Ltmp1,Ltmp2,Ltmp3,Ltmp4,Ltmp5;
Word16 *pFaray;
vadState2 * pst;
pFaray = farray_ptr;
pst = st;
/* Increment frame counter */ st->Lframe_cnt ++;
/* Block normalize the input */ //normb_shift = block_norm(farray_ptr, input_buffer, FRM_LEN, FFT_HEADROOM);
max = -32768;
for (i = 0; i < FRM_LEN; i+=4)
{
tmp = pFaray[i] > 0 ? pFaray[i] : -pFaray[i];
tmp1 = pFaray[i+1]> 0 ? pFaray[i+1] : -pFaray[i+1];
tmp2 = pFaray[i+2]> 0 ? pFaray[i+2] : -pFaray[i+2];
tmp3 = pFaray[i+3]> 0 ? pFaray[i+3] : -pFaray[i+3];
tmp = tmp>tmp1 ? tmp :tmp1;
tmp2 = tmp2 > tmp3 ? tmp2 :tmp3;
tmp = tmp > tmp2 ? tmp :tmp2;
max = tmp > max ? tmp : max;
}
if (max != 0) { normb_shift = norm_s(max)- FFT_HEADROOM;
for (i = 0; i < FRM_LEN; i+= 4 )
{
input_buffer[i ] = normb_shift >0 ? pFaray[i ]<<normb_shift : pFaray[i ] >> -normb_shift;
input_buffer[i+1] = normb_shift >0 ? pFaray[i+1]<<normb_shift : pFaray[i+1] >> -normb_shift;
input_buffer[i+2] = normb_shift >0 ? pFaray[i+2]<<normb_shift : pFaray[i+2] >> -normb_shift;
input_buffer[i+3] = normb_shift >0 ? pFaray[i+3]<<normb_shift : pFaray[i+3] >> -normb_shift;
} } else { normb_shift = 14;
memset(input_buffer, 0, 160);
}
/* Pre-emphasize the input data and store in the data buffer with the appropriate offset */ memset(data_buffer, 0 , 48);
pst->pre_emp_mem = shr_r(pst->pre_emp_mem, (pst->last_normb_shift -normb_shift));
pst->last_normb_shift = normb_shift;
data_buffer[24] = input_buffer[0] + ( PRE_EMP_FAC*st->pre_emp_mem >>15 );
data_buffer[25] = input_buffer[1] + (PRE_EMP_FAC* input_buffer[0] >> 15 );
data_buffer[26] = input_buffer[2] + (PRE_EMP_FAC* input_buffer[1] >> 15 );
data_buffer[27] = input_buffer[3] + (PRE_EMP_FAC* input_buffer[2] >> 15 );
for (i = 28, j = 4; i < 104; i+=4, j+=4)
{ data_buffer[i] = input_buffer[j] + (PRE_EMP_FAC* input_buffer[j-1] >> 15 );
data_buffer[i+1] = input_buffer[j+1] + (PRE_EMP_FAC* input_buffer[j] >> 15 );
data_buffer[i+2] = input_buffer[j+2] + (PRE_EMP_FAC* input_buffer[j+1] >> 15 );
data_buffer[i+3] = input_buffer[j+3] + (PRE_EMP_FAC* input_buffer[j+2] >> 15 );
}
st->pre_emp_mem = input_buffer[FRM_LEN-1];
memset(&data_buffer[104],0, 48);
/* Perform FFT on the data buffer */ r_fft(data_buffer); /* Use normb_shift factor to determine the scaling of the energy estimates */ state_change = ( (pst->shift_state == 0)&&( normb_shift <= 0 ) )||((pst->shift_state != 0)&&( normb_shift >= 3 ))? 1 : 0;
pst->shift_state = (pst->shift_state == 0)&&( normb_shift <= 0 ) ? 1 : ((pst->shift_state != 0)&&( normb_shift >= 3 ) ? 0 : pst->shift_state);
/* Scale channel energy estimate */
if (state_change) { tmp = state_change_shift_r[pst->shift_state];
pst->Lch_enrg[0] = tmp >= 0 ? pst->Lch_enrg[0] >>tmp : ( pst->Lch_enrg[0] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[0]<< -tmp);
pst->Lch_enrg[1] = tmp >= 0 ? pst->Lch_enrg[1] >>tmp : ( pst->Lch_enrg[1] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[1]<< -tmp);
pst->Lch_enrg[2] = tmp >= 0 ? pst->Lch_enrg[2] >>tmp : ( pst->Lch_enrg[2] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[2]<< -tmp);
pst->Lch_enrg[3] = tmp >= 0 ? pst->Lch_enrg[3] >>tmp: ( pst->Lch_enrg[3] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[3]<< -tmp);
pst->Lch_enrg[4] = tmp >= 0 ? pst->Lch_enrg[4] >>tmp: ( pst->Lch_enrg[4] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[4]<< -tmp);
pst->Lch_enrg[5] = tmp >= 0 ? pst->Lch_enrg[5] >>tmp : ( pst->Lch_enrg[5] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[5]<< -tmp);
pst->Lch_enrg[6] = tmp >= 0 ? pst->Lch_enrg[6] >>tmp: ( pst->Lch_enrg[6] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[6]<< -tmp);
pst->Lch_enrg[7] = tmp >= 0 ? pst->Lch_enrg[7] >>tmp : ( pst->Lch_enrg[7] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[7]<< -tmp);
pst->Lch_enrg[8] = tmp >= 0 ? pst->Lch_enrg[8] >>tmp : ( pst->Lch_enrg[8] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[8]<< -tmp);
pst->Lch_enrg[9] = tmp >= 0 ? pst->Lch_enrg[9] >>tmp : ( pst->Lch_enrg[9] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[9]<< -tmp);
pst->Lch_enrg[10] = tmp >= 0 ? pst->Lch_enrg[10]>>tmp: ( pst->Lch_enrg[10] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[10]<< -tmp);
pst->Lch_enrg[11] = tmp >= 0 ? pst->Lch_enrg[11]>>tmp : ( pst->Lch_enrg[11] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[11]<< -tmp);
pst->Lch_enrg[12] = tmp >= 0 ? pst->Lch_enrg[12]>>tmp : ( pst->Lch_enrg[12] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[12]<< -tmp);
pst->Lch_enrg[13] = tmp >= 0 ? pst->Lch_enrg[13]>>tmp : ( pst->Lch_enrg[13] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[13]<< -tmp);
pst->Lch_enrg[14] = tmp >= 0 ? pst->Lch_enrg[14]>>tmp : ( pst->Lch_enrg[14] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[14]<< -tmp);
pst->Lch_enrg[15] = tmp >= 0 ? pst->Lch_enrg[15]>>tmp : ( pst->Lch_enrg[15] > (MAX_32 >>-tmp) ? MAX_32 : pst->Lch_enrg[15]<< -tmp);
} /* Estimate the energy in each channel */ alpha = (pst->Lframe_cnt == 1) ? 32767 : CEE_SM_FAC;
one_m_alpha = (pst->Lframe_cnt == 1) ? 0 : ONE_MINUS_CEE_SM_FAC;
for (i = LO_CHAN; i <= HI_CHAN; i++) { Lenrg = 0;
j1 = ch_tbl[i][0];
j2 = ch_tbl[i][1];
for (j = j1; j <= j2; j++) {
Lenrg += (data_buffer[2 * j]*data_buffer[2 * j] + data_buffer[2 * j + 1]*data_buffer[2 * j + 1])<<1;
} /* Denorm energy & scale 30,1 according to the state */ Lenrg = L_shr_r(Lenrg, ( (normb_shift<<1) - enrg_norm_shift[st->shift_state] ));
/* integrate over time: e[i] = (1-alpha)*e[i] + alpha*enrg/num_bins_in_chan */
tmp = (alpha*ch_tbl_sh[i] >> 15 );
hi1 = (Word16)(Lenrg >> 16);
lo1 = (Lenrg - (hi1<<16))>>1;
Ltmp = (hi1*tmp<<1) + ((lo1*tmp>>15)<<1);
hi1 = (Word16)(pst->Lch_enrg[i] >> 16);
lo1 = (pst->Lch_enrg[i] - (hi1<<16))>>1;
pst->Lch_enrg[i] = Ltmp + (hi1*one_m_alpha<<1) +(( lo1*one_m_alpha>>15)<<1);
pst->Lch_enrg[i] = pst->Lch_enrg[i] < min_chan_enrg[pst->shift_state] ? min_chan_enrg[pst->shift_state] : pst->Lch_enrg[i] ;
} /* Compute the total channel energy estimate (Ltce) */
Ltmp0 = pst->Lch_enrg[0] +pst->Lch_enrg[1] +pst->Lch_enrg[2] +pst->Lch_enrg[3];
Ltmp1 = pst->Lch_enrg[4] +pst->Lch_enrg[5] +pst->Lch_enrg[6] +pst->Lch_enrg[7];
Ltmp2 = pst->Lch_enrg[8] +pst->Lch_enrg[9] +pst->Lch_enrg[10] +pst->Lch_enrg[11];
Ltmp3 = pst->Lch_enrg[12] +pst->Lch_enrg[13] +pst->Lch_enrg[14] +pst->Lch_enrg[15];
Ltce = Ltmp0+Ltmp1+Ltmp2+Ltmp3;
/* Calculate spectral peak-to-average ratio, set flag if p2a > 10 dB */ Lpeak = 0;
Ltmp0 = pst->Lch_enrg [2] > pst->Lch_enrg [3] ? pst->Lch_enrg [2] : pst->Lch_enrg [3] ;
Lpeak = Ltmp0 > Lpeak ? Ltmp0 : Lpeak;
/* Sine waves not valid for low frequencies */
Ltmp0 = pst->Lch_enrg [4] > pst->Lch_enrg [5] ? pst->Lch_enrg [4] : pst->Lch_enrg [5] ;
Ltmp1 = pst->Lch_enrg [6] > pst->Lch_enrg [7] ? pst->Lch_enrg [6] : pst->Lch_enrg [7] ;
Ltmp2 = pst->Lch_enrg [8] > pst->Lch_enrg [9] ? pst->Lch_enrg [8] : pst->Lch_enrg [9] ;
Ltmp3 = pst->Lch_enrg [10] > pst->Lch_enrg [11] ? pst->Lch_enrg [10] : pst->Lch_enrg [11] ;
Ltmp4 = pst->Lch_enrg [12] > pst->Lch_enrg [13] ? pst->Lch_enrg [12] : pst->Lch_enrg [13] ;
Ltmp5 = pst->Lch_enrg [14] > pst->Lch_enrg [15] ? pst->Lch_enrg [14] : pst->Lch_enrg [15] ;
Ltmp0 = Ltmp0>Ltmp1 ? Ltmp0 :Ltmp1; Ltmp2 = Ltmp2> Ltmp3 ? Ltmp2 : Ltmp3;
Ltmp4 = Ltmp4>Ltmp5 ? Ltmp4 : Ltmp5; Ltmp0 = Ltmp0>Ltmp2 ? Ltmp0 : Ltmp2;
Ltmp0 = Ltmp0> Ltmp4 ? Ltmp0 :Ltmp4;
Lpeak = Ltmp0 > Lpeak ? Ltmp0 : Lpeak;
/* Set p2a_flag if peak (dB) > average channel energy (dB) + 10 dB */ /* Lpeak > Ltce/num_channels * 10^(10/10) */ /* Lpeak > (10/16)*Ltce */
hi1 = (Word16)(Ltce>>16);
lo1 = (Ltce - (hi1<<16 ))>>1;
Ltmp = hi1*40960 + ((lo1*20480 >>15)<<1);
p2a_flag = Lpeak > Ltmp ? 1 : 0;
/* Initialize channel noise estimate to either the channel energy or fixed level */ /* Scale the energy appropriately to yield state 0 (22,9) scaling for noise */
if (pst->Lframe_cnt <= 4 )
{
if (p2a_flag == 1)
{
pst->Lch_noise[0] = pst->Lch_noise[1] = pst->Lch_noise[2] = pst->Lch_noise[3] =
pst->Lch_noise[4] = pst->Lch_noise[5] = pst->Lch_noise[6] = pst->Lch_noise[7] =
pst->Lch_noise[8] = pst->Lch_noise[9] = pst->Lch_noise[10] = pst->Lch_noise[11] =
pst->Lch_noise[12] = pst->Lch_noise[13] = pst->Lch_noise[14] = pst->Lch_noise[15] = INE_NOISE_0;
} else { for (i = LO_CHAN; i <= HI_CHAN; i++) {
if ( pst->Lch_enrg[i] < ine_noise[pst->shift_state])
{ pst->Lch_noise[i] = INE_NOISE_0;
} else {
pst->Lch_noise[i] = (pst->shift_state == 1) ? (pst->Lch_enrg[i] > 0x03ffffff ? MAX_32 : pst->Lch_enrg[i] << 5) : pst->Lch_enrg[i];
} } } } /* Compute the channel energy (in dB), the channel SNRs, and the sum of voice metrics */ vm_sum = 0;
for (i = LO_CHAN; i <= HI_CHAN; i++) { ch_enrg_db[i] = fn10Log10(st->Lch_enrg[i], fbits[st->shift_state]);
ch_noise_db = fn10Log10(st->Lch_noise[i], FRACTIONAL_BITS_0);
ch_snr[i] = ch_enrg_db[i] - ch_noise_db ;
/* quantize channel SNR in 3/8 dB steps (scaled 7,8 => 15,0) */
ch_snrq = shr_r((21845*ch_snr[i] >> 15 ), 6);
/* Accumulate the sum of voice metrics */
j = ch_snrq > 89 ? 89 : ( ch_snrq < 0? 0 : ch_snrq );
vm_sum += vm_tbl[j] ;
} /* Initialize NOMINAL peak voice energy and average noise energy, calculate instantaneous SNR */
if ( (st->Lframe_cnt <= 4) || st->fupdate_flag == 1)
{ /* tce_db = (96 - 22 - 10*log10(64) (due to FFT)) scaled as 7,8 */ tce_db = 14320;
pst->negSNRvar = 0;
pst->negSNRbias = 0;
/* Compute the total noise estimate (Ltne) */ //Ltne = 0;
Ltmp0 = pst->Lch_noise[0] +pst->Lch_noise[1]+pst->Lch_noise[2]+pst->Lch_noise[3];
Ltmp1 = pst->Lch_noise[4] +pst->Lch_noise[5]+pst->Lch_noise[6]+pst->Lch_noise[7];
Ltmp2 = pst->Lch_noise[8] +pst->Lch_noise[9]+pst->Lch_noise[10]+pst->Lch_noise[11];
Ltmp3 = pst->Lch_noise[12] +pst->Lch_noise[13]+pst->Lch_noise[14]+pst->Lch_noise[15];
Ltne = Ltmp0+Ltmp1+Ltmp2+Ltmp3;
/* Get total noise in dB */ tne_db = fn10Log10(Ltne, FRACTIONAL_BITS_0); /* Initialise instantaneous and long-term peak signal-to-noise ratios */ xt = tce_db - tne_db ;
st->tsnr = xt;
} else { /* Calculate instantaneous frame signal-to-noise ratio */ /* xt = 10*log10( sum(2.^(ch_snr*0.1*log2(10)))/length(ch_snr) ) */
//Ltmp1 = 0;
Ltmp0 = 0;
for (i=LO_CHAN; i<=HI_CHAN; i+=4)
{
Ltmp1 = (ch_snr[i]*10885 >> 7);
Ltmp2 = (ch_snr[i+1]*10885 >> 7);
Ltmp3 = (ch_snr[i+2]*10885 >> 7);
Ltmp4 = (ch_snr[i+3]*10885 >> 7);
hi1 = (Word16)(Ltmp1 >> 16); hi2 = (Word16)(Ltmp2 >> 16);hi3 = (Word16)(Ltmp3 >> 16); hi4 = (Word16)(Ltmp4 >> 16);
lo1 = (Ltmp1 - (hi1<<16))>>1; lo2 = (Ltmp2 - (hi2<<16))>>1;
lo3 = (Ltmp3 - (hi3<<16))>>1; lo4 = (Ltmp4 - (hi4<<16))>>1;
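/*
 * [Code-viewer residue, not part of the program] Keyboard shortcuts:
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */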