⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vad2.c

📁 AMR-NB 的编码实现,纯C, VC下建立工程即可用.
💻 C
📖 第 1 页 / 共 2 页
字号:

#include "AmrCode_Comm.h"
#include "basic_op.h"
#include "vad.h"

/*
 * fn10Log10 - fixed-point 10*log10() built on top of Log2().
 *
 * L_Input : 32-bit value handed unchanged to Log2().
 * fbits   : amount subtracted from the integer part of the base-2
 *           logarithm (callers in this file pass the number of
 *           fractional bits of L_Input's scaling).
 *
 * The base-2 logarithm (integer + fraction) is multiplied by 24660
 * using a split hi/lo product: the integer part contributes
 * integer * 2 * 24660 (= integer * 49320) and the fractional part
 * contributes ((fraction * 24660) >> 15) * 2.
 * NOTE(review): 24660 is numerically (10*log10(2)/4) * 2^15; presumably
 * that is the intended constant - confirm against the reference code.
 *
 * Returns the accumulated value passed through L_shr_r(.., 6) and
 * narrowed to Word16.  Callers in this file store the result in
 * variables commented as "scaled as 7,8".
 */
Word16 fn10Log10 (Word32 L_Input, Word16 fbits)
{
	Word16 log2_int;	/* Integer part of Log2.    (range: 0<=val<=30) */
	Word16 log2_frac;	/* Fractional part of Log2. (range: 0<=val<1)   */
	Word32 acc;

	Log2(L_Input, &log2_int, &log2_frac);

	/* Remove the scaling of the input from the integer part. */
	log2_int = log2_int - fbits;

	/* hi/lo multiply by 24660 */
	acc = log2_int * 49320;
	acc += ((log2_frac * 24660) >> 15) << 1;

	return (Word16)L_shr_r(acc, 6);
}
/*
 * Channel table.
 *
 * Each row gives the lowest and highest frequency coefficient (DFT bin)
 * index of one of the 16 channels.  Coefficients 0 (DC), 1, and
 * 64 (foldover frequency) do not belong to any channel.
 */
static Word16 ch_tbl[NUM_CHAN][2] = {
	/* low, high */
	{ 2,  3},	/* channel  0: 2 bins */
	{ 4,  5},	/* channel  1: 2 bins */
	{ 6,  7},	/* channel  2: 2 bins */
	{ 8,  9},	/* channel  3: 2 bins */
	{10, 11},	/* channel  4: 2 bins */
	{12, 13},	/* channel  5: 2 bins */
	{14, 16},	/* channel  6: 3 bins */
	{17, 19},	/* channel  7: 3 bins */
	{20, 22},	/* channel  8: 3 bins */
	{23, 26},	/* channel  9: 4 bins */
	{27, 30},	/* channel 10: 4 bins */
	{31, 35},	/* channel 11: 5 bins */
	{36, 41},	/* channel 12: 6 bins */
	{42, 48},	/* channel 13: 7 bins */
	{49, 55},	/* channel 14: 7 bins */
	{56, 63}	/* channel 15: 8 bins */
};

/*
 * Channel energy scaling table.
 *
 * Holds the reciprocal of the number of DFT bins in each channel
 * (1/2, 1/3, 1/4, ...), so dividing a channel's energy by its bin count
 * becomes a multiplication.  Each entry is 32768/bins, rounded:
 * 16384 = 1/2, 10923 = 1/3, 8192 = 1/4, 6554 = 1/5, 5461 = 1/6,
 * 4681 = 1/7, 4096 = 1/8.
 */
static Word16 ch_tbl_sh[NUM_CHAN] = {
	16384, 16384, 16384, 16384,	/* channels  0.. 3 */
	16384, 16384, 10923, 10923,	/* channels  4.. 7 */
	10923,  8192,  8192,  6554,	/* channels  8..11 */
	 5461,  4681,  4681,  4096	/* channels 12..15 */
};

/*
 * Voice metric table.
 *
 * Non-linear table with a deadband near zero.  It maps the SNR index
 * (the quantized channel SNR, clamped to 0..89 by the caller) to a
 * number that is a measure of voice quality.
 */
static Word16 vm_tbl[90] = {
	/*  0.. 9 */  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
	/* 10..19 */  2,  3,  3,  3,  3,  3,  4,  4,  4,  5,
	/* 20..29 */  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,
	/* 30..39 */  9, 10, 10, 11, 12, 12, 13, 13, 14, 15,
	/* 40..49 */ 15, 16, 17, 17, 18, 19, 20, 20, 21, 22,
	/* 50..59 */ 23, 24, 24, 25, 26, 27, 28, 28, 29, 30,
	/* 60..69 */ 31, 32, 33, 34, 35, 36, 37, 37, 38, 39,
	/* 70..79 */ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	/* 80..89 */ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50
};

/* Hangover as a function of peak SNR (one entry per 3 dB step, 20 steps). */
static Word16 hangover_table[20] = {
	/*  0.. 9 */ 30, 30, 30, 30, 30, 30, 28, 26, 24, 22,
	/* 10..19 */ 20, 18, 16, 14, 12, 10,  8,  8,  8,  8
};

/* Burst sensitivity as a function of peak SNR (one entry per 3 dB step, 20 steps). */
static Word16 burstcount_table[20] = {
	/*  0.. 9 */ 8, 8, 8, 8, 8, 8, 8, 8, 7, 6,
	/* 10..19 */ 5, 4, 4, 4, 4, 4, 4, 4, 4, 4
};

/* Voice metric sensitivity as a function of peak SNR (one entry per 3 dB step, 20 steps). */
static Word16 vm_threshold_table[20] = {
	/*  0.. 9 */ 34, 34, 34,  34,  34,  34,  34,  34,  34,  34,
	/* 10..19 */ 34, 40, 51,  71, 100, 139, 191, 257, 337, 432
};


/*
 * State tables that use 22,9 or 27,4 scaling for ch_enrg[].
 *
 * Each table has one entry per shift state and is indexed by the
 * shift_state field of the VAD state: entry 0 applies to state 0
 * (22,9 scaling), entry 1 to state 1.
 */

static Word16 noise_floor_chan[2] = {
	NOISE_FLOOR_CHAN_0,
	NOISE_FLOOR_CHAN_1
};

/* Lower bound applied to each smoothed channel energy. */
static Word16 min_chan_enrg[2] = {
	MIN_CHAN_ENRG_0,
	MIN_CHAN_ENRG_1
};

/* Channel energies below this level get the fixed initial noise estimate. */
static Word16 ine_noise[2] = {
	INE_NOISE_0,
	INE_NOISE_1
};

/* Fractional-bit count passed to fn10Log10() for channel energies. */
static Word16 fbits[2] = {
	FRACTIONAL_BITS_0,
	FRACTIONAL_BITS_1
};

/*
 * Right-shift applied to the stored channel energies when the shift
 * state changes; indexed by the NEW state (a negative value is applied
 * as a saturating left shift by the caller).
 */
static Word16 state_change_shift_r[2] = {
	STATE_1_TO_0_SHIFT_R,
	STATE_0_TO_1_SHIFT_R
};

/*
 * Energy scale table given 30,1 input scaling (also accounts for the
 * -6 dB shift on input).  Indexed by shift state; the entry is
 * subtracted from twice the block-normalisation shift to obtain the
 * denormalising right-shift of a channel's energy.
 */
static Word16 enrg_norm_shift[2] = {
	(FRACTIONAL_BITS_0-1+2),
	(FRACTIONAL_BITS_1-1+2)
};


Word16 vad2 (Word16 * farray_ptr, vadState2 * st){	/* Automatic variables */
	Word32 Lenrg;				/* scaled as 30,1 */
	Word32 Ltne;				       /* scaled as 22,9 */
	Word32 Ltce;				        /* scaled as 22,9 or 27,4 */
	Word16 tne_db;				/* scaled as 7,8 */	Word16 tce_db;				/* scaled as 7,8 */	Word16 input_buffer[FRM_LEN];		/* used for block normalising input data */	Word16 data_buffer[FFT_LEN];		/* used for in-place FFT */	Word16 ch_snr[NUM_CHAN];		/* scaled as 7,8 */	Word16 ch_snrq;				/* scaled as 15,0 (in 0.375 dB steps) */	Word16 vm_sum;				/* scaled as 15,0 */	Word16 ch_enrg_dev;			/* scaled as 7,8 */	Word32 Lpeak;				/* maximum channel energy */	Word16 p2a_flag;			      /* flag to indicate spectral peak-to-average ratio > 10 dB */
	Word16 ch_enrg_db[NUM_CHAN];		/* scaled as 7,8 */	Word16 ch_noise_db;			/* scaled as 7,8 */	Word16 alpha;				/* scaled as 0,15 */	Word16 one_m_alpha;			/* scaled as 0,15 */	Word16 update_flag;			/* set to indicate a background noise estimate update */	Word16 i, j, j1, j2;			/* Scratch variables */	Word16 hi1, lo1,hi2,lo2,hi3,lo3,hi4,lo4;

	Word16 tmp,max,tmp1,tmp2,tmp3;
	Word16 normb_shift;		 /* block norm shift count */
	Word16 ivad;			        /* intermediate VAD decision (return value) */
	Word16 tsnrq;			  /* total signal-to-noise ratio (quantized 3 dB steps) scaled as 15,0 */
	Word16 xt;			        /* instantaneous frame SNR in dB, scaled as 7,8 */
	Word16 state_change;
      Word32  tmp32, Ltmp, Ltmp0,Ltmp1,Ltmp2,Ltmp3,Ltmp4,Ltmp5;
      Word16 *pFaray;
      vadState2 * pst;
      pFaray = farray_ptr;
      pst = st;
	  
	/* Increment frame counter */	st->Lframe_cnt ++;
	/* Block normalize the input */      //normb_shift = block_norm(farray_ptr, input_buffer, FRM_LEN, FFT_HEADROOM);
	 max = -32768;
	for (i = 0; i < FRM_LEN; i+=4)
	{
		  tmp   = pFaray[i]   > 0 ?     pFaray[i] :  -pFaray[i];	
		  tmp1 = pFaray[i+1]> 0 ? pFaray[i+1] : -pFaray[i+1];	
		  tmp2 = pFaray[i+2]> 0 ? pFaray[i+2] : -pFaray[i+2];
		  tmp3 = pFaray[i+3]> 0 ? pFaray[i+3] : -pFaray[i+3];	

		  tmp   = tmp>tmp1 ? tmp :tmp1; 
		  tmp2 = tmp2 > tmp3 ? tmp2 :tmp3;
		  tmp   = tmp > tmp2 ? tmp :tmp2;
		  
		  max  = tmp > max ? tmp : max;

	}
	if (max != 0)	{		normb_shift = norm_s(max)- FFT_HEADROOM;
		for (i = 0; i < FRM_LEN; i+= 4 )
		{
			input_buffer[i    ] = normb_shift >0 ? pFaray[i    ]<<normb_shift : pFaray[i    ] >> -normb_shift;
			input_buffer[i+1] = normb_shift >0 ? pFaray[i+1]<<normb_shift : pFaray[i+1] >> -normb_shift;
			input_buffer[i+2] = normb_shift >0 ? pFaray[i+2]<<normb_shift : pFaray[i+2] >> -normb_shift;
			input_buffer[i+3] = normb_shift >0 ? pFaray[i+3]<<normb_shift : pFaray[i+3] >> -normb_shift;
		}	}	else	{		normb_shift = 14;
		memset(input_buffer, 0, 160);

	}
	/* Pre-emphasize the input data and store in the data buffer with the appropriate offset */       memset(data_buffer, 0 , 48);

	pst->pre_emp_mem = shr_r(pst->pre_emp_mem,  (pst->last_normb_shift -normb_shift));
	
	pst->last_normb_shift = normb_shift;							
	data_buffer[24] = input_buffer[0] + ( PRE_EMP_FAC*st->pre_emp_mem >>15 );	
	data_buffer[25] = input_buffer[1] + (PRE_EMP_FAC* input_buffer[0] >> 15 );
	data_buffer[26] = input_buffer[2] + (PRE_EMP_FAC* input_buffer[1] >> 15 );
	data_buffer[27] = input_buffer[3] + (PRE_EMP_FAC* input_buffer[2] >> 15 );
	for (i = 28, j = 4; i < 104; i+=4, j+=4)
	{		data_buffer[i] = input_buffer[j] + (PRE_EMP_FAC* input_buffer[j-1] >> 15 );
		data_buffer[i+1] = input_buffer[j+1] + (PRE_EMP_FAC* input_buffer[j] >> 15 );
		data_buffer[i+2] = input_buffer[j+2] + (PRE_EMP_FAC* input_buffer[j+1] >> 15 );
		data_buffer[i+3] = input_buffer[j+3] + (PRE_EMP_FAC* input_buffer[j+2] >> 15 );
		
	}
	
	st->pre_emp_mem = input_buffer[FRM_LEN-1];							

	memset(&data_buffer[104],0, 48);
	
	/* Perform FFT on the data buffer */	r_fft(data_buffer);	/* Use normb_shift factor to determine the scaling of the energy estimates */	state_change = ( (pst->shift_state == 0)&&( normb_shift <= 0 ) )||((pst->shift_state != 0)&&( normb_shift >= 3 ))? 1 : 0;
	pst->shift_state = (pst->shift_state == 0)&&( normb_shift <= 0 ) ? 1 : ((pst->shift_state != 0)&&( normb_shift >= 3 ) ? 0 : pst->shift_state);

	/* Scale channel energy estimate */							
	if (state_change)	{	       tmp = state_change_shift_r[pst->shift_state];
		pst->Lch_enrg[0]  = tmp >= 0 ? pst->Lch_enrg[0]  >>tmp :  ( pst->Lch_enrg[0] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[0]<< -tmp);
		pst->Lch_enrg[1]  = tmp >= 0 ? pst->Lch_enrg[1]  >>tmp :  ( pst->Lch_enrg[1] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[1]<< -tmp);	
		pst->Lch_enrg[2]  = tmp >= 0 ? pst->Lch_enrg[2]  >>tmp :  ( pst->Lch_enrg[2] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[2]<< -tmp);	
		pst->Lch_enrg[3]  = tmp >= 0 ? pst->Lch_enrg[3]  >>tmp:  ( pst->Lch_enrg[3] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[3]<< -tmp);	
		pst->Lch_enrg[4]  = tmp >= 0 ? pst->Lch_enrg[4]  >>tmp:  ( pst->Lch_enrg[4] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[4]<< -tmp);	
		pst->Lch_enrg[5]  = tmp >= 0 ? pst->Lch_enrg[5]  >>tmp :  ( pst->Lch_enrg[5] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[5]<< -tmp);	
		pst->Lch_enrg[6]  = tmp >= 0 ? pst->Lch_enrg[6]  >>tmp:  ( pst->Lch_enrg[6] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[6]<< -tmp);	
		pst->Lch_enrg[7]  = tmp >= 0 ? pst->Lch_enrg[7]  >>tmp :  ( pst->Lch_enrg[7] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[7]<< -tmp);	
		pst->Lch_enrg[8]  = tmp >= 0 ? pst->Lch_enrg[8]  >>tmp :  ( pst->Lch_enrg[8] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[8]<< -tmp);	
		pst->Lch_enrg[9]  = tmp >= 0 ? pst->Lch_enrg[9]  >>tmp :  ( pst->Lch_enrg[9] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[9]<< -tmp);	
		pst->Lch_enrg[10] = tmp >= 0 ? pst->Lch_enrg[10]>>tmp:  ( pst->Lch_enrg[10] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[10]<< -tmp);	
		pst->Lch_enrg[11] = tmp >= 0 ? pst->Lch_enrg[11]>>tmp :  ( pst->Lch_enrg[11] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[11]<< -tmp);	
		pst->Lch_enrg[12] = tmp >= 0 ? pst->Lch_enrg[12]>>tmp :  ( pst->Lch_enrg[12] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[12]<< -tmp);	
		pst->Lch_enrg[13] = tmp >= 0 ? pst->Lch_enrg[13]>>tmp :  ( pst->Lch_enrg[13] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[13]<< -tmp);	
		pst->Lch_enrg[14] = tmp >= 0 ? pst->Lch_enrg[14]>>tmp :  ( pst->Lch_enrg[14] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[14]<< -tmp);	
		pst->Lch_enrg[15] = tmp >= 0 ? pst->Lch_enrg[15]>>tmp :  ( pst->Lch_enrg[15] > (MAX_32 >>-tmp) ? MAX_32 :  pst->Lch_enrg[15]<< -tmp);	

	}	/* Estimate the energy in each channel */	alpha = (pst->Lframe_cnt == 1) ? 32767 : CEE_SM_FAC;
	one_m_alpha = (pst->Lframe_cnt == 1) ? 0 : ONE_MINUS_CEE_SM_FAC;
	
	for (i = LO_CHAN; i <= HI_CHAN; i++)	{		Lenrg = 0;										
		j1 = ch_tbl[i][0];									
		j2 = ch_tbl[i][1];									
		for (j = j1; j <= j2; j++)		{
			Lenrg +=  (data_buffer[2 * j]*data_buffer[2 * j] + data_buffer[2 * j + 1]*data_buffer[2 * j + 1])<<1;			
			
		}		/* Denorm energy & scale 30,1 according to the state */		Lenrg = L_shr_r(Lenrg,  ( (normb_shift<<1) - enrg_norm_shift[st->shift_state] ));
		/* integrate over time: e[i] = (1-alpha)*e[i] + alpha*enrg/num_bins_in_chan */
		
		tmp = (alpha*ch_tbl_sh[i] >> 15 );
		
		hi1 = (Word16)(Lenrg >> 16);
		lo1 = (Lenrg - (hi1<<16))>>1;

		Ltmp = (hi1*tmp<<1) + ((lo1*tmp>>15)<<1);

		hi1 = (Word16)(pst->Lch_enrg[i] >> 16);
		lo1 = (pst->Lch_enrg[i] - (hi1<<16))>>1;
		

		pst->Lch_enrg[i] =  Ltmp + (hi1*one_m_alpha<<1) +(( lo1*one_m_alpha>>15)<<1);	
              pst->Lch_enrg[i] = pst->Lch_enrg[i]  <  min_chan_enrg[pst->shift_state]  ? min_chan_enrg[pst->shift_state] : pst->Lch_enrg[i] ;

	}	/* Compute the total channel energy estimate (Ltce) */
	Ltmp0   = pst->Lch_enrg[0] +pst->Lch_enrg[1] +pst->Lch_enrg[2] +pst->Lch_enrg[3];
	Ltmp1 = pst->Lch_enrg[4] +pst->Lch_enrg[5] +pst->Lch_enrg[6] +pst->Lch_enrg[7];
	Ltmp2 = pst->Lch_enrg[8] +pst->Lch_enrg[9] +pst->Lch_enrg[10] +pst->Lch_enrg[11];
	Ltmp3 = pst->Lch_enrg[12] +pst->Lch_enrg[13] +pst->Lch_enrg[14] +pst->Lch_enrg[15];
	Ltce = Ltmp0+Ltmp1+Ltmp2+Ltmp3;

	/* Calculate spectral peak-to-average ratio, set flag if p2a > 10 dB */	Lpeak = 0;	
	Ltmp0 = pst->Lch_enrg [2] > pst->Lch_enrg [3] ? pst->Lch_enrg [2]  : pst->Lch_enrg [3] ;
	Lpeak = Ltmp0 > Lpeak ? Ltmp0 : Lpeak;
	
      /* Sine waves not valid for low frequencies */	
	Ltmp0 = pst->Lch_enrg [4] > pst->Lch_enrg [5] ? pst->Lch_enrg [4]  : pst->Lch_enrg [5] ;
	Ltmp1 = pst->Lch_enrg [6] > pst->Lch_enrg [7] ? pst->Lch_enrg [6]  : pst->Lch_enrg [7] ;
	Ltmp2 = pst->Lch_enrg [8] > pst->Lch_enrg [9] ? pst->Lch_enrg [8]  : pst->Lch_enrg [9] ;
	Ltmp3 = pst->Lch_enrg [10] > pst->Lch_enrg [11] ? pst->Lch_enrg [10]  : pst->Lch_enrg [11] ;
	Ltmp4 = pst->Lch_enrg [12] > pst->Lch_enrg [13] ? pst->Lch_enrg [12]  : pst->Lch_enrg [13] ;
	Ltmp5 = pst->Lch_enrg [14] > pst->Lch_enrg [15] ? pst->Lch_enrg [14]  : pst->Lch_enrg [15] ;

	Ltmp0 = Ltmp0>Ltmp1 ? Ltmp0 :Ltmp1;   Ltmp2 = Ltmp2> Ltmp3 ? Ltmp2 : Ltmp3;
	Ltmp4 = Ltmp4>Ltmp5 ? Ltmp4 : Ltmp5;  Ltmp0 = Ltmp0>Ltmp2 ? Ltmp0 : Ltmp2;
	Ltmp0 = Ltmp0> Ltmp4 ? Ltmp0 :Ltmp4;
	Lpeak = Ltmp0 > Lpeak ? Ltmp0 : Lpeak;
	
	/* Set p2a_flag if peak (dB) > average channel energy (dB) + 10 dB */	/*   Lpeak > Ltce/num_channels * 10^(10/10)                        */	/*   Lpeak > (10/16)*Ltce                                          */
	hi1 = (Word16)(Ltce>>16);
	lo1 = (Ltce - (hi1<<16 ))>>1;
	Ltmp = hi1*40960 + ((lo1*20480 >>15)<<1);
	p2a_flag = Lpeak > Ltmp ? 1 : 0;

	/* Initialize channel noise estimate to either the channel energy or fixed level  */	/*   Scale the energy appropriately to yield state 0 (22,9) scaling for noise */													
	if (pst->Lframe_cnt <= 4 )
	{												
		if (p2a_flag == 1)
		{
			pst->Lch_noise[0]   = pst->Lch_noise[1]  = pst->Lch_noise[2]  = pst->Lch_noise[3] =
			pst->Lch_noise[4]   = pst->Lch_noise[5]  = pst->Lch_noise[6]  = pst->Lch_noise[7] =
			pst->Lch_noise[8]   = pst->Lch_noise[9]  = pst->Lch_noise[10] = pst->Lch_noise[11] =
			pst->Lch_noise[12] = pst->Lch_noise[13] = pst->Lch_noise[14] = pst->Lch_noise[15] = INE_NOISE_0;						
			
		}		else		{			for (i = LO_CHAN; i <= HI_CHAN; i++)			{										
				if ( pst->Lch_enrg[i] <  ine_noise[pst->shift_state])
				{					pst->Lch_noise[i] = INE_NOISE_0;					
				}				else				{
				        pst->Lch_noise[i] =  (pst->shift_state == 1) ? (pst->Lch_enrg[i] > 0x03ffffff ? MAX_32 : pst->Lch_enrg[i] << 5) : pst->Lch_enrg[i];
			
				}			}		}	}	/* Compute the channel energy (in dB), the channel SNRs, and the sum of voice metrics */	vm_sum = 0;										
	for (i = LO_CHAN; i <= HI_CHAN; i++)	{		ch_enrg_db[i] = fn10Log10(st->Lch_enrg[i], fbits[st->shift_state]);		
		ch_noise_db   = fn10Log10(st->Lch_noise[i], FRACTIONAL_BITS_0);
		ch_snr[i] =  ch_enrg_db[i] - ch_noise_db ;					
		/* quantize channel SNR in 3/8 dB steps (scaled 7,8 => 15,0) */
		ch_snrq = shr_r((21845*ch_snr[i] >> 15 ), 6);
		/* Accumulate the sum of voice metrics	*/							
 		 j = ch_snrq > 89 ? 89 : ( ch_snrq < 0? 0 : ch_snrq );
		 vm_sum += vm_tbl[j] ;
		
	}	/* Initialize NOMINAL peak voice energy and average noise energy, calculate instantaneous SNR */ 												
	if ( (st->Lframe_cnt <= 4)  || st->fupdate_flag == 1)
	{		/* tce_db = (96 - 22 - 10*log10(64) (due to FFT)) scaled as 7,8 */		tce_db = 14320;										
		pst->negSNRvar = 0;									
		pst->negSNRbias = 0;									
		/* Compute the total noise estimate (Ltne) */		//Ltne = 0;		
		Ltmp0 =  pst->Lch_noise[0] +pst->Lch_noise[1]+pst->Lch_noise[2]+pst->Lch_noise[3];
		Ltmp1 =  pst->Lch_noise[4] +pst->Lch_noise[5]+pst->Lch_noise[6]+pst->Lch_noise[7];
		Ltmp2 =  pst->Lch_noise[8] +pst->Lch_noise[9]+pst->Lch_noise[10]+pst->Lch_noise[11];
		Ltmp3 =  pst->Lch_noise[12] +pst->Lch_noise[13]+pst->Lch_noise[14]+pst->Lch_noise[15];
		Ltne    = Ltmp0+Ltmp1+Ltmp2+Ltmp3;
	
		/* Get total noise in dB */		tne_db = fn10Log10(Ltne, FRACTIONAL_BITS_0);		/* Initialise instantaneous and long-term peak signal-to-noise ratios */		xt = tce_db - tne_db ;
		st->tsnr = xt;										
	}	else	{		/* Calculate instantaneous frame signal-to-noise ratio */		/* xt = 10*log10( sum(2.^(ch_snr*0.1*log2(10)))/length(ch_snr) ) */
		//Ltmp1 = 0;	
		Ltmp0 = 0;
		for (i=LO_CHAN; i<=HI_CHAN; i+=4) 
	      {
			
			Ltmp1 = (ch_snr[i]*10885 >> 7);			
			Ltmp2 = (ch_snr[i+1]*10885 >> 7);	
			Ltmp3 = (ch_snr[i+2]*10885 >> 7);	
			Ltmp4 = (ch_snr[i+3]*10885 >> 7);	
			
			hi1 = (Word16)(Ltmp1 >> 16);  hi2 = (Word16)(Ltmp2 >> 16);hi3 = (Word16)(Ltmp3 >> 16); hi4 = (Word16)(Ltmp4 >> 16);
			lo1 = (Ltmp1 - (hi1<<16))>>1;		lo2 = (Ltmp2 - (hi2<<16))>>1;
			lo3 = (Ltmp3 - (hi3<<16))>>1;		lo4 = (Ltmp4 - (hi4<<16))>>1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -