vad2.c

来自「arm音频编解码库」· C语言代码 · 共 915 行 · 第 1/2 页
915 行
/*******************************************************************************                                                                        *      GSM AMR-NB speech codec   R98   Version 7.6.0   December 12, 2001*                                R99   Version 3.3.0                *                                REL-4 Version 4.1.0                ********************************************************************************      File             : vad2.c*      Purpose          : Voice Activity Detection (VAD) for AMR (option 2)*******************************************************************************//*******************************************************************************                         VERSION ID******************************************************************************/const char vad2_id[] = "@(#)$Id $";/*************************************************************************** * *   FUNCTION NAME: vad2() * *   PURPOSE: *     This function provides the Voice Activity Detection function option 2 *     for the Adaptive Multi-rate (AMR) codec. * *   INPUTS: * *     farray_ptr *                     pointer to Word16[80] input array *     vadState2 *                     pointer to vadState2 state structure * *   OUTPUTS: * *     state variables are updated * *   RETURN VALUE: * *     Word16 *                     VAD(m) - two successive calls to vad2() yield *                     the VAD decision for the 20 ms frame: *                     VAD_flag = VAD(m-1) || VAD(m) * * *************************************************************************//* Includes */#include <stdio.h>#include <stdlib.h>#include "typedef.h"#include "cnst.h"#include "basic_op.h"#include "oper_32b.h"#include "count.h"#include "log2.h"#include "pow2.h"#include "vad2.h"/* Local functions *//*************************************************************************** * *   FUNCTION NAME: fn10Log10 * *   PURPOSE: *     The purpose of this function is to take the 10*log base 10 of input and *     divide by 128 and return; i.e. output = 10*log10(input)/128 (scaled as 7,8) * *   INPUTS: * *     L_Input *                     input (scaled as 31-fbits,fbits) *     fbits *                     number of fractional bits on input * *   OUTPUTS: * *     none * *   RETURN VALUE: * *     Word16 *                     output (scaled as 7,8) * *   DESCRIPTION: * *     10*log10(x)/128 = 10*(log10(2) * (log2(x<<fbits)-log2(1<<fbits)) >> 7 *                     = 3.0103 * (log2(x<<fbits) - fbits) >> 7 *                     = ((3.0103/4.0 * (log2(x<<fbits) - fbits) << 2) >> 7 *                     = (3.0103/4.0 * (log2(x<<fbits) - fbits) >> 5 * *************************************************************************/Word16 fn10Log10 (Word32 L_Input, Word16 fbits){	Word16 integer;		/* Integer part of Log2.   (range: 0<=val<=30) */	Word16 fraction;	/* Fractional part of Log2. (range: 0<=val<1) */	Word32 Ltmp;	Word16 tmp;        Log2(L_Input, &integer, &fraction);	integer = sub(integer, fbits);	Ltmp = Mpy_32_16 (integer, fraction, 24660);	/* 24660 = 10*log10(2)/4 scaled 0,15 */	Ltmp = L_shr_r(Ltmp, 5+1);			/* extra shift for 30,1 => 15,0 extract correction */        tmp = extract_l(Ltmp);        return (tmp);}/*************************************************************************** * *   FUNCTION NAME: block_norm * *   PURPOSE: *     The purpose of this function is block normalise the input data sequence * *   INPUTS: * *     &in[0] *                     pointer to data sequence to be normalised *     length *                     number of elements in data sequence *     headroom *                     number of headroom bits (i.e.,  * *   OUTPUTS: * *     &out[0] *                     normalised output data sequence pointed to by &out[0] * *   RETURN VALUE: * *     Word16 *                     number of bits sequence was left shifted * *   DESCRIPTION: * *                     1) Search for maximum absolute valued data element *                     2) Normalise the max element with "headroom" *                     3) Transfer/shift the input sequence to the output buffer *                     4) Return the number of left shifts * *   CAVEATS: *                     An input sequence of all zeros will return the maximum *                     number of left shifts allowed, NOT the value returned *                     by a norm_s(0) call, since it desired to associate an *                     all zeros sequence with low energy. * *************************************************************************/Word16 block_norm (Word16 * in, Word16 * out, Word16 length, Word16 headroom){	Word16 i, max, scnt, adata;        max = abs_s(in[0]);	for (i = 1; i < length; i++)	{                adata = abs_s(in[i]);                           test();		if (sub(adata, max) > 0)		{			max = adata;				move16();		}	}	test();	if (max != 0)	{		scnt = sub(norm_s(max), headroom);		for (i = 0; i < length; i++)		{			out[i] = shl(in[i], scnt);	       	move16();		}	}	else	{		scnt = sub(16, headroom);		for (i = 0; i < length; i++)		{			out[i] = 0;                             move16();		}	}	return (scnt);}/********************************************* The VAD function ***************************************************/Word16 vad2 (Word16 * farray_ptr, vadState2 * st){	/*	 * The channel table is defined below.  In this table, the	 * lower and higher frequency coefficients for each of the 16	 * channels are specified.  The table excludes the coefficients	 * with numbers 0 (DC), 1, and 64 (Foldover frequency).	 */	static Word16 ch_tbl[NUM_CHAN][2] =	{		{2, 3},		{4, 5},		{6, 7},		{8, 9},		{10, 11},		{12, 13},		{14, 16},		{17, 19},		{20, 22},		{23, 26},		{27, 30},		{31, 35},		{36, 41},		{42, 48},		{49, 55},		{56, 63}	};	/* channel energy scaling table - allows efficient division by number         * of DFT bins in the channel: 1/2, 1/3, 1/4, etc.	 */	static Word16 ch_tbl_sh[NUM_CHAN] =	{		16384, 16384, 16384, 16384, 16384, 16384, 10923, 10923,		10923, 8192, 8192, 6554, 5461, 4681, 4681, 4096	};	/*	 * The voice metric table is defined below.  It is a non-	 * linear table with a deadband near zero.  It maps the SNR	 * index (quantized SNR value) to a number that is a measure	 * of voice quality.	 */	static Word16 vm_tbl[90] =	{		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,		3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7,		8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 13, 14, 15,		15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23, 24,		24, 25, 26, 27, 28, 28, 29, 30, 31, 32, 33, 34,		35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45,		46, 47, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50,		50, 50	};	/* hangover as a function of peak SNR (3 dB steps) */	static Word16 hangover_table[20] =	{		30, 30, 30, 30, 30, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 8, 8, 8	};	/* burst sensitivity as a function of peak SNR (3 dB steps) */	static Word16 burstcount_table[20] =	{		8, 8, 8, 8, 8, 8, 8, 8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4	};	/* voice metric sensitivity as a function of peak SNR (3 dB steps) */	static Word16 vm_threshold_table[20] =	{                34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 40, 51, 71, 100, 139, 191, 257, 337, 432	};	/* State tables that use 22,9 or 27,4 scaling for ch_enrg[] */	static Word16 noise_floor_chan[2] =	{NOISE_FLOOR_CHAN_0, NOISE_FLOOR_CHAN_1};	static Word16 min_chan_enrg[2] =	{MIN_CHAN_ENRG_0, MIN_CHAN_ENRG_1};	static Word16 ine_noise[2] = 		{INE_NOISE_0, INE_NOISE_1};	static Word16 fbits[2] = 		{FRACTIONAL_BITS_0, FRACTIONAL_BITS_1};	static Word16 state_change_shift_r[2] = {STATE_1_TO_0_SHIFT_R, STATE_0_TO_1_SHIFT_R};	/* Energy scale table given 30,1 input scaling (also account for -6 dB shift on input) */	static Word16 enrg_norm_shift[2] = 	{(FRACTIONAL_BITS_0-1+2), (FRACTIONAL_BITS_1-1+2)};	/* Automatic variables */	Word32 Lenrg;				/* scaled as 30,1 */	Word32 Ltne;				/* scaled as 22,9 */	Word32 Ltce;				/* scaled as 22,9 or 27,4 */	Word16 tne_db;				/* scaled as 7,8 */	Word16 tce_db;				/* scaled as 7,8 */	Word16 input_buffer[FRM_LEN];		/* used for block normalising input data */	Word16 data_buffer[FFT_LEN];		/* used for in-place FFT */	Word16 ch_snr[NUM_CHAN];		/* scaled as 7,8 */	Word16 ch_snrq;				/* scaled as 15,0 (in 0.375 dB steps) */	Word16 vm_sum;				/* scaled as 15,0 */	Word16 ch_enrg_dev;			/* scaled as 7,8 */	Word32 Lpeak;				/* maximum channel energy */	Word16 p2a_flag;			/* flag to indicate spectral peak-to-average ratio > 10 dB */	Word16 ch_enrg_db[NUM_CHAN];		/* scaled as 7,8 */	Word16 ch_noise_db;			/* scaled as 7,8 */	Word16 alpha;				/* scaled as 0,15 */	Word16 one_m_alpha;			/* scaled as 0,15 */	Word16 update_flag;			/* set to indicate a background noise estimate update */	Word16 i, j, j1, j2;			/* Scratch variables */	Word16 hi1, lo1;	Word32 Ltmp, Ltmp1, Ltmp2;	Word16 tmp;	Word16 normb_shift;		/* block norm shift count */	Word16 ivad;			/* intermediate VAD decision (return value) */	Word16 tsnrq;			/* total signal-to-noise ratio (quantized 3 dB steps) scaled as 15,0 */	Word16 xt;			/* instantaneous frame SNR in dB, scaled as 7,8 */	Word16 state_change;	/* Increment frame counter */	st->Lframe_cnt = L_add(st->Lframe_cnt, 1);	/* Block normalize the input */	normb_shift = block_norm(farray_ptr, input_buffer, FRM_LEN, FFT_HEADROOM);	/* Pre-emphasize the input data and store in the data buffer with the appropriate offset */	for (i = 0; i < DELAY; i++)	{		data_buffer[i] = 0;									move16();	}	st->pre_emp_mem = shr_r(st->pre_emp_mem, sub(st->last_normb_shift, normb_shift));	st->last_normb_shift = normb_shift;								move16();	data_buffer[DELAY] = add(input_buffer[0], mult(PRE_EMP_FAC, st->pre_emp_mem));			move16();	for (i = DELAY + 1, j = 1; i < DELAY + FRM_LEN; i++, j++)	{		data_buffer[i] = add(input_buffer[j], mult(PRE_EMP_FAC, input_buffer[j-1]));		move16();	}	st->pre_emp_mem = input_buffer[FRM_LEN-1];							move16();	for (i = DELAY + FRM_LEN; i < FFT_LEN; i++)	{		data_buffer[i] = 0;									move16();	}	/* Perform FFT on the data buffer */	r_fft(data_buffer);	/* Use normb_shift factor to determine the scaling of the energy estimates */	state_change = 0;										move16();													test();	if (st->shift_state == 0)	{												test();		if (sub(normb_shift, -FFT_HEADROOM+2) <= 0)		{			state_change = 1;								move16();			st->shift_state = 1;								move16();		}	}	else	{												test();		if (sub(normb_shift, -FFT_HEADROOM+5) >= 0)		{			state_change = 1;								move16();			st->shift_state = 0;								move16();		}	}	/* Scale channel energy estimate */								test();	if (state_change)	{		for (i = LO_CHAN; i <= HI_CHAN; i++)		{			st->Lch_enrg[i] = L_shr(st->Lch_enrg[i], state_change_shift_r[st->shift_state]);	move32();		}	}	/* Estimate the energy in each channel */													test();	if (L_sub(st->Lframe_cnt, 1) == 0)	{		alpha = 32767;										move16();		one_m_alpha = 0;									move16();	}	else	{		alpha = CEE_SM_FAC;									move16();		one_m_alpha = ONE_MINUS_CEE_SM_FAC;							move16();	}	for (i = LO_CHAN; i <= HI_CHAN; i++)	{		Lenrg = 0;										move16();		j1 = ch_tbl[i][0];									move16();		j2 = ch_tbl[i][1];									move16();		for (j = j1; j <= j2; j++)		{			Lenrg = L_mac(Lenrg, data_buffer[2 * j], data_buffer[2 * j]);			Lenrg = L_mac(Lenrg, data_buffer[2 * j + 1], data_buffer[2 * j + 1]);		}		/* Denorm energy & scale 30,1 according to the state */		Lenrg = L_shr_r(Lenrg, sub(shl(normb_shift, 1), enrg_norm_shift[st->shift_state]));		/* integrate over time: e[i] = (1-alpha)*e[i] + alpha*enrg/num_bins_in_chan */		tmp = mult(alpha, ch_tbl_sh[i]);		L_Extract (Lenrg, &hi1, &lo1);		Ltmp = Mpy_32_16(hi1, lo1, tmp);		L_Extract (st->Lch_enrg[i], &hi1, &lo1);		st->Lch_enrg[i] = L_add(Ltmp, Mpy_32_16(hi1, lo1, one_m_alpha));			move32();													test();		if (L_sub(st->Lch_enrg[i], min_chan_enrg[st->shift_state]) < 0)		{			st->Lch_enrg[i] = min_chan_enrg[st->shift_state];				move32();		}	}	/* Compute the total channel energy estimate (Ltce) */	Ltce = 0;											move16();	for (i = LO_CHAN; i <= HI_CHAN; i++)	{		Ltce = L_add(Ltce, st->Lch_enrg[i]);	}	/* Calculate spectral peak-to-average ratio, set flag if p2a > 10 dB */	Lpeak = 0;											move32();	for (i = LO_CHAN+2; i <= HI_CHAN; i++)	/* Sine waves not valid for low frequencies */	{												test();		if (L_sub(st->Lch_enrg [i], Lpeak) > 0)		{
vad2.c - 源码说明

本页面展示了「arm音频编解码库」中的 vad2.c 源码文件，采用 C语言编程语言编写，共 915 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与arm相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?