📄 sp_enc.cpp

📁 实现了录音,放音功能!在evc4.0下编译功过,wince5.0下能正常录音,放音,暂停录放音!
💻 CPP
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
}
//#endif

/*
* Pitch_ol
*
*
* Parameters:
*    mode           I: AMR mode
*    vadSt          B: VAD state struct
*    signal         I: signal used to compute the open loop pitch
*                                                 [[-pit_max]:[-1]]
*    pit_min        I: minimum pitch lag
*    pit_max        I: maximum pitch lag
*    L_frame        I: length of frame to compute pitch
*    dtx            I: DTX flag
*    idx            I: frame index
*
* Function:
*    Compute the open loop pitch lag.
*
*    The correlation
*
*          79
*    O(k) = SUM Sw(n)*Sw(n-k)
*          n=0
*
*    is computed by comp_corr() and then searched by Lag_max() in three
*    lag ranges, in this order:
*       range 1:   4*pit_min   ...      pit_max
*       range 2:   2*pit_min   ...      4*pit_min-1
*       range 3:   pit_min     ...      2*pit_min-1
*
*    Each search yields a delay t(i) and a maximum M(i), i = 1, 2, 3.
*    According to the codec description the maxima are normalized by
*
*    SQRT[SUM[POW(Sw(n-t(i)), 2]], i = 1, 2, 3,
*         n
*
*    (that normalization is done inside Lag_max(), which is defined
*    elsewhere). The winner, Top, is selected by favouring the delays in
*    the lower ranges: a shorter-delay candidate replaces the current
*    winner as soon as its maximum exceeds 0.85 times the winner's maximum.
*
*    Top = t(1)
*    M(Top) = M(1)
*    if M(2) > 0.85 * M(Top)
*       M(Top) = M(2)
*       Top = t(2)
*    end
*    if M(3) > 0.85 * M(Top)
*       M(Top) = M(3)
*       Top = t(3)
*    end
*
*    When dtx is set the VAD tone detector state is updated first, and for
*    idx == 1 the high-pass filtered correlation maximum from hp_max() is
*    halved and stored in vadSt->best_corr_hp.
*
* Returns:
*    Top, the selected open loop pitch lag
*/
static INT32 Pitch_ol( enum Mode mode, vadState *vadSt, float signal[], INT32 pit_min,
					   INT32 pit_max, INT16 L_frame, INT32 dtx, INT16 idx )
{
	float corr[PIT_MAX + 1];
	/* comp_corr/Lag_max/hp_max are handed a pointer to element pit_max of
	   the buffer; lags are presumably addressed as negative offsets from it,
	   as Lag_max_wght does with corr[-i] -- confirm against those helpers */
	float *corr_end = &corr[pit_max];
	float peak_long, peak_mid, peak_short;
	/* the lags returned by Lag_max are kept in float, as in the original,
	   and converted back to INT32 on return */
	float lag_long, lag_mid, lag_short;
	float best_peak, best_lag;
	float hp_corr_peak;
	INT32 range_lo, range_hi;


	if ( dtx )
	{
		/* update tone detection: flag is 1 for MR475/MR515, 0 otherwise */
		vad_tone_detection_update( vadSt,
			( ( mode == MR475 ) || ( mode == MR515 ) ) ? 1 : 0 );
	}

	/* correlations for all lags pit_min..pit_max */
	comp_corr( signal, L_frame, pit_max, pit_min, corr_end );

	/* Range 1: 4*pit_min .. pit_max */
	range_lo = pit_min << 2;
	lag_long = Lag_max( vadSt, corr_end, signal, L_frame, pit_max, range_lo,
		&peak_long, dtx );

	/* Range 2: 2*pit_min .. 4*pit_min-1 */
	range_hi = range_lo - 1;
	range_lo = pit_min << 1;
	lag_mid = Lag_max( vadSt, corr_end, signal, L_frame, range_hi, range_lo,
		&peak_mid, dtx );

	/* Range 3: pit_min .. 2*pit_min-1 */
	range_hi = range_lo - 1;
	lag_short = Lag_max( vadSt, corr_end, signal, L_frame, range_hi, pit_min,
		&peak_short, dtx );

	if ( dtx && ( idx == 1 ) )
	{
		/* calculate max high-passed filtered correlation of all lags */
		hp_max( corr_end, signal, L_frame, pit_max, pit_min, &hp_corr_peak );

		/* update complex background detector */
		vadSt->best_corr_hp = hp_corr_peak * 0.5F;
	}

	/* The best open-loop delay: start from the longest-delay candidate and
	   let each shorter-delay candidate take over if it is within 85 % */
	best_peak = peak_long;
	best_lag = lag_long;

	if ( peak_mid > ( best_peak * 0.85F ) )
	{
		best_peak = peak_mid;
		best_lag = lag_mid;
	}

	if ( peak_short > ( best_peak * 0.85F ) )
	{
		best_lag = lag_short;
	}

	return( INT32 )best_lag;
}


/*
* Lag_max_wght
*
*
* Parameters:
*    vadSt          B: vad structure
*    corr           I: correlation vector, read as corr[-lag]
*    signal         I: signal, read at [-lag] ... [FRAME_SIZE_BY2 - 1]
*    old_lag        I: old open-loop lag
*    cor_max        O: maximum correlation (always set to 0 here)
*    wght_flg       I: weighting function flag; > 0 enables the extra
*                      weighting around old_lag
*    gain_flg       O: open-loop gain flag, t0 - 0.4 * t1
*    dtx            I: dtx on/off
*
* Function:
*    Find the lag in PIT_MIN ... PIT_MAX that has maximum weighted
*    correlation. (The original header documents this range as
*    minimum lag = 20, maximum lag = 143.)
*
*    Every correlation is scaled by a weight from the corrweight table
*    (starting at entry 250 for PIT_MAX and stepping down one entry per
*    lag). When wght_flg > 0 it is scaled a second time by the entry that
*    starts at 266 - old_lag and steps down the same way. On ties the
*    smaller lag wins because the search runs downwards with ">=".
*
*    For the selected lag the cross-correlation t0 and the energy t1 of
*    the delayed signal are computed over FRAME_SIZE_BY2 samples. With dtx
*    set these are passed to the VAD tone detector.
*
* Returns:
*    p_max             lag found
*/
static INT32 Lag_max_wght( vadState *vadSt, float corr[], float signal[], INT32 old_lag,
						   INT32 *cor_max, INT32 wght_flg, float *gain_flg, INT32 dtx )
{
	float best_score = -FLT_MAX;
	float score;
	float cross, energy;
	INT32 best_lag = PIT_MAX;
	INT32 lag, step, n;


	/* find maximum correlation with weighting; step counts how far the
	   search has moved down from PIT_MAX and selects the table entries */
	for ( lag = PIT_MAX, step = 0; lag >= PIT_MIN; lag--, step++ )
	{
		/* Weighting of the correlation function. */
		score = corr[ - lag] * corrweight[250 - step];

		/* see if the neighbouring emphasis is used */
		if ( wght_flg > 0 )
		{
			/* Weight the neighbourhood of the old lag. */
			score *= corrweight[266 - old_lag - step];
		}

		if ( score >= best_score )
		{
			best_score = score;
			best_lag = lag;
		}
	}

	/* Compute cross-correlation and energy at the selected lag */
	cross = 0;
	energy = 0;

	for ( n = 0; n < FRAME_SIZE_BY2; n++ )
	{
		cross += signal[n] * signal[n - best_lag];
		energy += signal[n - best_lag] * signal[n - best_lag];
	}

	if ( dtx )
	{
		/* update and detect tone */
		vad_tone_detection_update( vadSt, 0 );
		vad_tone_detection( vadSt, cross, energy );
	}

	/*
	* gain flag is set according to the open_loop gain:
	* it is positive when t0 > 0.4 * t1
	*/
	*gain_flg = cross - ( energy * 0.4F );
	*cor_max = 0;
	return( best_lag );
}


/*
* gmed_n
*
*
* Parameters:
*    ind               I: values
*    n                 I: The number of values
*
* Function:
*    Calculates N-point median.
*
*    The result is the element that would sit at position n >> 1 after
*    sorting the values in descending order. For odd n this is the true
*    median; for even n it is the lower of the two middle values.
*
*    The value is located by rank counting instead of sorting into a
*    scratch buffer, so there is no limit on n and the full INT32 value
*    range is handled (no reserved sentinel values).
*
* Returns:
*    the median value (not its index); 0 when n <= 0
*/
static INT32 gmed_n( INT32 ind[], INT32 n )
{
	INT32 i, j;
	INT32 target;
	INT32 greater, equal;


	/* nothing to select from */
	if ( n <= 0 )
	{
		return( 0 );
	}

	/* position of the median in descending order */
	target = n >> 1;

	for ( i = 0; i < n; i++ )
	{
		greater = 0;
		equal = 0;

		for ( j = 0; j < n; j++ )
		{
			if ( ind[j] > ind[i] )
			{
				greater++;
			}
			else if ( ind[j] == ind[i] )
			{
				equal++;
			}
		}

		/* in descending order the copies of ind[i] occupy positions
		   greater ... greater + equal - 1 */
		if ( ( greater <= target ) && ( target < ( greater + equal ) ) )
		{
			return( ind[i] );
		}
	}

	/* not reached: some element always covers the target position */
	return( ind[0] );
}


/*
* Pitch_ol_wgh
*
*
* Parameters:
*    old_T0_med     B: median of the old lags; read as the "old lag" for
*                      the weighted search, then updated
*    wght_flg       B: weighting function flag; read for the search, then
*                      set to 0 when *ada_w < 0.3 and to 1 otherwise
*    ada_w          B: adaptive weight; reset to 1 on a positive gain
*                      flag, otherwise multiplied by 0.9
*    vadSt          B: VAD state struct
*    signal         I: signal used to compute the open loop pitch
*                                                  [[-pit_max]:[-1]]
*    old_lags       B: history of the 5 most recent lags
*    ol_gain_flg    O: OL gain flag; entry [idx] is written
*    idx            I: frame index
*    dtx            I: DTX flag
*
* Function:
*    Open-loop pitch search with weight
*
*    Open-loop pitch analysis is performed twice per frame (every 10 ms)
*    for the 10.2 kbit/s mode to find two estimates of the pitch lag
*    in each frame. The open-loop pitch analysis is done in order to simplify
*    the pitch analysis and confine the closed loop pitch search to
*    a small number of lags around the open-loop estimated lags.
*    Open-loop pitch estimation is based on the weighted speech signal
*    which is obtained by filtering the input speech signal through
*    the weighting filter.
*    The correlation of weighted speech is determined.
*    The estimated pitch-lag is the delay that maximises
*    the weighted autocorrelation function. To enhance pitch-lag analysis
*    the autocorrelation function estimate is modified by a weighting window.
*    The weighting emphasises relevant pitch-lags, thus increasing
*    the likelihood of selecting the correct delay.
*    minimum pitch lag = 20
*    maximum pitch lag = 143
*
*    If the gain flag for this half-frame is positive the new lag is pushed
*    into old_lags and *old_T0_med becomes the 5-point median of the
*    history; otherwise *old_T0_med is simply set to the new lag.
*
* Returns:
*    p_max1            open loop pitch lag
*/
static INT32 Pitch_ol_wgh( INT32 *old_T0_med, INT16 *wght_flg, float *ada_w,
						   vadState *vadSt, float signal[], INT32 old_lags[],
						   float ol_gain_flg[], INT16 idx, INT32 dtx )
{
	float corr[PIT_MAX + 1];
	float *corr_end = &corr[PIT_MAX];
	float hp_corr_peak;
	INT32 cor_max_out;   /* receives Lag_max_wght's cor_max; not used further */
	INT32 lag, k;


	/* calculate all correlations of signal, from pit_min to pit_max */
	comp_corr( signal, FRAME_SIZE_BY2, PIT_MAX, PIT_MIN, corr_end );

	/* weighted search; also writes ol_gain_flg[idx] */
	lag = Lag_max_wght( vadSt, corr_end, signal, *old_T0_med,
		&cor_max_out, *wght_flg, &ol_gain_flg[idx], dtx );

	if ( ol_gain_flg[idx] > 0 )
	{
		/* Calculate 5-point median of previous lags */
		/* Shift buffer: drop the oldest entry, newest goes to slot 0 */
		for ( k = 4; k >= 1; k-- )
		{
			old_lags[k] = old_lags[k - 1];
		}
		old_lags[0] = lag;
		*old_T0_med = gmed_n( old_lags, 5 );
		*ada_w = 1;
	}
	else
	{
		*old_T0_med = lag;
		*ada_w *= 0.9F;
	}

	/* the extra weighting around the old lag is switched off once the
	   adaptive weight has decayed below 0.3 */
	*wght_flg = ( INT16 )( ( *ada_w < 0.3 ) ? 0 : 1 );

	if ( dtx && ( idx == 1 ) )
	{
		/* calculate max high-passed filtered correlation of all lags */
		hp_max( corr_end, signal, FRAME_SIZE_BY2, PIT_MAX, PIT_MIN, &hp_corr_peak );

		/* update complex background detector */
		vadSt->best_corr_hp = hp_corr_peak * 0.5F;
	}

	return( lag );
}


/*
* ol_ltp
*
*
* Parameters:
*    mode              I: AMR mode
*    vadSt             B: VAD state struct
*    wsp               I: signal used to compute the OL pitch
*    T_op              O: open loop pitch lag
*    ol_gain_flg       O: OL gain flag; entries [0] and [1] are cleared
*                         for every mode except MR102, where the array is
*                         handed to Pitch_ol_wgh
*    old_T0_med        B: old lags median (MR102 only)
*    wght_flg          B: weighting function flag (MR102 only)
*    ada_w             B: adaptive weight (MR102 only)
*    old_lags          B: history with old stored lags (MR102 only)
*    dtx               I: DTX flag
*    idx               I: frame index
*
* Function:
*    Compute the open loop pitch lag.
*
*    Open-loop pitch analysis is performed in order to simplify
*    the pitch analysis and confine the closed-loop pitch search to
*    a small number of lags around the open-loop estimated lags.
*    Open-loop pitch estimation is based on the weighted speech signal Sw(n)
*    which is obtained by filtering the input speech signal through
*    the weighting filter W(z) = A(z/g1) / A(z/g2). That is,
*    in a subframe of size L, the weighted speech is given by:
*
*                10
*    Sw(n) = S(n) + SUM[ a(i) * g1(i) * S(n-i) ]
*                i=1
*                   10
*                - SUM[ a(i) * g2(i) * Sw(n-i) ], n = 0, ..., L-1
*                  i=1
*
*    Dispatch by mode (tests are made in this order):
*       MR475, MR515      Pitch_ol over FRAME_SIZE, lags from PIT_MIN
*       other <= MR795    Pitch_ol over FRAME_SIZE_BY2, lags from PIT_MIN
*       MR102             Pitch_ol_wgh
*       remaining modes   Pitch_ol over FRAME_SIZE_BY2, lags from
*                         PIT_MIN_MR122
*
* Returns:
*    void
*/
static void ol_ltp( enum Mode mode, vadState *vadSt, float wsp[], INT32 *T_op,
							   float ol_gain_flg[], INT32 *old_T0_med, INT16 *wght_flg,
							   float *ada_w, INT32 *old_lags, INT32 dtx, INT16 idx )
{
	/* only the MR102 search produces gain flags; clear them otherwise */
	if ( mode != MR102 )
	{
		ol_gain_flg[0] = 0;
		ol_gain_flg[1] = 0;
	}

	/* the two lowest modes analyse FRAME_SIZE samples in one call */
	if ( ( mode == MR475 ) || ( mode == MR515 ) )
	{
		*T_op = Pitch_ol( mode, vadSt, wsp, PIT_MIN, PIT_MAX, FRAME_SIZE, dtx, idx );
		return;
	}

	/* remaining modes up to MR795: FRAME_SIZE_BY2 samples */
	if ( mode <= MR795 )
	{
		*T_op = Pitch_ol( mode, vadSt, wsp, PIT_MIN, PIT_MAX, FRAME_SIZE_BY2, dtx, idx );
		return;
	}

	/* MR102: weighted open-loop search with lag history */
	if ( mode == MR102 )
	{
		*T_op = Pitch_ol_wgh( old_T0_med, wght_flg, ada_w, vadSt,
			wsp, old_lags, ol_gain_flg, idx, dtx );
		return;
	}

	/* everything else: different minimum lag */
	*T_op = Pitch_ol( mode, vadSt, wsp, PIT_MIN_MR122,
		PIT_MAX, FRAME_SIZE_BY2, dtx, idx );
}


/*
* subframePreProc
*
*
* Parameters:
*    mode           I: AMR mode
*    gamma1         I: spectral exp. factor 1
*    gamma1_12k2    I: spectral exp. factor 1 for EFR
*    gamma2         I: spectral exp. factor 2
*    A              I: A(z) unquantized for the 4 subframes
*    Aq             I: A(z)   quantized for the 4 subframes
*    speech         I: speech segment
*    mem_err        I: pointer to error signal
*    mem_w0         I: memory of weighting filter
*    zero           I: pointer to zero vector
*    ai_zero        O: history of weighted synth. filter
*    exc            O: long term prediction residual
*    h1             O: impulse response
*    xn             O: target vector for pitch search
*    res2           O: long term prediction residual
*    error          O: error of LPC synthesis filter
*
* Function:
*    Subframe preprocessing
*
*    Impulse response computation:
*       The impulse response, h(n), of the weighted synthesis filter
*
*       H(z) * W(z) = A(z/g1) / ( A'(z) * A(z/g2) )
*
*       is computed each subframe. This impulse response is needed for
*       the search of adaptive and fixed codebooks. The impulse response h(n)
*       is computed by filtering the vector of coefficients of
*       the filter A(z/g1) extended by zeros through the two filters
*       1/A'(z) and 1/A(z/g2).
*
*    Target signal computation:
*       The target signal for adaptive codebook search is usually computed
*       by subtracting the zero input response of
*       the weighted synthesis filter H(z) * W(z) from the weighted
*       speech signal Sw(n). This is performed on a
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -