📄 sp_enc.cpp
字号:
}
//#endif
/*
* Pitch_ol
*
*
* Parameters:
* mode I: AMR mode
* vadSt B: VAD state struct
* signal I: signal used to compute the open loop pitch
* [[-pit_max]:[-1]]
* pit_min I: minimum pitch lag
* pit_max I: maximum pitch lag
* L_frame I: length of frame to compute pitch
* dtx I: DTX flag
* idx I: frame index
*
* Function:
* Compute the open loop pitch lag.
*
* Open-loop pitch analysis is performed twice per frame (each 10 ms)
* to find two estimates of the pitch lag in each frame.
* Open-loop pitch analysis is performed as follows.
* In the first step, 3 maxima of the correlation:
*
*          79
* O(k) =  SUM  Sw(n) * Sw(n-k)
*         n=0
*
* are found in the three ranges:
* pit_min ... 2*pit_min-1
* 2*pit_min ... 4*pit_min-1
* 4*pit_min ... pit_max
*
* The retained maxima O(t(i)), i = 1, 2, 3, are normalized by dividing by
*
*    SQRT[ SUM[ POW(Sw(n-t(i)), 2) ] ],  i = 1, 2, 3,
*           n
*
* respectively.
* The normalized maxima and corresponding delays are denoted by
* (M(i), t(i)), i = 1, 2, 3. The winner, Top, among the three normalized
* correlations is selected by favouring the delays with the values
* in the lower range. This is performed by weighting the normalized
* correlations corresponding to the longer delays. The best
* open-loop delay Top is determined as follows:
*
*    Top = t(1)
*    M(Top) = M(1)
*    if M(2) > 0.85 * M(Top)
*       M(Top) = M(2)
*       Top = t(2)
*    end
*    if M(3) > 0.85 * M(Top)
*       M(Top) = M(3)
*       Top = t(3)
*    end
*
* Returns:
* open-loop pitch lag of the selected section
*/
static INT32 Pitch_ol( enum Mode mode, vadState *vadSt, float signal[], INT32 pit_min,
      INT32 pit_max, INT16 L_frame, INT32 dtx, INT16 idx )
{
   /*
    * Open-loop pitch search: one candidate lag is found in each of three
    * lag sections, then the candidates are compared with a 0.85 factor.
    * The VAD option 2 bookkeeping that was only present as commented-out
    * code is not part of this routine.
    */
   float corr[PIT_MAX + 1];

   /* comp_corr, Lag_max and hp_max are all handed the element at pit_max */
   float *corr_base = &corr[pit_max];

   /* section boundaries derived from the minimum lag */
   const INT32 quad_min = pit_min << 2;
   const INT32 dbl_min = pit_min << 1;

   /* per-section maxima and their lags (lags kept in float, converted on return) */
   float corr_hi, corr_mid, corr_lo;
   float lag_hi, lag_mid, lag_lo;
   float top_corr, top_lag;
   float hp_corr_max;

   if ( dtx ) {
      /* update tone detection; the flag is 1 only for MR475 and MR515 */
      vad_tone_detection_update( vadSt,
            ( ( mode == MR475 ) || ( mode == MR515 ) ) ? 1 : 0 );
   }

   /* O(k) = SUM(n) Sw(n) * Sw(n-k), computed by comp_corr */
   comp_corr( signal, L_frame, pit_max, pit_min, corr_base );

   /* Section 1: bounds pit_max and 4 * pit_min */
   lag_hi = Lag_max( vadSt, corr_base, signal, L_frame, pit_max, quad_min,
         &corr_hi, dtx );

   /* Section 2: bounds 4 * pit_min - 1 and 2 * pit_min */
   lag_mid = Lag_max( vadSt, corr_base, signal, L_frame, quad_min - 1, dbl_min,
         &corr_mid, dtx );

   /* Section 3: bounds 2 * pit_min - 1 and pit_min */
   lag_lo = Lag_max( vadSt, corr_base, signal, L_frame, dbl_min - 1, pit_min,
         &corr_lo, dtx );

   if ( dtx && ( idx == 1 ) ) {
      /* calculate max high-passed filtered correlation of all lags */
      hp_max( corr_base, signal, L_frame, pit_max, pit_min, &hp_corr_max );

      /* update complex background detector */
      vadSt->best_corr_hp = hp_corr_max * 0.5F;
   }

   /*
    * Pick the winner. A later section replaces the current winner when its
    * maximum exceeds 0.85 times the current one.
    */
   top_corr = corr_hi;
   top_lag = lag_hi;

   if ( corr_mid > ( top_corr * 0.85F ) ) {
      top_corr = corr_mid;
      top_lag = lag_mid;
   }

   if ( corr_lo > ( top_corr * 0.85F ) ) {
      top_lag = lag_lo;
   }

   return ( INT32 )top_lag;
}
/*
* Lag_max_wght
*
*
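* Parameters:
* vadSt B: vad structure
* corr I: correlation vector (lag k is read as corr[-k])
* signal I: signal
* (the frame length is not a parameter; FRAME_SIZE_BY2 samples are used)
* old_lag I: old open-loop lag
* cor_max O: maximum correlation (this routine always stores 0)
* wght_flg I: weighting function flag (> 0 adds the weighting around old_lag)
* gain_flg O: open-loop gain flag, t0 - 0.4 * t1
* dtx I: dtx on/off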
*
* Function:
* Find the lag that has maximum correlation of signal in a given delay range.
* maximum lag = 143
* minimum lag = 20
*
* Returns:
* p_max lag found
*/
static INT32 Lag_max_wght( vadState *vadSt, float corr[], float signal[], INT32 old_lag,
      INT32 *cor_max, INT32 wght_flg, float *gain_flg, INT32 dtx )
{
   /* non-zero when the neighbourhood of the old lag is emphasised as well */
   const INT32 use_neighbour = ( wght_flg > 0 );
   float best = -FLT_MAX;
   float weighted, cross, energy;
   INT32 lag, step, n;
   INT32 best_lag = PIT_MAX;

   /*
    * Scan the lags from PIT_MAX down to PIT_MIN. The lag weighting table is
    * walked downwards from entry 250, the neighbourhood weighting from
    * entry (266 - old_lag). On ties the smaller lag wins (>=).
    */
   for ( lag = PIT_MAX, step = 0; lag >= PIT_MIN; lag--, step++ ) {
      /* Weighting of the correlation function. */
      weighted = corr[-lag] * corrweight[250 - step];

      if ( use_neighbour ) {
         /* Weight the neighbourhood of the old lag. */
         weighted *= corrweight[266 - old_lag - step];
      }

      if ( weighted >= best ) {
         best = weighted;
         best_lag = lag;
      }
   }

   /* Cross-correlation and energy of the delayed signal at the chosen lag */
   cross = 0;
   energy = 0;

   for ( n = 0; n < FRAME_SIZE_BY2; n++ ) {
      const float delayed = signal[n - best_lag];

      cross += signal[n] * delayed;
      energy += delayed * delayed;
   }

   if ( dtx ) {
      /* update and detect tone */
      vad_tone_detection_update( vadSt, 0 );
      vad_tone_detection( vadSt, cross, energy );
   }

   /*
    * gain flag is set according to the open-loop gain:
    * positive when cross / energy > 0.4 (for positive energy)
    */
   *gain_flg = cross - ( energy * 0.4F );
   *cor_max = 0;

   return best_lag;
}
/*
* gmed_n
*
*
* Parameters:
* ind I: values
* n I: The number of gains
*
* Function:
* Calculates N-point median.
*
* Returns:
* the median value (an element of ind[], not its index)
*/
static INT32 gmed_n( INT32 ind[], INT32 n )
{
   /*
    * Returns the median of ind[0 .. n-1]: the element found at position
    * (n >> 1) when the values are ordered from largest to smallest. For an
    * even n this is the lower of the two middle values. ind[] is not
    * modified. Returns 0 when n <= 0.
    *
    * The selection is done by rank counting, so it needs no scratch buffer,
    * has no upper limit on n, and works over the full INT32 value range.
    */
   INT32 i, j;
   INT32 rank;
   INT32 greater, equal;

   if ( n <= 0 ) {
      return 0;
   }

   rank = n >> 1;

   for ( i = 0; i < n; i++ ) {
      greater = 0;
      equal = 0;

      for ( j = 0; j < n; j++ ) {
         if ( ind[j] > ind[i] ) {
            greater++;
         }
         else if ( ind[j] == ind[i] ) {
            equal++;
         }
      }

      /*
       * In descending order, the values equal to ind[i] occupy positions
       * greater .. greater + equal - 1; ind[i] is the answer when the
       * wanted position falls inside that run.
       */
      if ( ( greater <= rank ) && ( rank < greater + equal ) ) {
         return ind[i];
      }
   }

   /* not reached for n > 0: some element always owns position rank */
   return ind[0];
}
/*
* Pitch_ol_wgh
*
*
* Parameters:
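* old_T0_med B: old Cl lags median (read for the lag search, then updated)
* wght_flg B: weighting function flag (read for the lag search, then updated)
* ada_w B: set to 1 when the OL gain flag is positive, otherwise scaled by 0.9
* vadSt B: VAD state struct
* signal I: signal used to compute the open loop pitch
* [[-pit_max]:[-1]]
* old_lags B: history with old stored Cl lags (5 entries, shifted when updated)
* ol_gain_flg O: OL gain flag; entry [idx] is written
* idx I: frame index
* dtx I: DTX flag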
*
* Function:
* Open-loop pitch search with weight
*
* Open-loop pitch analysis is performed twice per frame (every 10 ms)
* for the 10.2 kbit/s mode to find two estimates of the pitch lag
* in each frame. The open-loop pitch analysis is done in order to simplify
* the pitch analysis and confine the closed loop pitch search to
* a small number of lags around the open-loop estimated lags.
* Open-loop pitch estimation is based on the weighted speech signal
* which is obtained by filtering the input speech signal through
* the weighting filter.
* The correlation of weighted speech is determined.
* The estimated pitch-lag is the delay that maximises
* the weighted autocorrelation function. To enhance pitch-lag analysis
* the autocorrelation function estimate is modified by a weighting window.
* The weighting emphasises relevant pitch-lags, thus increasing
* the likelihood of selecting the correct delay.
* minimum pitch lag = 20
* maximum pitch lag = 143
*
* Returns:
* p_max1 open loop pitch lag
*/
static INT32 Pitch_ol_wgh( INT32 *old_T0_med, INT16 *wght_flg, float *ada_w,
      vadState *vadSt, float signal[], INT32 old_lags[],
      float ol_gain_flg[], INT16 idx, INT32 dtx )
{
   /*
    * Weighted open-loop pitch search: finds the lag, then updates the lag
    * history / median, the adaptive weight and the weighting flag.
    */
   float corr[PIT_MAX + 1];
   float *corr_base = &corr[PIT_MAX];
   float hp_corr_max;
   INT32 cor_max_out;   /* receives Lag_max_wght's cor_max; not used afterwards */
   INT32 lag, slot;

   /* calculate all correlations of signal, from PIT_MIN to PIT_MAX */
   comp_corr( signal, FRAME_SIZE_BY2, PIT_MAX, PIT_MIN, corr_base );

   lag = Lag_max_wght( vadSt, corr_base, signal, *old_T0_med,
         &cor_max_out, *wght_flg, &ol_gain_flg[idx], dtx );

   if ( ol_gain_flg[idx] > 0 ) {
      /* push the new lag into the 5-entry history, newest at index 0 */
      slot = 4;
      while ( slot > 0 ) {
         old_lags[slot] = old_lags[slot - 1];
         slot--;
      }
      old_lags[0] = lag;

      /* 5-point median of the stored lags */
      *old_T0_med = gmed_n( old_lags, 5 );
      *ada_w = 1;
   }
   else {
      /* gain flag not positive: take the lag as is and decay the weight */
      *old_T0_med = lag;
      *ada_w *= 0.9F;
   }

   /* the weighting flag is cleared once the weight has dropped below 0.3 */
   *wght_flg = ( *ada_w < 0.3 ) ? 0 : 1;

   if ( dtx && ( idx == 1 ) ) {
      /* calculate max high-passed filtered correlation of all lags */
      hp_max( corr_base, signal, FRAME_SIZE_BY2, PIT_MAX, PIT_MIN, &hp_corr_max );

      /* update complex background detector */
      vadSt->best_corr_hp = hp_corr_max * 0.5F;
   }

   return lag;
}
/*
* ol_ltp
*
*
* Parameters:
* mode I: AMR mode
* vadSt B: VAD state struct
* wsp I: signal used to compute the OL pitch
* T_op O: open loop pitch lag
* ol_gain_flg B: OL gain flags; entries [0] and [1] are cleared unless mode is MR102
* old_T0_med B: old Cl lags median (used for MR102 only)
* wght_flg B: weighting function flag (used for MR102 only)
* ada_w B: (used for MR102 only)
* old_lags B: history with old stored Cl lags (used for MR102 only)
* dtx I: DTX flag
* idx I: frame index
*
* Function:
* Compute the open loop pitch lag.
*
* Open-loop pitch analysis is performed in order to simplify
* the pitch analysis and confine the closed-loop pitch search to
* a small number of lags around the open-loop estimated lags.
* Open-loop pitch estimation is based on the weighted speech signal Sw(n)
* which is obtained by filtering the input speech signal through
* the weighting filter W(z) = A(z/g1) / A(z/g2). That is,
* in a subframe of size L, the weighted speech is given by:
*
* 10
* Sw(n) = S(n) + SUM[ a(i) * g1(i) * S(n-i) ]
* i=1
* 10
* - SUM[ a(i) * g2(i) * Sw(n-i) ], n = 0, ..., L-1
* i=1
*
* Returns:
* void
*/
static void ol_ltp( enum Mode mode, vadState *vadSt, float wsp[], INT32 *T_op,
      float ol_gain_flg[], INT32 *old_T0_med, INT16 *wght_flg,
      float *ada_w, INT32 *old_lags, INT32 dtx, INT16 idx )
{
   /*
    * Dispatch the open-loop pitch search by mode:
    *   MR475, MR515            -> Pitch_ol over FRAME_SIZE, minimum lag PIT_MIN
    *   other modes <= MR795    -> Pitch_ol over FRAME_SIZE_BY2, minimum lag PIT_MIN
    *   MR102                   -> Pitch_ol_wgh (weighted search)
    *   remaining modes         -> Pitch_ol over FRAME_SIZE_BY2, minimum lag PIT_MIN_MR122
    */
   const int full_frame = ( mode == MR475 ) || ( mode == MR515 );
   const int plain_min_lag = full_frame || ( mode <= MR795 );

   /* the gain flags are only produced by the MR102 search; clear them otherwise */
   if ( mode != MR102 ) {
      ol_gain_flg[0] = 0;
      ol_gain_flg[1] = 0;
   }

   if ( !plain_min_lag && ( mode == MR102 ) ) {
      *T_op = Pitch_ol_wgh( old_T0_med, wght_flg, ada_w, vadSt,
            wsp, old_lags, ol_gain_flg, idx, dtx );
      return;
   }

   *T_op = Pitch_ol( mode, vadSt, wsp,
         plain_min_lag ? PIT_MIN : PIT_MIN_MR122,
         PIT_MAX,
         full_frame ? FRAME_SIZE : FRAME_SIZE_BY2,
         dtx, idx );
}
/*
* subframePreProc
*
*
* Parameters:
* mode I: AMR mode
* gamma1 I: spectral exp. factor 1
* gamma1_12k2 I: spectral exp. factor 1 for EFR
* gamma2 I: spectral exp. factor 2
* A I: A(z) unquantized for the 4 subframes
* Aq I: A(z) quantized for the 4 subframes
* speech I: speech segment
* mem_err I: pointer to error signal
* mem_w0 I: memory of weighting filter
* zero I: pointer to zero vector
* ai_zero O: history of weighted synth. filter
* exc O: long term prediction residual
* h1 O: impulse response
* xn O: target vector for pitch search
* res2 O: long term prediction residual
* error O: error of LPC synthesis filter
*
* Function:
* Subframe preprocessing
*
* Impulse response computation:
* The impulse response, h(n), of the weighted synthesis filter
*
* H(z) * W(z) = A(z/g1) / ( A'(z) * A(z/g2) )
*
* is computed each subframe. This impulse response is needed for
* the search of adaptive and fixed codebooks. The impulse response h(n)
* is computed by filtering the vector of coefficients of
* the filter A(z/g1) extended by zeros through the two filters
* 1/A'(z) and 1/A(z/g2).
*
* Target signal computation:
* The target signal for adaptive codebook search is usually computed
* by subtracting the zero input response of
* the weighted synthesis filter H(z) * W(z) from the weighted
* speech signal Sw(n). This is performed on a
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -