📄 preprocess_spx.c
字号:
/* Update the noise estimate for the frequencies where it can be */
for (i=0;i<N;i++)
{
if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
}
filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
/* Special case for first frame */
if (st->nb_adapt==1)
for (i=0;i<N+M;i++)
st->old_ps[i] = ps[i];
/* Compute a posteriori SNR */
for (i=0;i<N+M;i++)
{
spx_word16_t gamma;
/* Total noise estimate including residual echo and reverberation */
spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
/* A posteriori SNR = ps/noise - 1*/
st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
/* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
/* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
}
/*print_vec(st->post, N+M, "");*/
/* Recursive average of the a priori SNR. A bit smoothed for the psd components */
st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
for (i=1;i<N-1;i++)
st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
for (i=N-1;i<N+M;i++)
st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
/* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
Zframe = 0;
for (i=N;i<N+M;i++)
Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
/* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
Technically this is actually wrong because the EM gaim assumes a slightly different probability
distribution */
for (i=N;i<N+M;i++)
{
/* See EM and Cohen papers*/
spx_word32_t theta;
/* Gain from hypergeometric function */
spx_word32_t MM;
/* Weiner filter gain */
spx_word16_t prior_ratio;
/* a priority probability of speech presence based on Bark sub-band alone */
spx_word16_t P1;
/* Speech absence a priori probability (considering sub-band and frame) */
spx_word16_t q;
#ifdef FIXED_POINT
spx_word16_t tmp;
#endif
prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
MM = hypergeom_gain(theta);
/* Gain with bound */
st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
/* Save old Bark power spectrum */
st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
#ifdef FIXED_POINT
theta = MIN32(theta, EXTEND32(32767));
/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
/*Q8*/tmp = PSHR(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8);
st->gain2[i]=DIV32_16(SHL(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
#else
st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
#endif
}
/* Convert the EM gains and speech prob to linear frequency */
filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
/* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
if (1)
{
filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
/* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
for (i=0;i<N;i++)
{
spx_word32_t MM;
spx_word32_t theta;
spx_word16_t prior_ratio;
spx_word16_t tmp;
spx_word16_t p;
spx_word16_t g;
/* Wiener filter gain */
prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
/* Optimal estimator for loudness domain */
MM = hypergeom_gain(theta);
/* EM gain with bound */
g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
/* Interpolated speech probability of presence */
p = st->gain2[i];
/* Constrain the gain to be close to the Bark scale gain */
if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
g = MULT16_16(3,st->gain[i]);
st->gain[i] = g;
/* Save old power spectrum */
st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
/* Apply gain floor */
if (st->gain[i] < st->gain_floor[i])
st->gain[i] = st->gain_floor[i];
/* Exponential decay model for reverberation (unused) */
/*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
/* Take into account speech probability of presence (loudness domain MMSE estimator) */
/* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
st->gain2[i]=SQR16_Q15(tmp);
/* Use this if you want a log-domain MMSE estimator instead */
/*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
}
} else {
for (i=N;i<N+M;i++)
{
spx_word16_t tmp;
spx_word16_t p = st->gain2[i];
st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
st->gain2[i]=SQR16_Q15(tmp);
}
filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
}
/* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
if (!st->denoise_enabled)
{
for (i=0;i<N+M;i++)
st->gain2[i]=Q15_ONE;
}
/*FIXME: This *will* not work for fixed-point */
#ifndef FIXED_POINT
if (st->agc_enabled)
speex_compute_agc(st);
#endif
/* Apply computed gain */
for (i=1;i<N;i++)
{
st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
}
st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
/* Inverse FFT with 1/N scaling */
spx_ifft(st->fft_lookup, st->ft, st->frame);
/* Scale back to original (lower) amplitude */
for (i=0;i<2*N;i++)
st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
/*FIXME: This *will* not work for fixed-point */
#ifndef FIXED_POINT
if (st->agc_enabled)
{
float max_sample=0;
for (i=0;i<2*N;i++)
if (fabs(st->frame[i])>max_sample)
max_sample = fabs(st->frame[i]);
if (max_sample>28000.f)
{
float damp = 28000.f/max_sample;
for (i=0;i<2*N;i++)
st->frame[i] *= damp;
}
}
#endif
/* Synthesis window (for WOLA) */
for (i=0;i<2*N;i++)
st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
/* Perform overlap and add */
for (i=0;i<N3;i++)
x[i] = st->outbuf[i] + st->frame[i];
for (i=0;i<N4;i++)
x[N3+i] = st->frame[N3+i];
/* Update outbuf */
for (i=0;i<N3;i++)
st->outbuf[i] = st->frame[st->frame_size+i];
/* FIXME: This VAD is a kludge */
if (st->vad_enabled)
{
if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))
{
st->was_speech=1;
return 1;
} else
{
st->was_speech=0;
return 0;
}
} else {
return 1;
}
}
void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
{
int i;
int N = st->ps_size;
int N3 = 2*N - st->frame_size;
int M;
spx_word32_t *ps=st->ps;
M = st->nbands;
st->min_count++;
preprocess_analysis(st, x);
update_noise_prob(st);
for (i=1;i<N-1;i++)
{
if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
{
st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
}
}
for (i=0;i<N3;i++)
st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
/* Save old power spectrum */
for (i=0;i<N+M;i++)
st->old_ps[i] = ps[i];
for (i=0;i<N;i++)
st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
}
int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
{
int i;
SpeexPreprocessState *st;
st=(SpeexPreprocessState*)state;
switch(request)
{
case SPEEX_PREPROCESS_SET_DENOISE:
st->denoise_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_PREPROCESS_GET_DENOISE:
(*(spx_int32_t*)ptr) = st->denoise_enabled;
break;
#ifndef FIXED_POINT
case SPEEX_PREPROCESS_SET_AGC:
st->agc_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_PREPROCESS_GET_AGC:
(*(spx_int32_t*)ptr) = st->agc_enabled;
break;
case SPEEX_PREPROCESS_SET_AGC_LEVEL:
st->agc_level = (*(float*)ptr);
if (st->agc_level<1)
st->agc_level=1;
if (st->agc_level>32768)
st->agc_level=32768;
break;
case SPEEX_PREPROCESS_GET_AGC_LEVEL:
(*(float*)ptr) = st->agc_level;
break;
#endif
case SPEEX_PREPROCESS_SET_VAD:
speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
st->vad_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_PREPROCESS_GET_VAD:
(*(spx_int32_t*)ptr) = st->vad_enabled;
break;
case SPEEX_PREPROCESS_SET_DEREVERB:
st->dereverb_enabled = (*(spx_int32_t*)ptr);
for (i=0;i<st->ps_size;i++)
st->reverb_estimate[i]=0;
break;
case SPEEX_PREPROCESS_GET_DEREVERB:
(*(spx_int32_t*)ptr) = st->dereverb_enabled;
break;
case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
st->reverb_level = (*(float*)ptr);
break;
case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
(*(float*)ptr) = st->reverb_level;
break;
case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
st->reverb_decay = (*(float*)ptr);
break;
case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
(*(float*)ptr) = st->reverb_decay;
break;
case SPEEX_PREPROCESS_SET_PROB_START:
*(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr));
st->speech_prob_start = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100);
break;
case SPEEX_PREPROCESS_GET_PROB_START:
(*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
break;
case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
*(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr));
st->speech_prob_continue = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100);
break;
case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
(*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
break;
case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
break;
case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
(*(spx_int32_t*)ptr) = st->noise_suppress;
break;
case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
break;
case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
(*(spx_int32_t*)ptr) = st->echo_suppress;
break;
case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
break;
case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
(*(spx_int32_t*)ptr) = st->echo_suppress_active;
break;
case SPEEX_PREPROCESS_SET_ECHO_STATE:
st->echo_state = (SpeexEchoState*)ptr;
break;
case SPEEX_PREPROCESS_GET_ECHO_STATE:
ptr = (void*)st->echo_state;
break;
default:
speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
return -1;
}
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -