📄 speex_preprocess.c
字号:
freq_start = (int)(300.0f*2*N/st->sampling_rate);
freq_end = (int)(2000.0f*2*N/st->sampling_rate);
for (i=freq_start;i<freq_end;i++)
{
if (st->S[i] > 20.f*st->Smin[i]+1000.f)
active_bands+=1;
}
active_bands /= (freq_end-freq_start+1);
if (active_bands > .2f)
{
float loudness=0.f;
float rate, rate2=.2f;
st->nb_loudness_adapt++;
rate=2.0f/(1+st->nb_loudness_adapt);
if (rate < .05f)
rate = .05f;
if (rate < .1f && pow(loudness, LOUDNESS_EXP) > st->loudness)
rate = .1f;
if (rate < .2f && pow(loudness, LOUDNESS_EXP) > 3.f*st->loudness)
rate = .2f;
if (rate < .4f && pow(loudness, LOUDNESS_EXP) > 10.f*st->loudness)
rate = .4f;
for (i=2;i<N;i++)
{
loudness += scale*st->ps[i] * st->gain2[i] * st->gain2[i] * st->loudness_weight[i];
}
loudness=sqrt(loudness);
/*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
st->loudness = (1-rate)*st->loudness + (rate)*pow(loudness, LOUDNESS_EXP);
st->loudness2 = (1-rate2)*st->loudness2 + rate2*pow(st->loudness, 1.0f/LOUDNESS_EXP);
loudness = pow(st->loudness, 1.0f/LOUDNESS_EXP);
/*fprintf (stderr, "%f %f %f\n", loudness, st->loudness2, rate);*/
}
agc_gain = st->agc_level/st->loudness2;
/*fprintf (stderr, "%f %f %f %f\n", active_bands, st->loudness, st->loudness2, agc_gain);*/
if (agc_gain>200)
agc_gain = 200;
for (i=0;i<N;i++)
st->gain2[i] *= agc_gain;
}
/**
 * Assemble the analysis frame for one block of input and derive its power
 * spectrum.
 *
 * The frame is the overlap saved from the previous call (st->inbuf) followed
 * by the st->frame_size new samples from x. It is windowed in place, passed
 * through the forward FFT, and the per-bin power is stored in st->ps.
 *
 * @param st  Preprocessor state; st->frame, st->inbuf and st->ps are written.
 * @param x   New input samples; st->frame_size of them are read.
 */
static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
{
   const int nb_bins = st->ps_size;
   const int fft_len = 2*nb_bins;
   const int overlap = fft_len - st->frame_size;
   const int tail_start = st->frame_size - overlap;
   float *power = st->ps;
   int k;

   /* First part of the frame: samples carried over from the previous call */
   for (k=0; k<overlap; k++)
   {
      st->frame[k] = st->inbuf[k];
   }
   /* Second part of the frame: the new input */
   for (k=0; k<st->frame_size; k++)
   {
      st->frame[overlap+k] = x[k];
   }

   /* Remember the last `overlap` input samples for the next call */
   for (k=0; k<overlap; k++)
   {
      st->inbuf[k] = x[tail_start+k];
   }

   /* Apply the analysis window over the whole frame */
   for (k=0; k<fft_len; k++)
   {
      st->frame[k] *= st->window[k];
   }

   /* Go to the frequency domain (in place) */
   spx_drft_forward(st->fft_lookup, st->frame);

   /* Power per bin, with a +1 floor; bin 0 is fixed to 1.
      Bin k is read from frame[2k-1] and frame[2k]. */
   power[0] = 1;
   for (k=1; k<nb_bins; k++)
   {
      power[k] = 1+st->frame[2*k-1]*st->frame[2*k-1] + st->frame[2*k]*st->frame[2*k];
   }
}
/**
 * Update the smoothed spectrum, its tracked minimum, and the per-bin
 * "update probability" for bins 1 .. ps_size-2.
 *
 * st->S is a recursively smoothed version of st->ps (also smoothed across the
 * two neighbouring bins). st->Smin / st->Stmp track minima of st->S, with the
 * tracking window restarted whenever st->nb_preprocess is a multiple of 200.
 * st->update_prob decays by a factor .2 each call and gets +.8 when the
 * smoothed power exceeds 2.5 times the tracked minimum.
 *
 * @param st  Preprocessor state; S, Smin, Stmp and update_prob are written.
 */
static void update_noise_prob(SpeexPreprocessState *st)
{
   const int last = st->ps_size - 1;
   const int first_call = (st->nb_preprocess<1);
   const int restart_window = (st->nb_preprocess%200==0);
   int k;

   /* Time/frequency smoothing of the power spectrum */
   for (k=1; k<last; k++)
   {
      st->S[k] = 100.f+ .8f*st->S[k] + .05f*st->ps[k-1]+.1f*st->ps[k]+.05f*st->ps[k+1];
   }

   /* Everything below is independent from one bin to the next, so it is
      done in a single pass over the bins. */
   for (k=1; k<last; k++)
   {
      /* Very first call: seed both minima slightly above the current value */
      if (first_call)
      {
         st->Smin[k] = st->Stmp[k] = st->S[k]+100.f;
      }

      if (restart_window)
      {
         /* Start a new tracking window */
         st->Smin[k] = min(st->Stmp[k], st->S[k]);
         st->Stmp[k] = st->S[k];
      } else {
         /* Keep following the minimum inside the current window */
         st->Smin[k] = min(st->Smin[k], st->S[k]);
         st->Stmp[k] = min(st->Stmp[k], st->S[k]);
      }

      /* Decay, then bump when the bin is well above its tracked minimum */
      st->update_prob[k] *= .2f;
      if (st->S[k] > 2.5*st->Smin[k])
      {
         st->update_prob[k] += .8f;
      }
   }
}
/* Factor applied to st->noise[i] when speex_preprocess() forms the total
   noise term used for the a posteriori and a priori SNR. */
#define NOISE_OVERCOMPENS 1.4
/**
 * Process one frame of audio in place.
 *
 * Steps, in order: analysis (windowed FFT + power spectrum), noise-probability
 * update, a posteriori / a priori SNR estimation, noise-estimate update,
 * per-bin gain computation, optional AGC, application of the gain in the
 * frequency domain, inverse FFT, peak limiting, synthesis windowing and
 * overlap-add back into x.
 *
 * @param st    Preprocessor state (updated).
 * @param x     Input samples (st->frame_size of them); overwritten with the
 *              processed output.
 * @param echo  Optional per-bin residual echo estimate; may be NULL, in which
 *              case the echo-related updates are skipped.
 * @return 1 when VAD is disabled; otherwise the value returned by
 *         speex_compute_vad().
 */
int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
{
int i;
int is_speech=1;
float mean_post=0;
float mean_prior=0;
int N = st->ps_size;
int N3 = 2*N - st->frame_size;
int N4 = st->frame_size - N3;
/* Scaling applied after the inverse FFT (and passed on to the loudness code) */
float scale=.5f/N;
float *ps=st->ps;
float Zframe=0, Pframe;
/* Fill st->frame with the windowed spectrum and st->ps with the power spectrum */
preprocess_analysis(st, x);
update_noise_prob(st);
st->nb_preprocess++;
/* Noise estimation is always updated while nb_adapt<10.
   NOTE(review): the original comment said "for the 20 first times", which
   does not match the threshold tested here. */
if (st->nb_adapt<10)
{
update_noise(st, ps, echo);
}
/* Deal with residual echo if provided */
if (echo)
for (i=1;i<N;i++)
st->echo_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*4.0*echo[i]);
/* Compute a posteriori SNR */
for (i=1;i<N;i++)
{
/* Total interference: over-compensated noise + echo + reverberation, floored by 1 */
float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
st->post[i] = ps[i]/tot_noise - 1.f;
if (st->post[i]>100.f)
st->post[i]=100.f;
/*if (st->post[i]<0)
st->post[i]=0;*/
mean_post+=st->post[i];
}
/* NOTE(review): the sum above covers bins 1..N-1 (N-1 terms) but is divided by N */
mean_post /= N;
if (mean_post<0.f)
mean_post=0.f;
/* Special case for first frame */
/* NOTE(review): nb_adapt is not modified in this function; presumably
   update_noise() increments it -- confirm. */
if (st->nb_adapt==1)
for (i=1;i<N;i++)
st->old_ps[i] = ps[i];
/* Compute a priori SNR */
{
/* A priori update rate */
for (i=1;i<N;i++)
{
/* gamma grows from .1 towards 1 as the previous a priori SNR increases */
float gamma = .1+.9*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));
float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
/* A priori SNR update */
/* Mix of the (non-negative) a posteriori SNR and an estimate built from the
   previous frame's gain and power spectrum */
st->prior[i] = gamma*max(0.0f,st->post[i]) +
(1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]);
if (st->prior[i]>100.f)
st->prior[i]=100.f;
mean_prior+=st->prior[i];
}
}
/* NOTE(review): as for mean_post, N-1 terms are summed but the divisor is N */
mean_prior /= N;
#if 0
for (i=0;i<N;i++)
{
fprintf (stderr, "%f ", st->prior[i]);
}
fprintf (stderr, "\n");
#endif
/*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/
/* From the 20th call on, count consecutive frames that look like noise */
if (st->nb_preprocess>=20)
{
int do_update = 0;
float noise_ener=0, sig_ener=0;
/* If SNR is low (both a priori and a posteriori), update the noise estimate*/
/*if (mean_prior<.23 && mean_post < .5)*/
if (mean_prior<.23f && mean_post < .5f)
do_update = 1;
for (i=1;i<N;i++)
{
noise_ener += st->noise[i];
sig_ener += ps[i];
}
/* Also treat the frame as noise when the noise estimate is more than 3x the
   current frame energy */
if (noise_ener > 3.f*sig_ener)
do_update = 1;
/*do_update = 0;*/
if (do_update)
{
st->consec_noise++;
} else {
st->consec_noise=0;
}
}
if (st->vad_enabled)
is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);
/* After 3 or more consecutive noise-like frames, call update_noise() with
   the previous frame's spectrum; otherwise only adapt the bins whose update
   probability is below .5 */
if (st->consec_noise>=3)
{
update_noise(st, st->old_ps, echo);
} else {
for (i=1;i<N-1;i++)
{
if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/)
{
/* With an echo estimate, subtract it from the power first (floored at 1) */
if (echo)
st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*4.0*echo[i]);
else
st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i];
}
}
}
/* zeta: recursively smoothed a priori SNR per bin */
for (i=1;i<N;i++)
{
st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i];
}
{
/* Average zeta over the bins between 300 and 2000 (same unit as
   st->sampling_rate) */
int freq_start = (int)(300.0f*2.f*N/st->sampling_rate);
int freq_end = (int)(2000.0f*2.f*N/st->sampling_rate);
for (i=freq_start;i<freq_end;i++)
{
Zframe += st->zeta[i];
}
/* NOTE(review): no guard against freq_end==freq_start (division by zero)
   or freq_end>N (read past the bins written above) for unusual
   sampling_rate/N combinations. */
Zframe /= (freq_end-freq_start);
}
st->Zlast = Zframe;
Pframe = qcurve(Zframe);
/*fprintf (stderr, "%f\n", Pframe);*/
/* Compute gain according to the Ephraim-Malah algorithm */
for (i=1;i<N;i++)
{
float MM;
float theta;
float prior_ratio;
float p, q;
float zeta1;
float P1;
/* 1.0001 rather than 1 in the denominator keeps the ratio strictly below 1 */
prior_ratio = st->prior[i]/(1.0001f+st->prior[i]);
theta = (1.f+st->post[i])*prior_ratio;
/* Smooth zeta over the neighbouring bins, except at the two ends of the range */
if (i==1 || i==N-1)
zeta1 = st->zeta[i];
else
zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1];
P1 = qcurve (zeta1);
/* FIXME: add global prob (P2) */
/* NOTE(review): the first assignment to q is a dead store -- it is
   overwritten on the next line, so Pframe has no effect on the result. */
q = 1-Pframe*P1;
q = 1-P1;
if (q>.95f)
q=.95f;
/* p: per-bin weight in (0,1] derived from q, the a priori SNR and theta */
p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta));
/*p=1;*/
/* Optimal estimator for loudness domain */
MM = hypergeom_gain(theta);
st->gain[i] = prior_ratio * MM;
/*Put some (very arbitraty) limit on the gain*/
if (st->gain[i]>2.f)
{
st->gain[i]=2.f;
}
/* Reverberation estimate: decayed previous value plus a fraction of the
   gain-weighted power of this frame */
st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];
/* gain2 is the gain actually applied to the spectrum below */
if (st->denoise_enabled)
{
st->gain2[i] = p*p*st->gain[i];
/*st->gain2[i]=(p*sqrt(st->gain[i])+.05*(1-p))*(p*sqrt(st->gain[i])+.05*(1-p));*/
/*st->gain2[i] = pow(st->gain[i], p) * pow(.2f,1.f-p);*/
} else {
st->gain2[i]=1.f;
}
}
/* First and last bins are always muted */
st->gain2[0]=st->gain[0]=0.f;
st->gain2[N-1]=st->gain[N-1]=0.f;
/*
for (i=30;i<N-2;i++)
{
st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]);
}
for (i=30;i<N-2;i++)
st->gain2[i] = st->gain[i];
*/
if (st->agc_enabled)
speex_compute_agc(st, mean_prior);
#if 0
if (!is_speech)
{
for (i=0;i<N;i++)
st->gain2[i] = 0;
}
#if 0
else {
for (i=0;i<N;i++)
st->gain2[i] = 1;
}
#endif
#endif
/* Apply computed gain */
/* Bin i is stored in frame[2i-1] and frame[2i] */
for (i=1;i<N;i++)
{
st->frame[2*i-1] *= st->gain2[i];
st->frame[2*i] *= st->gain2[i];
}
/* Get rid of the DC and very low frequencies */
st->frame[0]=0;
st->frame[1]=0;
st->frame[2]=0;
/* Nyquist frequency is mostly useless too */
st->frame[2*N-1]=0;
/* Inverse FFT with 1/N scaling */
spx_drft_backward(st->fft_lookup, st->frame);
for (i=0;i<2*N;i++)
st->frame[i] *= scale;
{
/* Peak limiter: if any sample exceeds 28000 in magnitude, scale the whole
   frame down so that the peak is exactly 28000 */
float max_sample=0;
for (i=0;i<2*N;i++)
if (fabs(st->frame[i])>max_sample)
max_sample = fabs(st->frame[i]);
if (max_sample>28000.f)
{
float damp = 28000.f/max_sample;
for (i=0;i<2*N;i++)
st->frame[i] *= damp;
}
}
/* Synthesis window */
for (i=0;i<2*N;i++)
st->frame[i] *= st->window[i];
/* Perform overlap and add */
/* NOTE(review): the sums are stored into spx_int16_t without explicit
   saturation; confirm outbuf+frame cannot leave the 16-bit range. */
for (i=0;i<N3;i++)
x[i] = st->outbuf[i] + st->frame[i];
for (i=0;i<N4;i++)
x[N3+i] = st->frame[N3+i];
/* Update outbuf */
for (i=0;i<N3;i++)
st->outbuf[i] = st->frame[st->frame_size+i];
/* Save old power spectrum */
for (i=1;i<N;i++)
st->old_ps[i] = ps[i];
return is_speech;
}
/**
 * Update the preprocessor's internal estimates from a frame without
 * producing any output (x is only read).
 *
 * Runs the analysis and noise-probability steps, adapts the noise estimate
 * for the selected bins, refreshes the overlap buffer from the windowed end of
 * the input, stores the power spectrum as "previous" and decays the
 * reverberation estimate.
 *
 * @param st    Preprocessor state (updated).
 * @param x     Input samples (st->frame_size of them).
 * @param echo  Optional per-bin residual echo estimate; may be NULL.
 */
void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
{
   const int nb_bins = st->ps_size;
   const int overlap = 2*nb_bins - st->frame_size;
   float *power = st->ps;
   int k;

   preprocess_analysis(st, x);
   update_noise_prob(st);
   st->nb_preprocess++;

   /* Adapt the noise estimate on bins with a low update probability or whose
      power is below the current estimate.
      NOTE(review): the weights here are .95/.1 whereas speex_preprocess()
      uses .95/.05 -- kept exactly as found; confirm this is intended. */
   for (k=1; k<nb_bins-1; k++)
   {
      if (!(st->update_prob[k]<.5f || st->ps[k] < st->noise[k]))
         continue;

      if (echo)
      {
         /* Remove the echo contribution first, never going below 1 */
         st->noise[k] = .95f*st->noise[k] + .1f*max(1.0f,st->ps[k]-st->frame_size*st->frame_size*4.0*echo[k]);
      } else {
         st->noise[k] = .95f*st->noise[k] + .1f*st->ps[k];
      }
   }

   /* Overlap buffer: last `overlap` input samples times the matching part of
      the window */
   for (k=0; k<overlap; k++)
   {
      st->outbuf[k] = x[st->frame_size-overlap+k]*st->window[st->frame_size+k];
   }

   /* Save the power spectrum for the next call and let the reverberation
      estimate decay */
   for (k=1; k<nb_bins; k++)
   {
      st->old_ps[k] = power[k];
      st->reverb_estimate[k] *= st->reverb_decay;
   }
}
/**
 * Get or set a preprocessor parameter.
 *
 * @param state    Preprocessor state.
 * @param request  One of the SPEEX_PREPROCESS_SET_xxx / GET_xxx request codes.
 * @param ptr      Points to the value to read (SET) or to the location to
 *                 write (GET). It is interpreted as int* for DENOISE, AGC,
 *                 VAD, DEREVERB, PROB_START and PROB_CONTINUE, and as float*
 *                 for AGC_LEVEL, DEREVERB_LEVEL and DEREVERB_DECAY.
 * @return 0 on success, -1 (after emitting a warning) for an unknown request.
 *
 * Notes:
 *  - SET_AGC_LEVEL clamps the level to [1, 32768].
 *  - SET_DEREVERB also clears the reverberation estimate.
 *  - PROB_START / PROB_CONTINUE are exchanged as integer percentages; an
 *    out-of-range value on SET falls back to the compiled-in default.
 */
int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
{
   int i;
   SpeexPreprocessState *st = state;

   switch(request)
   {
   case SPEEX_PREPROCESS_SET_DENOISE:
      st->denoise_enabled = (*(int*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_DENOISE:
      (*(int*)ptr) = st->denoise_enabled;
      break;

   case SPEEX_PREPROCESS_SET_AGC:
      st->agc_enabled = (*(int*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_AGC:
      (*(int*)ptr) = st->agc_enabled;
      break;

   case SPEEX_PREPROCESS_SET_AGC_LEVEL:
      st->agc_level = (*(float*)ptr);
      /* Keep the target level inside [1, 32768] */
      if (st->agc_level<1)
         st->agc_level=1;
      if (st->agc_level>32768)
         st->agc_level=32768;
      break;
   case SPEEX_PREPROCESS_GET_AGC_LEVEL:
      (*(float*)ptr) = st->agc_level;
      break;

   case SPEEX_PREPROCESS_SET_VAD:
      st->vad_enabled = (*(int*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_VAD:
      (*(int*)ptr) = st->vad_enabled;
      break;

   case SPEEX_PREPROCESS_SET_DEREVERB:
      st->dereverb_enabled = (*(int*)ptr);
      /* Start again from an empty reverberation estimate */
      for (i=0;i<st->ps_size;i++)
         st->reverb_estimate[i]=0;
      break;
   case SPEEX_PREPROCESS_GET_DEREVERB:
      (*(int*)ptr) = st->dereverb_enabled;
      break;

   case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
      st->reverb_level = (*(float*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
      (*(float*)ptr) = st->reverb_level;
      break;

   case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
      st->reverb_decay = (*(float*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
      (*(float*)ptr) = st->reverb_decay;
      break;

   case SPEEX_PREPROCESS_SET_PROB_START:
      st->speech_prob_start = (*(int*)ptr) / 100.0;
      if ( st->speech_prob_start > 1 || st->speech_prob_start < 0 )
         st->speech_prob_start = SPEEX_PROB_START_DEFAULT;
      break;
   case SPEEX_PREPROCESS_GET_PROB_START:
      /* Round to nearest instead of truncating: percent/100 is not exactly
         representable in binary floating point, so prob*100 can land just
         below the integer that was set and a plain conversion would then
         return one less. The stored probability is never negative, so
         adding .5 before the conversion is sufficient. */
      (*(int*)ptr) = (int)(st->speech_prob_start * 100 + .5f);
      break;

   case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
      st->speech_prob_continue = (*(int*)ptr) / 100.0;
      if ( st->speech_prob_continue > 1 || st->speech_prob_continue < 0 )
         st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT;
      break;
   case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
      /* Same rounding as for GET_PROB_START */
      (*(int*)ptr) = (int)(st->speech_prob_continue * 100 + .5f);
      break;

   default:
      speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
      return -1;
   }
   return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -