📄 preprocess.c
字号:
#endifSpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate){ int i; int N, N3, N4, M; SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState)); st->frame_size = frame_size; /* Round ps_size down to the nearest power of two */#if 0 i=1; st->ps_size = st->frame_size; while(1) { if (st->ps_size & ~i) { st->ps_size &= ~i; i<<=1; } else { break; } } if (st->ps_size < 3*st->frame_size/4) st->ps_size = st->ps_size * 3 / 2;#else st->ps_size = st->frame_size;#endif N = st->ps_size; N3 = 2*N - st->frame_size; N4 = st->frame_size - N3; st->sampling_rate = sampling_rate; st->denoise_enabled = 1; st->vad_enabled = 0; st->dereverb_enabled = 0; st->reverb_decay = 0; st->reverb_level = 0; st->noise_suppress = NOISE_SUPPRESS_DEFAULT; st->echo_suppress = ECHO_SUPPRESS_DEFAULT; st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT; st->speech_prob_start = SPEECH_PROB_START_DEFAULT; st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; st->echo_state = NULL; st->nbands = NB_BANDS; M = st->nbands; st->bank = filterbank_new(M, sampling_rate, N, 1); st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->update_prob = (int*)speex_alloc(N*sizeof(int)); st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); conj_window(st->window, 2*N3); for (i=2*N3;i<2*st->ps_size;i++) st->window[i]=Q15_ONE; if (N4>0) { for (i=N3-1;i>=0;i--) { st->window[i+N3+N4]=st->window[i+N3]; st->window[i+N3]=1; } } for (i=0;i<N+M;i++) { st->noise[i]=QCONST32(1.f,NOISE_SHIFT); st->reverb_estimate[i]=0; st->old_ps[i]=1; st->gain[i]=Q15_ONE; st->post[i]=SHL16(1, SNR_SHIFT); st->prior[i]=SHL16(1, SNR_SHIFT); } for (i=0;i<N;i++) st->update_prob[i] = 1; for (i=0;i<N3;i++) { st->inbuf[i]=0; st->outbuf[i]=0; }#ifndef FIXED_POINT st->agc_enabled = 0; st->agc_level = 8000; st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); for (i=0;i<N;i++) { float ff=((float)i)*.5*sampling_rate/((float)N); /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/ st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f); if (st->loudness_weight[i]<.01f) st->loudness_weight[i]=.01f; st->loudness_weight[i] *= st->loudness_weight[i]; } /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/ st->loudness = 1e-15; st->agc_gain = 1; st->nb_loudness_adapt = 0; st->max_gain = 30; st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate); st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate); st->prev_loudness = 1; st->init_max = 1;#endif st->was_speech = 0; st->fft_lookup = spx_fft_init(2*N); st->nb_adapt=0; st->min_count=0; return st;}void speex_preprocess_state_destroy(SpeexPreprocessState *st){ speex_free(st->frame); speex_free(st->ft); speex_free(st->ps); speex_free(st->gain2); speex_free(st->gain_floor); speex_free(st->window); speex_free(st->noise); speex_free(st->reverb_estimate); speex_free(st->old_ps); speex_free(st->gain); speex_free(st->prior); speex_free(st->post);#ifndef FIXED_POINT speex_free(st->loudness_weight);#endif speex_free(st->echo_noise); speex_free(st->residual_echo); speex_free(st->S); speex_free(st->Smin); speex_free(st->Stmp); speex_free(st->update_prob); speex_free(st->zeta); speex_free(st->inbuf); speex_free(st->outbuf); spx_fft_destroy(st->fft_lookup); filterbank_destroy(st->bank); speex_free(st);}/* FIXME: The AGC doesn't work yet with fixed-point*/#ifndef FIXED_POINTstatic void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft){ int i; int N = st->ps_size; float target_gain; float loudness=1.f; float rate; for (i=2;i<N;i++) { loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; } loudness=sqrt(loudness); /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ if (Pframe>.3f) { st->nb_loudness_adapt++; /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/ rate = .03*Pframe*Pframe; st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); st->loudness_accum = (1-rate)*st->loudness_accum + rate; if (st->init_max < st->max_gain && st->nb_adapt > 20) st->init_max *= 1.f + .1f*Pframe*Pframe; } /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP); if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) { if (target_gain > st->max_increase_step*st->agc_gain) target_gain = st->max_increase_step*st->agc_gain; if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness) target_gain = st->max_decrease_step*st->agc_gain; if (target_gain > st->max_gain) target_gain = st->max_gain; if (target_gain > st->init_max) target_gain = st->init_max; st->agc_gain = target_gain; } /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ for (i=0;i<2*N;i++) ft[i] *= st->agc_gain; st->prev_loudness = loudness;}#endifstatic void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x){ int i; int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; spx_word32_t *ps=st->ps; /* 'Build' input frame */ for (i=0;i<N3;i++) st->frame[i]=st->inbuf[i]; for (i=0;i<st->frame_size;i++) st->frame[N3+i]=x[i]; /* Update inbuf */ for (i=0;i<N3;i++) st->inbuf[i]=x[N4+i]; /* Windowing */ for (i=0;i<2*N;i++) st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);#ifdef FIXED_POINT { spx_word16_t max_val=0; for (i=0;i<2*N;i++) max_val = MAX16(max_val, ABS16(st->frame[i])); st->frame_shift = 14-spx_ilog2(EXTEND32(max_val)); for (i=0;i<2*N;i++) st->frame[i] = SHL16(st->frame[i], st->frame_shift); }#endif /* Perform FFT */ spx_fft(st->fft_lookup, st->frame, st->ft); /* Power spectrum */ ps[0]=MULT16_16(st->ft[0],st->ft[0]); for (i=1;i<N;i++) ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]); for (i=0;i<N;i++) st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift); filterbank_compute_bank32(st->bank, ps, ps+N);}static void update_noise_prob(SpeexPreprocessState *st){ int i; int min_range; int N = st->ps_size; for (i=1;i<N-1;i++) st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); if (st->nb_adapt==1) { for (i=0;i<N;i++) st->Smin[i] = st->Stmp[i] = 0; } if (st->nb_adapt < 100) min_range = 15; else if (st->nb_adapt < 1000) min_range = 50; else if (st->nb_adapt < 10000) min_range = 150; else min_range = 300; if (st->min_count > min_range) { st->min_count = 0; for (i=0;i<N;i++) { st->Smin[i] = MIN32(st->Stmp[i], st->S[i]); st->Stmp[i] = st->S[i]; } } else { for (i=0;i<N;i++) { st->Smin[i] = MIN32(st->Smin[i], st->S[i]); st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); } } for (i=0;i<N;i++) { if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > ADD32(st->Smin[i],EXTEND32(20))) st->update_prob[i] = 1; else st->update_prob[i] = 0; /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/ /*fprintf (stderr, "%f ", st->update_prob[i]);*/ }}#define NOISE_OVERCOMPENS 1.void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo){ return speex_preprocess_run(st, x);}int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x){ int i; int M; int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; spx_word32_t *ps=st->ps; spx_word32_t Zframe; spx_word16_t Pframe; spx_word16_t beta, beta_1; spx_word16_t effective_echo_suppress; st->nb_adapt++; st->min_count++; beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); beta_1 = Q15_ONE-beta; M = st->nbands; /* Deal with residual echo if provided */ if (st->echo_state) { speex_echo_get_residual(st->echo_state, st->residual_echo, N);#ifndef FIXED_POINT /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */ if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f)) { for (i=0;i<N;i++) st->residual_echo[i] = 0; }#endif for (i=0;i<N;i++) st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]); filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N); } else { for (i=0;i<N+M;i++) st->echo_noise[i] = 0; } preprocess_analysis(st, x); update_noise_prob(st); /* Noise estimation always updated for the 10 first frames */ /*if (st->nb_adapt<10) { for (i=1;i<N-1;i++) st->update_prob[i] = 0; } */ /* Update the noise estimate for the frequencies where it can be */ for (i=0;i<N;i++) { if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT)) st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT))); } filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -