📄 preprocess.c
字号:
/* Special case for first frame */ if (st->nb_adapt==1) for (i=0;i<N+M;i++) st->old_ps[i] = ps[i]; /* Compute a posteriori SNR */ for (i=0;i<N+M;i++) { spx_word16_t gamma; /* Total noise estimate including residual echo and reverberation */ spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); /* A posteriori SNR = ps/noise - 1*/ st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); } /*print_vec(st->post, N+M, "");*/ /* Recursive average of the a priori SNR. A bit smoothed for the psd components */ st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15); for (i=1;i<N-1;i++) st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])), MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15); for (i=N-1;i<N+M;i++) st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15); /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */ Zframe = 0; for (i=N;i<N+M;i++) Zframe = ADD32(Zframe, EXTEND32(st->zeta[i])); Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) Technically this is actually wrong because the EM gaim assumes a slightly different probability distribution */ for (i=N;i<N+M;i++) { /* See EM and Cohen papers*/ spx_word32_t theta; /* Gain from hypergeometric function */ spx_word32_t MM; /* Weiner filter gain */ spx_word16_t prior_ratio; /* a priority probability of speech presence based on Bark sub-band alone */ spx_word16_t P1; /* Speech absence a priori probability (considering sub-band and frame) */ spx_word16_t q;#ifdef FIXED_POINT spx_word16_t tmp;#endif prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); MM = hypergeom_gain(theta); /* Gain with bound */ st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); /* Save old Bark power spectrum */ st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i])); q = Q15_ONE-MULT16_16_Q15(Pframe,P1);#ifdef FIXED_POINT theta = MIN32(theta, EXTEND32(32767));/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1)))); tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*//*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8)); st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));#else st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));#endif } /* Convert the EM gains and speech prob to linear frequency */ filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); filterbank_compute_psd16(st->bank,st->gain+N, st->gain); /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ if (1) { filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ for (i=0;i<N;i++) { spx_word32_t MM; spx_word32_t theta; spx_word16_t prior_ratio; spx_word16_t tmp; spx_word16_t p; spx_word16_t g; /* Wiener filter gain */ prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); /* Optimal estimator for loudness domain */ MM = hypergeom_gain(theta); /* EM gain with bound */ g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); /* Interpolated speech probability of presence */ p = st->gain2[i]; /* Constrain the gain to be close to the Bark scale gain */ if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) g = MULT16_16(3,st->gain[i]); st->gain[i] = g; /* Save old power spectrum */ st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); /* Apply gain floor */ if (st->gain[i] < st->gain_floor[i]) st->gain[i] = st->gain_floor[i]; /* Exponential decay model for reverberation (unused) */ /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ /* Take into account speech probability of presence (loudness domain MMSE estimator) */ /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */ tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); st->gain2[i]=SQR16_Q15(tmp); /* Use this if you want a log-domain MMSE estimator instead */ /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/ } } else { for (i=N;i<N+M;i++) { spx_word16_t tmp; spx_word16_t p = st->gain2[i]; st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); st->gain2[i]=SQR16_Q15(tmp); } filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); } /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */ if (!st->denoise_enabled) { for (i=0;i<N+M;i++) st->gain2[i]=Q15_ONE; } /* Apply computed gain */ for (i=1;i<N;i++) { st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]); st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]); } st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); /*FIXME: This *will* not work for fixed-point */#ifndef FIXED_POINT if (st->agc_enabled) speex_compute_agc(st, Pframe, st->ft);#endif /* Inverse FFT with 1/N scaling */ spx_ifft(st->fft_lookup, st->ft, st->frame); /* Scale back to original (lower) amplitude */ for (i=0;i<2*N;i++) st->frame[i] = PSHR16(st->frame[i], st->frame_shift); /*FIXME: This *will* not work for fixed-point */#ifndef FIXED_POINT if (st->agc_enabled) { float max_sample=0; for (i=0;i<2*N;i++) if (fabs(st->frame[i])>max_sample) max_sample = fabs(st->frame[i]); if (max_sample>28000.f) { float damp = 28000.f/max_sample; for (i=0;i<2*N;i++) st->frame[i] *= damp; } }#endif /* Synthesis window (for WOLA) */ for (i=0;i<2*N;i++) st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); /* Perform overlap and add */ for (i=0;i<N3;i++) x[i] = st->outbuf[i] + st->frame[i]; for (i=0;i<N4;i++) x[N3+i] = st->frame[N3+i]; /* Update outbuf */ for (i=0;i<N3;i++) st->outbuf[i] = st->frame[st->frame_size+i]; /* FIXME: This VAD is a kludge */ if (st->vad_enabled) { if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue)) { st->was_speech=1; return 1; } else { st->was_speech=0; return 0; } } else { return 1; }}void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x){ int i; int N = st->ps_size; int N3 = 2*N - st->frame_size; int M; spx_word32_t *ps=st->ps; M = st->nbands; st->min_count++; preprocess_analysis(st, x); update_noise_prob(st); for (i=1;i<N-1;i++) { if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) { st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT)); } } for (i=0;i<N3;i++) st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]); /* Save old power spectrum */ for (i=0;i<N+M;i++) st->old_ps[i] = ps[i]; for (i=0;i<N;i++) st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);}int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr){ int i; SpeexPreprocessState *st; st=(SpeexPreprocessState*)state; switch(request) { case SPEEX_PREPROCESS_SET_DENOISE: st->denoise_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_DENOISE: (*(spx_int32_t*)ptr) = st->denoise_enabled; break;#ifndef FIXED_POINT case SPEEX_PREPROCESS_SET_AGC: st->agc_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_AGC: (*(spx_int32_t*)ptr) = st->agc_enabled; break; case SPEEX_PREPROCESS_SET_AGC_LEVEL: st->agc_level = (*(float*)ptr); if (st->agc_level<1) st->agc_level=1; if (st->agc_level>32768) st->agc_level=32768; break; case SPEEX_PREPROCESS_GET_AGC_LEVEL: (*(float*)ptr) = st->agc_level; break; case SPEEX_PREPROCESS_SET_AGC_INCREMENT: st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); break; case SPEEX_PREPROCESS_GET_AGC_INCREMENT: (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size); break; case SPEEX_PREPROCESS_SET_AGC_DECREMENT: st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); break; case SPEEX_PREPROCESS_GET_AGC_DECREMENT: (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size); break; case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN: st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr)); break; case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN: (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain)); break;#endif case SPEEX_PREPROCESS_SET_VAD: speex_warning("The VAD has been replaced by a hack pending a complete rewrite"); st->vad_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_VAD: (*(spx_int32_t*)ptr) = st->vad_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB: st->dereverb_enabled = (*(spx_int32_t*)ptr); for (i=0;i<st->ps_size;i++) st->reverb_estimate[i]=0; break; case SPEEX_PREPROCESS_GET_DEREVERB: (*(spx_int32_t*)ptr) = st->dereverb_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL: st->reverb_level = (*(float*)ptr); break; case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL: (*(float*)ptr) = st->reverb_level; break; case SPEEX_PREPROCESS_SET_DEREVERB_DECAY: st->reverb_decay = (*(float*)ptr); break; case SPEEX_PREPROCESS_GET_DEREVERB_DECAY: (*(float*)ptr) = st->reverb_decay; break; case SPEEX_PREPROCESS_SET_PROB_START: *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr)); st->speech_prob_start = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100); break; case SPEEX_PREPROCESS_GET_PROB_START: (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100); break; case SPEEX_PREPROCESS_SET_PROB_CONTINUE: *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr)); st->speech_prob_continue = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100); break; case SPEEX_PREPROCESS_GET_PROB_CONTINUE: (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100); break; case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS: st->noise_suppress = -ABS(*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS: (*(spx_int32_t*)ptr) = st->noise_suppress; break; case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS: st->echo_suppress = -ABS(*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS: (*(spx_int32_t*)ptr) = st->echo_suppress; break; case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE: st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE: (*(spx_int32_t*)ptr) = st->echo_suppress_active; break; case SPEEX_PREPROCESS_SET_ECHO_STATE: st->echo_state = (SpeexEchoState*)ptr; break; case SPEEX_PREPROCESS_GET_ECHO_STATE: ptr = (void*)st->echo_state; break; default: speex_warning_int("Unknown speex_preprocess_ctl request: ", request); return -1; } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -