📄 preprocess.c

📁 一个开源的sip源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
   /* Special case for first frame */   if (st->nb_adapt==1)      for (i=0;i<N+M;i++)         st->old_ps[i] = ps[i];   /* Compute a posteriori SNR */   for (i=0;i<N+M;i++)   {      spx_word16_t gamma;            /* Total noise estimate including residual echo and reverberation */      spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);            /* A posteriori SNR = ps/noise - 1*/      st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));      st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));            /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */      gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));            /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */      st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));      st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));   }   /*print_vec(st->post, N+M, "");*/   /* Recursive average of the a priori SNR. 
A bit smoothed for the psd components */   st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);   for (i=1;i<N-1;i++)      st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),                           MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);   for (i=N-1;i<N+M;i++)      st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);   /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */   Zframe = 0;   for (i=N;i<N+M;i++)      Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));   Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));      effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));      compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);            /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)       Technically this is actually wrong because the EM gaim assumes a slightly different probability       distribution */   for (i=N;i<N+M;i++)   {      /* See EM and Cohen papers*/      spx_word32_t theta;      /* Gain from hypergeometric function */      spx_word32_t MM;      /* Weiner filter gain */      spx_word16_t prior_ratio;      /* a priority probability of speech presence based on Bark sub-band alone */      spx_word16_t P1;      /* Speech absence a priori probability (considering sub-band and frame) */      spx_word16_t q;#ifdef FIXED_POINT      spx_word16_t tmp;#endif            prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));      theta = 
MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));      MM = hypergeom_gain(theta);      /* Gain with bound */      st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));      /* Save old Bark power spectrum */      st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);      P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));      q = Q15_ONE-MULT16_16_Q15(Pframe,P1);#ifdef FIXED_POINT      theta = MIN32(theta, EXTEND32(32767));/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));      tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*//*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));      st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));#else      st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));#endif   }   /* Convert the EM gains and speech prob to linear frequency */   filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);   filterbank_compute_psd16(st->bank,st->gain+N, st->gain);      /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */   if (1)   {      filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);         /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */      for (i=0;i<N;i++)      {         spx_word32_t MM;         spx_word32_t theta;         spx_word16_t prior_ratio;         spx_word16_t tmp;         spx_word16_t p;         spx_word16_t g;                  /* Wiener filter gain */         prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));         theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));     
    /* Optimal estimator for loudness domain */         MM = hypergeom_gain(theta);         /* EM gain with bound */         g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));         /* Interpolated speech probability of presence */         p = st->gain2[i];                           /* Constrain the gain to be close to the Bark scale gain */         if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])            g = MULT16_16(3,st->gain[i]);         st->gain[i] = g;                  /* Save old power spectrum */         st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);                  /* Apply gain floor */         if (st->gain[i] < st->gain_floor[i])            st->gain[i] = st->gain_floor[i];         /* Exponential decay model for reverberation (unused) */         /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/                  /* Take into account speech probability of presence (loudness domain MMSE estimator) */         /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));         st->gain2[i]=SQR16_Q15(tmp);         /* Use this if you want a log-domain MMSE estimator instead */         /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/      }   } else {      for (i=N;i<N+M;i++)      {         spx_word16_t tmp;         spx_word16_t p = st->gain2[i];         st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);                  tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));         st->gain2[i]=SQR16_Q15(tmp);      }      filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);   }      /* 
If noise suppression is off, don't apply the gain (but then why call this in the first place!) */   if (!st->denoise_enabled)   {      for (i=0;i<N+M;i++)         st->gain2[i]=Q15_ONE;   }         /* Apply computed gain */   for (i=1;i<N;i++)   {      st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);      st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);   }   st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);   st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);      /*FIXME: This *will* not work for fixed-point */#ifndef FIXED_POINT   if (st->agc_enabled)      speex_compute_agc(st, Pframe, st->ft);#endif   /* Inverse FFT with 1/N scaling */   spx_ifft(st->fft_lookup, st->ft, st->frame);   /* Scale back to original (lower) amplitude */   for (i=0;i<2*N;i++)      st->frame[i] = PSHR16(st->frame[i], st->frame_shift);   /*FIXME: This *will* not work for fixed-point */#ifndef FIXED_POINT   if (st->agc_enabled)   {      float max_sample=0;      for (i=0;i<2*N;i++)         if (fabs(st->frame[i])>max_sample)            max_sample = fabs(st->frame[i]);      if (max_sample>28000.f)      {         float damp = 28000.f/max_sample;         for (i=0;i<2*N;i++)            st->frame[i] *= damp;      }   }#endif      /* Synthesis window (for WOLA) */   for (i=0;i<2*N;i++)      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);   /* Perform overlap and add */   for (i=0;i<N3;i++)      x[i] = st->outbuf[i] + st->frame[i];   for (i=0;i<N4;i++)      x[N3+i] = st->frame[N3+i];      /* Update outbuf */   for (i=0;i<N3;i++)      st->outbuf[i] = st->frame[st->frame_size+i];   /* FIXME: This VAD is a kludge */   if (st->vad_enabled)   {      if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))      {         st->was_speech=1;         return 1;      } else      {         st->was_speech=0;         return 0;      }   } else {      return 1;   }}void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t 
*x){   int i;   int N = st->ps_size;   int N3 = 2*N - st->frame_size;   int M;   spx_word32_t *ps=st->ps;   M = st->nbands;   st->min_count++;      preprocess_analysis(st, x);   update_noise_prob(st);      for (i=1;i<N-1;i++)   {      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))      {         st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));      }   }   for (i=0;i<N3;i++)      st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);   /* Save old power spectrum */   for (i=0;i<N+M;i++)      st->old_ps[i] = ps[i];   for (i=0;i<N;i++)      st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);}int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr){   int i;   SpeexPreprocessState *st;   st=(SpeexPreprocessState*)state;   switch(request)   {   case SPEEX_PREPROCESS_SET_DENOISE:      st->denoise_enabled = (*(spx_int32_t*)ptr);      break;   case SPEEX_PREPROCESS_GET_DENOISE:      (*(spx_int32_t*)ptr) = st->denoise_enabled;      break;#ifndef FIXED_POINT   case SPEEX_PREPROCESS_SET_AGC:      st->agc_enabled = (*(spx_int32_t*)ptr);      break;   case SPEEX_PREPROCESS_GET_AGC:      (*(spx_int32_t*)ptr) = st->agc_enabled;      break;   case SPEEX_PREPROCESS_SET_AGC_LEVEL:      st->agc_level = (*(float*)ptr);      if (st->agc_level<1)         st->agc_level=1;      if (st->agc_level>32768)         st->agc_level=32768;      break;   case SPEEX_PREPROCESS_GET_AGC_LEVEL:      (*(float*)ptr) = st->agc_level;      break;   case SPEEX_PREPROCESS_SET_AGC_INCREMENT:      st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);      break;   case SPEEX_PREPROCESS_GET_AGC_INCREMENT:      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);      break;   case SPEEX_PREPROCESS_SET_AGC_DECREMENT:      
st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);      break;   case SPEEX_PREPROCESS_GET_AGC_DECREMENT:      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);      break;   case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:      st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));      break;   case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));      break;#endif   case SPEEX_PREPROCESS_SET_VAD:      speex_warning("The VAD has been replaced by a hack pending a complete rewrite");      st->vad_enabled = (*(spx_int32_t*)ptr);      break;   case SPEEX_PREPROCESS_GET_VAD:      (*(spx_int32_t*)ptr) = st->vad_enabled;      break;      case SPEEX_PREPROCESS_SET_DEREVERB:      st->dereverb_enabled = (*(spx_int32_t*)ptr);      for (i=0;i<st->ps_size;i++)         st->reverb_estimate[i]=0;      break;   case SPEEX_PREPROCESS_GET_DEREVERB:      (*(spx_int32_t*)ptr) = st->dereverb_enabled;      break;   case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:      st->reverb_level = (*(float*)ptr);      break;   case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:      (*(float*)ptr) = st->reverb_level;      break;      case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:      st->reverb_decay = (*(float*)ptr);      break;   case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:      (*(float*)ptr) = st->reverb_decay;      break;   case SPEEX_PREPROCESS_SET_PROB_START:      *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr));      st->speech_prob_start = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100);      break;   case SPEEX_PREPROCESS_GET_PROB_START:      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);      break;   case SPEEX_PREPROCESS_SET_PROB_CONTINUE:      *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr));      st->speech_prob_continue = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100);      break;   case 
SPEEX_PREPROCESS_GET_PROB_CONTINUE:      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);      break;   case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:      st->noise_suppress = -ABS(*(spx_int32_t*)ptr);      break;   case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:      (*(spx_int32_t*)ptr) = st->noise_suppress;      break;   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:      st->echo_suppress = -ABS(*(spx_int32_t*)ptr);      break;   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:      (*(spx_int32_t*)ptr) = st->echo_suppress;      break;   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:      st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);      break;   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:      (*(spx_int32_t*)ptr) = st->echo_suppress_active;      break;   case SPEEX_PREPROCESS_SET_ECHO_STATE:      st->echo_state = (SpeexEchoState*)ptr;      break;   case SPEEX_PREPROCESS_GET_ECHO_STATE:      ptr = (void*)st->echo_state;      break;   default:      speex_warning_int("Unknown speex_preprocess_ctl request: ", request);      return -1;   }   return 0;}
上一页 1 2 3
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -