📄 preprocess_spx.c

📁 一个开源SIP协议栈
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
   
   /* Update the noise estimate for the frequencies where it can be */
   for (i=0;i<N;i++)
   {
      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
         st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
   }
   filterbank_compute_bank32(st->bank, st->noise, st->noise+N);

   /* Special case for first frame */
   if (st->nb_adapt==1)
      for (i=0;i<N+M;i++)
         st->old_ps[i] = ps[i];

   /* Compute a posteriori SNR */
   for (i=0;i<N+M;i++)
   {
      spx_word16_t gamma;
      
      /* Total noise estimate including residual echo and reverberation */
      spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
      
      /* A posteriori SNR = ps/noise - 1*/
      st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
      st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
      
      /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
      gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
      
      /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
      st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
      st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
   }

   /*print_vec(st->post, N+M, "");*/

   /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
   st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
   for (i=1;i<N-1;i++)
      st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
                           MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
   for (i=N-1;i<N+M;i++)
      st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);

   /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
   Zframe = 0;
   for (i=N;i<N+M;i++)
      Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
   Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
   
   effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
   
   compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
         
   /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) 
      Technically this is actually wrong because the EM gaim assumes a slightly different probability 
      distribution */
   for (i=N;i<N+M;i++)
   {
      /* See EM and Cohen papers*/
      spx_word32_t theta;
      /* Gain from hypergeometric function */
      spx_word32_t MM;
      /* Weiner filter gain */
      spx_word16_t prior_ratio;
      /* a priority probability of speech presence based on Bark sub-band alone */
      spx_word16_t P1;
      /* Speech absence a priori probability (considering sub-band and frame) */
      spx_word16_t q;
#ifdef FIXED_POINT
      spx_word16_t tmp;
#endif
      
      prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
      theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));

      MM = hypergeom_gain(theta);
      /* Gain with bound */
      st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
      /* Save old Bark power spectrum */
      st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);

      P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
      q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
#ifdef FIXED_POINT
      theta = MIN32(theta, EXTEND32(32767));
/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
      tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
/*Q8*/tmp = PSHR(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8);
      st->gain2[i]=DIV32_16(SHL(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
#else
      st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
#endif
   }
   /* Convert the EM gains and speech prob to linear frequency */
   filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
   filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
   
   /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
   if (1)
   {
      filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
   
      /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
      for (i=0;i<N;i++)
      {
         spx_word32_t MM;
         spx_word32_t theta;
         spx_word16_t prior_ratio;
         spx_word16_t tmp;
         spx_word16_t p;
         spx_word16_t g;
         
         /* Wiener filter gain */
         prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
         theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));

         /* Optimal estimator for loudness domain */
         MM = hypergeom_gain(theta);
         /* EM gain with bound */
         g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
         /* Interpolated speech probability of presence */
         p = st->gain2[i];
                  
         /* Constrain the gain to be close to the Bark scale gain */
         if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
            g = MULT16_16(3,st->gain[i]);
         st->gain[i] = g;
         
         /* Save old power spectrum */
         st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
         
         /* Apply gain floor */
         if (st->gain[i] < st->gain_floor[i])
            st->gain[i] = st->gain_floor[i];

         /* Exponential decay model for reverberation (unused) */
         /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
         
         /* Take into account speech probability of presence (loudness domain MMSE estimator) */
         /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
         st->gain2[i]=SQR16_Q15(tmp);

         /* Use this if you want a log-domain MMSE estimator instead */
         /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
      }
   } else {
      for (i=N;i<N+M;i++)
      {
         spx_word16_t tmp;
         spx_word16_t p = st->gain2[i];
         st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);         
         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
         st->gain2[i]=SQR16_Q15(tmp);
      }
      filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
   }
   
   /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
   if (!st->denoise_enabled)
   {
      for (i=0;i<N+M;i++)
         st->gain2[i]=Q15_ONE;
   }
   
   /*FIXME: This *will* not work for fixed-point */
#ifndef FIXED_POINT
   if (st->agc_enabled)
      speex_compute_agc(st);
#endif
   
   /* Apply computed gain */
   for (i=1;i<N;i++)
   {
      st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
      st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
   }
   st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
   st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
   
   /* Inverse FFT with 1/N scaling */
   spx_ifft(st->fft_lookup, st->ft, st->frame);
   /* Scale back to original (lower) amplitude */
   for (i=0;i<2*N;i++)
      st->frame[i] = PSHR16(st->frame[i], st->frame_shift);

   /*FIXME: This *will* not work for fixed-point */
#ifndef FIXED_POINT
   if (st->agc_enabled)
   {
      float max_sample=0;
      for (i=0;i<2*N;i++)
         if (fabs(st->frame[i])>max_sample)
            max_sample = fabs(st->frame[i]);
      if (max_sample>28000.f)
      {
         float damp = 28000.f/max_sample;
         for (i=0;i<2*N;i++)
            st->frame[i] *= damp;
      }
   }
#endif
   
   /* Synthesis window (for WOLA) */
   for (i=0;i<2*N;i++)
      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);

   /* Perform overlap and add */
   for (i=0;i<N3;i++)
      x[i] = st->outbuf[i] + st->frame[i];
   for (i=0;i<N4;i++)
      x[N3+i] = st->frame[N3+i];
   
   /* Update outbuf */
   for (i=0;i<N3;i++)
      st->outbuf[i] = st->frame[st->frame_size+i];

   /* FIXME: This VAD is a kludge */
   if (st->vad_enabled)
   {
      if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))
      {
         st->was_speech=1;
         return 1;
      } else
      {
         st->was_speech=0;
         return 0;
      }
   } else {
      return 1;
   }
}

void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
{
   int i;
   int N = st->ps_size;
   int N3 = 2*N - st->frame_size;
   int M;
   spx_word32_t *ps=st->ps;

   M = st->nbands;
   st->min_count++;
   
   preprocess_analysis(st, x);

   update_noise_prob(st);
   
   for (i=1;i<N-1;i++)
   {
      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
      {
         st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
      }
   }

   for (i=0;i<N3;i++)
      st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);

   /* Save old power spectrum */
   for (i=0;i<N+M;i++)
      st->old_ps[i] = ps[i];

   for (i=0;i<N;i++)
      st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
}


int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
{
   int i;
   SpeexPreprocessState *st;
   st=(SpeexPreprocessState*)state;
   switch(request)
   {
   case SPEEX_PREPROCESS_SET_DENOISE:
      st->denoise_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_DENOISE:
      (*(spx_int32_t*)ptr) = st->denoise_enabled;
      break;
#ifndef FIXED_POINT
   case SPEEX_PREPROCESS_SET_AGC:
      st->agc_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_AGC:
      (*(spx_int32_t*)ptr) = st->agc_enabled;
      break;

   case SPEEX_PREPROCESS_SET_AGC_LEVEL:
      st->agc_level = (*(float*)ptr);
      if (st->agc_level<1)
         st->agc_level=1;
      if (st->agc_level>32768)
         st->agc_level=32768;
      break;
   case SPEEX_PREPROCESS_GET_AGC_LEVEL:
      (*(float*)ptr) = st->agc_level;
      break;
#endif
   case SPEEX_PREPROCESS_SET_VAD:
      speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
      st->vad_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_VAD:
      (*(spx_int32_t*)ptr) = st->vad_enabled;
      break;
   
   case SPEEX_PREPROCESS_SET_DEREVERB:
      st->dereverb_enabled = (*(spx_int32_t*)ptr);
      for (i=0;i<st->ps_size;i++)
         st->reverb_estimate[i]=0;
      break;
   case SPEEX_PREPROCESS_GET_DEREVERB:
      (*(spx_int32_t*)ptr) = st->dereverb_enabled;
      break;

   case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
      st->reverb_level = (*(float*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
      (*(float*)ptr) = st->reverb_level;
      break;
   
   case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
      st->reverb_decay = (*(float*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
      (*(float*)ptr) = st->reverb_decay;
      break;

   case SPEEX_PREPROCESS_SET_PROB_START:
      *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr));
      st->speech_prob_start = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100);
      break;
   case SPEEX_PREPROCESS_GET_PROB_START:
      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
      break;

   case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
      *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr));
      st->speech_prob_continue = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100);
      break;
   case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
      break;

   case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
      st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
      (*(spx_int32_t*)ptr) = st->noise_suppress;
      break;
   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
      st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
      (*(spx_int32_t*)ptr) = st->echo_suppress;
      break;
   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
      st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
      break;
   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
      (*(spx_int32_t*)ptr) = st->echo_suppress_active;
      break;
   case SPEEX_PREPROCESS_SET_ECHO_STATE:
      st->echo_state = (SpeexEchoState*)ptr;
      break;
   case SPEEX_PREPROCESS_GET_ECHO_STATE:
      ptr = (void*)st->echo_state;
      break;

   default:
      speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
      return -1;
   }
   return 0;
}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -