📄 preprocess.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 3 页
字号:
   {      if (st->ps_size & ~i)      {         st->ps_size &= ~i;         i<<=1;      } else {         break;      }   }         if (st->ps_size < 3*st->frame_size/4)      st->ps_size = st->ps_size * 3 / 2;#else   st->ps_size = st->frame_size;#endif   N = st->ps_size;   N3 = 2*N - st->frame_size;   N4 = st->frame_size - N3;      st->sampling_rate = sampling_rate;   st->denoise_enabled = 1;   st->vad_enabled = 0;   st->dereverb_enabled = 0;   st->reverb_decay = 0;   st->reverb_level = 0;   st->noise_suppress = NOISE_SUPPRESS_DEFAULT;   st->echo_suppress = ECHO_SUPPRESS_DEFAULT;   st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;   st->speech_prob_start = SPEECH_PROB_START_DEFAULT;   st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;   st->echo_state = NULL;      st->nbands = NB_BANDS;   M = st->nbands;   st->bank = filterbank_new(M, sampling_rate, N, 1);      st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));   st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));   st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));      st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));   st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));   st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));   st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));   st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));   st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));   st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));   st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));   st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));   st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));   st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));   st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));      st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));   st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));   st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));   st->update_prob = (int*)speex_alloc(N*sizeof(int));      st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));   st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));   conj_window(st->window, 2*N3);   for (i=2*N3;i<2*st->ps_size;i++)      st->window[i]=Q15_ONE;      if (N4>0)   {      for (i=N3-1;i>=0;i--)      {         st->window[i+N3+N4]=st->window[i+N3];         st->window[i+N3]=1;      }   }   for (i=0;i<N+M;i++)   {      st->noise[i]=QCONST32(1.f,NOISE_SHIFT);      st->reverb_estimate[i]=0;      st->old_ps[i]=1;      st->gain[i]=Q15_ONE;      st->post[i]=SHL16(1, SNR_SHIFT);      st->prior[i]=SHL16(1, SNR_SHIFT);   }   for (i=0;i<N;i++)      st->update_prob[i] = 1;   for (i=0;i<N3;i++)   {      st->inbuf[i]=0;      st->outbuf[i]=0;   }#ifndef FIXED_POINT   st->agc_enabled = 0;   st->agc_level = 8000;   st->loudness_weight = (float*)speex_alloc(N*sizeof(float));   for (i=0;i<N;i++)   {      float ff=((float)i)*.5*sampling_rate/((float)N);      /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/      st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);      if (st->loudness_weight[i]<.01f)         st->loudness_weight[i]=.01f;      st->loudness_weight[i] *= st->loudness_weight[i];   }   /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/   st->loudness = 1e-15;   st->agc_gain = 1;   st->max_gain = 30;   st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);   st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);   st->prev_loudness = 1;   st->init_max = 1;#endif   st->was_speech = 0;   st->fft_lookup = spx_fft_init(2*N);   st->nb_adapt=0;   st->min_count=0;   return st;}EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st){   speex_free(st->frame);   speex_free(st->ft);   speex_free(st->ps);   speex_free(st->gain2);   speex_free(st->gain_floor);   speex_free(st->window);   speex_free(st->noise);   speex_free(st->reverb_estimate);   speex_free(st->old_ps);   speex_free(st->gain);   speex_free(st->prior);   speex_free(st->post);#ifndef FIXED_POINT   speex_free(st->loudness_weight);#endif   speex_free(st->echo_noise);   speex_free(st->residual_echo);   speex_free(st->S);   speex_free(st->Smin);   speex_free(st->Stmp);   speex_free(st->update_prob);   speex_free(st->zeta);   speex_free(st->inbuf);   speex_free(st->outbuf);   spx_fft_destroy(st->fft_lookup);   filterbank_destroy(st->bank);   speex_free(st);}/* FIXME: The AGC doesn't work yet with fixed-point*/#ifndef FIXED_POINTstatic void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft){   int i;   int N = st->ps_size;   float target_gain;   float loudness=1.f;   float rate;      for (i=2;i<N;i++)   {      loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];   }   loudness=sqrt(loudness);      /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&   loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/   if (Pframe>.3f)   {      /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/      rate = .03*Pframe*Pframe;      st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);      st->loudness_accum = (1-rate)*st->loudness_accum + rate;      if (st->init_max < st->max_gain && st->nb_adapt > 20)         st->init_max *= 1.f + .1f*Pframe*Pframe;   }   /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/      target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);   if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)   {      if (target_gain > st->max_increase_step*st->agc_gain)         target_gain = st->max_increase_step*st->agc_gain;      if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)         target_gain = st->max_decrease_step*st->agc_gain;      if (target_gain > st->max_gain)         target_gain = st->max_gain;      if (target_gain > st->init_max)         target_gain = st->init_max;         st->agc_gain = target_gain;   }   /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/         for (i=0;i<2*N;i++)      ft[i] *= st->agc_gain;   st->prev_loudness = loudness;}#endifstatic void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x){   int i;   int N = st->ps_size;   int N3 = 2*N - st->frame_size;   int N4 = st->frame_size - N3;   spx_word32_t *ps=st->ps;   /* 'Build' input frame */   for (i=0;i<N3;i++)      st->frame[i]=st->inbuf[i];   for (i=0;i<st->frame_size;i++)      st->frame[N3+i]=x[i];      /* Update inbuf */   for (i=0;i<N3;i++)      st->inbuf[i]=x[N4+i];   /* Windowing */   for (i=0;i<2*N;i++)      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);#ifdef FIXED_POINT   {      spx_word16_t max_val=0;      for (i=0;i<2*N;i++)         max_val = MAX16(max_val, ABS16(st->frame[i]));      st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));      for (i=0;i<2*N;i++)         st->frame[i] = SHL16(st->frame[i], st->frame_shift);   }#endif      /* Perform FFT */   spx_fft(st->fft_lookup, st->frame, st->ft);            /* Power spectrum */   ps[0]=MULT16_16(st->ft[0],st->ft[0]);   for (i=1;i<N;i++)      ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);   for (i=0;i<N;i++)      st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);   filterbank_compute_bank32(st->bank, ps, ps+N);}static void update_noise_prob(SpeexPreprocessState *st){   int i;   int min_range;   int N = st->ps_size;   for (i=1;i<N-1;i++)      st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])                       + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);   st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);   st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);      if (st->nb_adapt==1)   {      for (i=0;i<N;i++)         st->Smin[i] = st->Stmp[i] = 0;   }   if (st->nb_adapt < 100)      min_range = 15;   else if (st->nb_adapt < 1000)      min_range = 50;   else if (st->nb_adapt < 10000)      min_range = 150;   else      min_range = 300;   if (st->min_count > min_range)   {      st->min_count = 0;      for (i=0;i<N;i++)      {         st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);         st->Stmp[i] = st->S[i];      }   } else {      for (i=0;i<N;i++)      {         st->Smin[i] = MIN32(st->Smin[i], st->S[i]);         st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);            }   }   for (i=0;i<N;i++)   {      if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])         st->update_prob[i] = 1;      else         st->update_prob[i] = 0;      /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/      /*fprintf (stderr, "%f ", st->update_prob[i]);*/   }}#define NOISE_OVERCOMPENS 1.void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo){   return speex_preprocess_run(st, x);}EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x){   int i;   int M;   int N = st->ps_size;   int N3 = 2*N - st->frame_size;   int N4 = st->frame_size - N3;   spx_word32_t *ps=st->ps;   spx_word32_t Zframe;   spx_word16_t Pframe;   spx_word16_t beta, beta_1;   spx_word16_t effective_echo_suppress;      st->nb_adapt++;   if (st->nb_adapt>20000)      st->nb_adapt = 20000;   st->min_count++;      beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));   beta_1 = Q15_ONE-beta;   M = st->nbands;   /* Deal with residual echo if provided */   if (st->echo_state)   {      speex_echo_get_residual(st->echo_state, st->residual_echo, N);#ifndef FIXED_POINT      /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */      if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))      {         for (i=0;i<N;i++)            st->residual_echo[i] = 0;      }#endif      for (i=0;i<N;i++)         st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);      filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);   } else {      for (i=0;i<N+M;i++)         st->echo_noise[i] = 0;   }   preprocess_analysis(st, x);   update_noise_prob(st);   /* Noise estimation always updated for the 10 first frames */   /*if (st->nb_adapt<10)   {      for (i=1;i<N-1;i++)         st->update_prob[i] = 0;   }   */      /* Update the noise estimate for the frequencies where it can be */   for (i=0;i<N;i++)   {      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))         st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));   }   filterbank_compute_bank32(st->bank, st->noise, st->noise+N);   /* Special case for first frame */   if (st->nb_adapt==1)      for (i=0;i<N+M;i++)         st->old_ps[i] = ps[i];   /* Compute a posteriori SNR */   for (i=0;i<N+M;i++)   {      spx_word16_t gamma;            /* Total noise estimate including residual echo and reverberation */      spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);            /* A posteriori SNR = ps/noise - 1*/      st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));      st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));            /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */      gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));            /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */      st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));      st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));   }   /*print_vec(st->post, N+M, "");*/   /* Recursive average of the a priori SNR. A bit smoothed for the psd components */   st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -