📄 wb_vad.c

📁 关于AMR-WB+语音压缩编码的实现代码
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
      }
   }
   /* Update noise estimate (bckr_est) */
   for (i = 0; i < COMPLEN; i++)
   {
      temp = st->old_level[i] - st->bckr_est[i];
      if (temp < 0.0)
      { /* update downwards*/
         st->bckr_est[i] += -2 + (alpha_down * temp);
         /* limit minimum value of the noise estimate to NOISE_MIN */
         if (st->bckr_est[i] < NOISE_MIN)
         {
            st->bckr_est[i] = NOISE_MIN;
         }
      }
      else
      { /* update upwards */
         st->bckr_est[i] += bckr_add +(alpha_up * temp);
         /* limit maximum value of the noise estimate to NOISE_MAX */
         if (st->bckr_est[i] > NOISE_MAX)
         {
            st->bckr_est[i] = NOISE_MAX;
         }
      }
   }
   /* Update signal levels of the previous frame (old_level) */
   for(i = 0; i < COMPLEN; i++)
   {
      st->old_level[i] = level[i];
   }
}
/******************************************************************************
*
*     Function     : vad_decision
*     Purpose      : Calculates VAD_flag
*
*     The function
*       1. sums the squared ratios level[i]/bckr_est[i] over all bands,
*       2. derives an adaptive threshold (vad_thr) from the logarithms of
*          the average background-noise level and of st->speech_level,
*       3. shifts st->vadreg right by one and sets bit 0x4000 when the
*          summed ratio exceeds the scaled threshold,
*       4. updates the background-noise estimates, and
*       5. returns the result of hangover_addition().
*
*     Side effects : st->speech_level may be raised, st->vadreg is updated,
*                    and noise_estimate_update()/hangover_addition() are
*                    called with st.
*
*******************************************************************************
*/
static Word16 vad_decision( /* return value : VAD_flag */
  VadVars *st,          /* i/o : State structure                    */
  float level[COMPLEN], /* i   : sub-band levels of the input frame */
  double pow_sum        /* i   : power of the input frame           */
)
{
   Word16 band;
   Word16 low_power_flag;
   Word16 hang_len, burst_len;
   double snr_sum = 0.0;   /* sum of squared level/noise ratios        */
   double bckr_sum = 0.0;  /* sum of noise estimates, bands 1..COMPLEN-1 */
   float ratio, noise_level, speech_floor;
   float noise_term, speech_term, vad_thr;
   float ilog2_noise_level, ilog2_speech_level;

   /*
      One pass over the bands:
        - accumulate the squared ratio of the input level to the
          background noise estimate (all bands),
        - accumulate the background noise estimate itself
          (the lowest band is ignored).
   */
   for (band = 0; band < COMPLEN; band++)
   {
      ratio = level[band]/st->bckr_est[band];
      snr_sum += ratio * ratio;

      if (band >= 1)
      {
         bckr_sum += st->bckr_est[band];
      }
   }

   /* Average level of the estimated background noise (sum scaled by 1/16) */
   noise_level = (float)(bckr_sum/16.0f);

   /*
      Keep speech_level at or above noise_level*MIN_SPEECH_SNR*8:
      if it is lower than that floor, raise it to the floor.
   */
   speech_floor = noise_level*MIN_SPEECH_SNR*8;
   if (st->speech_level < speech_floor)
   {
      st->speech_level = speech_floor;
   }

   /* -1024 * log2(noise_level / 2^31) */
   ilog2_noise_level = (float)(-1024.0f*log10(noise_level / 2147483648.0f)/log10(2.0f));

   /*
      If SNR is very poor, speech_level is probably corrupted by the noise
      level. This is corrected by subtracting the floor computed above
      (noise_level*MIN_SPEECH_SNR*8) from speech_level before taking the
      logarithm.
   */
   ilog2_speech_level = (float)(-1024.0f*log10((st->speech_level-speech_floor) / 2147483648.0f)/log10(2.0f));

   /* Noise-dependent part of the threshold */
   noise_term = NO_SLOPE * (ilog2_noise_level- NO_P1)+ THR_HIGH;

   /* Speech-dependent part, limited to [SP_CH_MIN, SP_CH_MAX] */
   speech_term = SP_CH_MIN + SP_SLOPE*(ilog2_speech_level - SP_P1);
   if (speech_term < SP_CH_MIN)
   {
      speech_term = SP_CH_MIN;
   }
   if (speech_term > SP_CH_MAX)
   {
      speech_term = SP_CH_MAX;
   }

   /* Combined threshold, never below THR_MIN */
   vad_thr = noise_term + speech_term;
   if (vad_thr < THR_MIN)
   {
      vad_thr = THR_MIN;
   }

   /* Shift the VAD decision register, then record the intermediate
      decision of this frame in bit 0x4000 */
   st->vadreg = (short)((st->vadreg)>>1);
   if (snr_sum > (vad_thr*(float)COMPLEN/128.0f))
   {
      st->vadreg = (Word16)(st->vadreg | 0x4000);
   }
   /* primary VAD decision made */

   /* Flag frames whose input power (pow_sum) is below VAD_POW_LOW */
   low_power_flag = (pow_sum < VAD_POW_LOW) ? 1 : 0;

   /* Update sub-band background noise estimates */
   noise_estimate_update(st, level);

   /* Hangover length depends on the threshold; limited from below */
   hang_len = (Word16)((Word16)(HANG_SLOPE * (vad_thr - HANG_P1) - 0.5) + HANG_HIGH);
   if (hang_len < HANG_LOW)
   {
      hang_len = HANG_LOW;
   }

   /* Burst length depends on the threshold as well */
   burst_len = (Word16)((Word16)(BURST_SLOPE * (vad_thr - BURST_P1) - 0.5) + BURST_HIGH);

   return(hangover_addition(st, low_power_flag, hang_len, burst_len));
}
/******************************************************************************
*
*     Function     : Estimate_Speech
*     Purpose      : Estimate speech level
*
* The maximum input level seen so far is tracked in st->sp_max. Once
* SP_ACTIVITY_COUNT qualifying frames have been counted, half of that
* maximum is used to move st->speech_level (with ALPHA_SP_UP or
* ALPHA_SP_DOWN as the step), and the counters are cleared.
* The qualifying frames must fall within SP_EST_COUNT frames to be counted;
* otherwise the counters are reset. Thus, noisy frames having occasional
* VAD = "1" decisions will not affect the estimated speech_level.
*
*******************************************************************************
*/
static void Estimate_Speech(
  VadVars *st,    /* i/o : State structure    */
  float in_level /* i   : level of the input frame */
)
{
  float half_max;  /* sp_max scaled to an "average" speech level */
  float step;      /* adaptation factor for speech_level         */
  int   active;    /* frame looks like speech                    */

  /* If the required activity count can no longer be reached within the
     estimation window, start a new window. */
  if ((SP_EST_COUNT - st->sp_est_cnt + st->sp_max_cnt) < SP_ACTIVITY_COUNT)
  {
    st->sp_est_cnt = 0;
    st->sp_max = 0.0;
    st->sp_max_cnt = 0;
  }
  st->sp_est_cnt++;

  /* A frame qualifies when the intermediate VAD bit is set or its level
     exceeds the current speech level, and it is above MIN_SPEECH_LEVEL1. */
  active = ((st->vadreg & 0x4000) != 0) || (in_level > st->speech_level);
  if (!active || !(in_level > MIN_SPEECH_LEVEL1))
  {
    return;
  }

  /* Track the maximum level and count the qualifying frame */
  if (in_level > st->sp_max)
  {
    st->sp_max = in_level;
  }
  st->sp_max_cnt++;

  if (st->sp_max_cnt < SP_ACTIVITY_COUNT)
  {
    return;
  }

  /* Enough activity collected: adapt speech_level towards sp_max/2 */
  half_max = st->sp_max/2.0f;
  step = (half_max > st->speech_level) ? ALPHA_SP_UP : ALPHA_SP_DOWN;
  if (half_max > MIN_SPEECH_LEVEL2)
  {
    st->speech_level += step*(half_max - st->speech_level);
  }

  /* Begin a fresh estimation window */
  st->sp_max = 0.0;
  st->sp_max_cnt = 0;
  st->sp_est_cnt = 0;
}
/******************************************************************************
*                         PUBLIC PROGRAM CODE
******************************************************************************/
/******************************************************************************
*
*  Function:   wb_vad_init
*  Purpose:    Allocates state memory and initializes state memory
*
*  On success *state points to a newly allocated, reset VadVars structure
*  and 0 is returned. On failure -1 is returned; if state itself was valid,
*  *state is left as NULL.
*
*******************************************************************************
*/
int wb_vad_init ( /* return: non-zero with error, zero for ok. */
                 VadVars **state    /* i/o : State structure    */
                 )
{
  VadVars *s;

  if (!state)
  {
    fprintf(stderr, "vad_init: invalid parameter\n");
    return -1;
  }

  /* Make sure the caller never sees a stale pointer on failure */
  *state = NULL;

  /* allocate memory */
  s = malloc(sizeof *s);
  if (!s)
  {
    fprintf(stderr, "vad_init: can not malloc state structure\n");
    return -1;
  }

  /* bring the new structure into its initial state and hand it over */
  wb_vad_reset(s);
  *state = s;

  return 0;
}
/******************************************************************************
*
*  Function:   wb_vad_reset
*  Purpose:    Puts the state memory into its initial condition:
*              flags, counters, filter memories and level histories are
*              cleared; bckr_est, old_level and ave_level are set to
*              NOISE_INIT; speech_level is set to SPEECH_LEVEL_INIT.
*
*  Returns 0 on success, -1 (after printing a message to stderr) when
*  state is NULL.
*
*******************************************************************************
*/
int wb_vad_reset ( /* return: non-zero with error, zero for ok. */
  VadVars *state  /* i/o : State structure    */
)
{
   Word16 i, j;

   if (state == (VadVars *) NULL){
      fprintf(stderr, "vad_reset: invalid parameter\n");
      return -1;
   }

   /* Initialize pitch/tone detection and decision variables.
      NOTE(review): the previous version assigned hang_count twice; the
      redundant store is removed here. If a different field was meant to
      be cleared by the second assignment, confirm against VadVars. */
   state->pitch_tone = 0;
   state->vadreg = 0;
   state->hang_count = 0;
   state->burst_count = 0;

   /* initialize memory used by the filter bank */
   for (i = 0; i < F_5TH_CNT; i++)
   {
      for (j = 0; j < 2; j++)
      {
         state->a_data5[i][j] = 0.0;
      }
   }
   for (i = 0; i < F_3TH_CNT; i++)
   {
      state->a_data3[i] = 0.0;
   }

   /* initialize the per-band noise estimates and level histories */
   for (i = 0; i < COMPLEN; i++)
   {
      state->bckr_est[i] = NOISE_INIT;
      state->old_level[i] = NOISE_INIT;
      state->ave_level[i] = NOISE_INIT;
      state->sub_level[i] = 0;
      state->level[i] = 0.0;
      state->prevLevel[i] = 0.0;
   }

   /* initialize the speech level estimator */
   state->sp_est_cnt = 0;
   state->sp_max = 0;
   state->sp_max_cnt = 0;
   state->speech_level = SPEECH_LEVEL_INIT;

   /* no previous frame power yet */
   state->prev_pow_sum = 0;

   return 0;
}
/******************************************************************************
*
*  Function:   wb_vad_exit
*  Purpose:    The memory used for state memory is freed
*
*  Does nothing when state or *state is NULL. After the memory has been
*  released, *state is set to NULL.
*
*******************************************************************************
*/
void wb_vad_exit (
  VadVars **state /* i/o : State structure    */
)
{
    if (state != NULL && *state != NULL)
    {
        /* deallocate memory and clear the caller's pointer */
        free(*state);
        *state = NULL;
    }
}
/******************************************************************************
*
*     Function     : wb_vad_pitch_tone_detection
*     Purpose      : Set tone flag if pitch gain is high. This is used to detect
*                    signaling tones and other signals with high pitch gain.
*
*     st->pitch_tone is shifted right by one position on every call; bit
*     0x4000 is then set when p_gain exceeds TONE_THR.
*
*******************************************************************************
*/
void wb_vad_pitch_tone_detection (
  VadVars *st,  /* i/o : State struct            */
  float p_gain /* i   : pitch gain              */
)
{
   /* age the existing flags by one frame */
   Word16 flags = (Word16)((st->pitch_tone)>>1);

   /* mark the current frame when the pitch gain is above the threshold */
   if (p_gain > TONE_THR)
   {
      flags = (Word16)(flags | 0x4000);
   }

   st->pitch_tone = flags;
}
/******************************************************************************
*
*     Function     : wb_vad
*     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
*
*     Per call the function saves the previous band levels, measures the
*     power of the input frame, runs the filter bank, obtains the VAD
*     decision from vad_decision() and finally updates the speech level
*     estimate with Estimate_Speech().
*
*******************************************************************************
*/
Word16 wb_vad( /* Return value : VAD Decision, 1 = speech, 0 = noise */
  VadVars *st,      /* i/o : State structure                 */
  float in_buf[]   /* i   : samples of the input frame   */
)
{
   Word16 k;
   Word16 vad_flag;
   float in_level;
   double frame_pow = 0.0;   /* scaled power of the current frame      */
   double level_sum = 0.0;   /* sum of band levels, lowest band skipped */
   double pow_sum;

   /* Remember the band levels of the previous frame */
   for (k = 0; k < COMPLEN; k++)
   {
      st->prevLevel[k] = st->level[k];
   }

   /* Calculate power of the input frame (sum of squares, times two) */
   for (k = 0; k < FRAME_LEN; k++)
   {
      frame_pow += in_buf[k] * in_buf[k];
   }
   frame_pow *= 2.0;

   /* pow_sum = power of current frame and previous frame;
      the current frame power is kept for the next call */
   pow_sum = frame_pow + st->prev_pow_sum;
   st->prev_pow_sum = frame_pow;

   /* If input power is very low, clear tone flag */
   if (pow_sum < POW_PITCH_TONE_THR)
   {
      st->pitch_tone = (Word16)(st->pitch_tone & 0x1fff);
   }

   /* Run the filter bank and calculate signal levels at each band */
   filter_bank(st, in_buf, st->level);

   /* Compute VAD decision */
   vad_flag = vad_decision(st, st->level, pow_sum);

   /* Calculate input level: band levels summed (lowest band ignored)
      and scaled by 1/16 */
   for (k = 1; k < COMPLEN; k++)
   {
      level_sum += st->level[k];
   }
   in_level = (float)(level_sum/16.0f);

   /* Estimate speech level */
   Estimate_Speech(st, in_level);

   return(vad_flag);
}
上一页 12
💿 文件大小 1189 K
👤 上传用户 wentianliang
📂 所属分类语音压缩
🏷️ 相关标签

#AMR-WB #语音压缩 #编码 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -