⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c_vad.cpp

📁 g711、VAD的激活算法
💻 CPP
字号:
/*
 * vad.cpp
 * 
 * This is a simple vad implementation.  It isn't tuned and testing
 * has been somewhat limited... but it seems to work ok.  All numbers
 * that are db are multiplied by 100 to keep them slightly more
 * accurate and easy to use. (so -7450 is -74.50db)  This code uses
 * three functions not provided as part of this file : log10_32, bqInit
 * and bqProcess.  The log function calculates :
 *     100 * 10 * log(x) - 9333
 * which should be an approximation of the signal energy x 100.
 * bqInit initializes a biquad data structure and bqProcess applies
 * a biquad to a signal buffer.  A biquad is used by the vad to filter
 * out lower frequency background noise.
 * 
 * This code operates under the assumption that it will get frames with
 * lengths which are a multiple of 5ms.  This is relatively easy to 
 * change since the only real dependency is the hangover count -- it's in
 * frames now but could easily be changed to samples.
 * 
 */

#define VAD_CPP

#include "c_utils.h"
#include "c_vad.h"
 
#define LOCAL static 

#define DATA_FRAME_LENGTH    (5*8)

/* 
 * x = 1.5sec * 1000ms/sec * 8 samples/1 ms
 * samples => +1dB every x samples 
 */
#define VAD_NOISEFLOOR_CNT_INIT      (int)(8*1500)
#define VAD_SIGNALMAX_CNT_INIT       (int)(8*1500)

/* Power Thresholds */
#define VAD_NOISE_TH_BASE        (float) 10.00   /*  10.00 dB Noise Threshold */
#define VAD_NOISE_FLOOR_INIT     (float)-74.00   /* -74.00 dB Initial Noise Floor */
#define VAD_SIGNAL_MAX_INIT      (float)-80.00   /* -80.00 dB Initial Noise Max */
#define VAD_NOISE_TH_MIN         (float)  1.00   /*   1.00 dB Minimum Noise Threshold */

/* High Pass Filter for getting rid of background noise from
 * input signal before energy calculations */
/* Butter : */
#define vhpfB0   (S2byte) 14339
#define vhpfB1   (S2byte)-28678
#define vhpfB2   (S2byte) 14339
#define vhpfA1   (S2byte)-28422
#define vhpfA2   (S2byte) 12550

/* Number of samples of silence before we declare silence period */
/* #samples = 8 samples/ms * 500ms */            
#define VAD_HANGOVER_CNT_INIT        (int)(8*500)

typedef enum {
  VadState_Silence = 0,
  VadState_Speech,
  VadState_Unknown
} t_VadState;

typedef struct _vad {
  
  boolean       enabled;
  
  /* Saved STA between input frames */
  U4byte     sta;

  /* state == 1 if VOICE
   * state == 0 if SILENCE */
  t_VadState state;
  
  /* Countdown of consecutive frames before we declare silence */
  int     hangoverCnt;

  /* Threshold above which a signal is considered to be speech */
  float      noiseTH;

  /* Countdown after which the noise floor is 
   * incremented by 1dB */
  int     noiseFloorCnt;
  
  /* Noise floor in dB */
  float      noiseFloor;
  
  /* Countdown after which the signal max is
   * decremented by 1dB */
  int     signalMaxCnt;

  /* Signal max in dB */
  float      signalMax;  
    
  /* STARise == 1 if sta is rising
   * STARise == 0 if sta is falling */   
  int     STARise;

  int     stateTxCount;
  
  /* High Pass Filter for input signal */
  t_biquad  *bq;
    
} t_vad;

/* LOCAL */
LOCAL t_biquad vadbq;
LOCAL t_vad vadd;

void
vadInit()
{ 

  vadd.enabled          = TRUE;
  vadd.bq               = &vadbq;
  vadd.sta              = 10000;
  vadd.noiseTH          = VAD_NOISE_TH_BASE;
  vadd.state            = VadState_Unknown;
  vadd.noiseFloorCnt    = VAD_NOISEFLOOR_CNT_INIT;      
  vadd.noiseFloor       = VAD_NOISE_FLOOR_INIT;                         
  vadd.hangoverCnt      = VAD_HANGOVER_CNT_INIT;
  vadd.STARise          = 1;                                
  
  vadd.stateTxCount     = 0;

  vadd.signalMax        = VAD_SIGNAL_MAX_INIT;
  vadd.signalMaxCnt     = VAD_SIGNALMAX_CNT_INIT;
    
  bqInit(vadd.bq, vhpfB0, vhpfB1, vhpfB2, vhpfA1, vhpfA2);
}
   
LOCAL U4byte
computeSTA(S2byte *pdata, int length, U4byte *minSta)
{
  int  i;
  S4byte  acc0,acc1;
  U4byte  maxSta;
  
  *minSta = vadd.sta;
  maxSta = vadd.sta;
  
  for (i = 0; i < length; i++)
    {
      
      /* q.15 * q.15 = q.30 */       
      acc1  = pdata[i] * pdata[i];
      
      if ( vadd.STARise )
        {
          acc0  = -1 * (S4byte)(vadd.sta >> 6);
          acc1  = acc1 >> 5;
        }
      else
        {
          acc0  = -1 * (S4byte)(vadd.sta >> 9);
          acc1  = acc1 >> 8;
        } /* if */
          
      acc0 += acc1;
      vadd.STARise = ( 0 >= acc0 ) ? 0 : 1;
      vadd.sta += acc0;      

      if ( vadd.sta > maxSta )
        {
          maxSta = vadd.sta;	// arijit - i added the cast
        }
      else if ( vadd.sta < *minSta )
        {
          *minSta = vadd.sta;
        }
                
    } /* for */
  
  return maxSta;
  
}

LOCAL void 
computeNFE(float minpower, float maxpower, int length)
{
    
  if ( minpower <= vadd.noiseFloor ) 
    { 
      vadd.noiseFloor = minpower;
      vadd.noiseFloorCnt = VAD_NOISEFLOOR_CNT_INIT;
    }
  else
    { 
      if ( vadd.noiseFloorCnt < length )
        {
          vadd.noiseFloor += 1;
          vadd.noiseFloorCnt = 
            (VAD_NOISEFLOOR_CNT_INIT + vadd.noiseFloorCnt - length);
        }
      else
        {
          vadd.noiseFloorCnt -= length;
        }
    }
 
}

unsigned long stopCount = 32000;

LOCAL boolean
vadSubProcess(S2byte *data, int length)
{
  boolean  SpeechDetected;
  boolean  FrameSpeechFlag;
  S2byte   tmpData[DATA_FRAME_LENGTH];
  U4byte   sta[2];
  float    power[2];
  static unsigned long count = 0;
  
  SpeechDetected  = TRUE;
  FrameSpeechFlag = FALSE;
  
  bqProcess(vadd.bq, data, tmpData, length);
 
  sta[1] = computeSTA(tmpData, length, &sta[0]);
  
  calcPower(2, sta, power);
   
  computeNFE(power[0], power[1], length);

  count += length;
  if (count >= stopCount)
	  {
		  count = 0;
	  }

  if (power[1] > (vadd.noiseFloor + vadd.noiseTH)) 
    { 
      FrameSpeechFlag = TRUE;  
    } 
          
  if  ( FrameSpeechFlag == FALSE) 
    { 
      if ( vadd.hangoverCnt < length ) 
        {
          SpeechDetected  = FALSE;
          vadd.hangoverCnt = 0;
		  if ( vadd.state != VadState_Silence )
		  {
			  vadd.stateTxCount++;
		  }
          vadd.state = VadState_Silence;
        }
      else
        { 
          vadd.hangoverCnt -= length;
        }
    }
  else 
    {
      vadd.hangoverCnt = VAD_HANGOVER_CNT_INIT;
	  if ( vadd.state == VadState_Silence )
		  {
			  vadd.stateTxCount++;
		  }
      vadd.state = VadState_Speech;
    }
      
  return SpeechDetected;
  
}

/* 
 * Returns:  true for speech
 *          false for silence 
 */  
boolean
vadProcess(S2byte *data, int length)
{ 

  /* vadProcess locals */
  int idx;
  int step;
  boolean ret;

  ret = FALSE;
    
  if ( vadd.enabled == TRUE )
    {

	  /* Cut up the frame into 5ms chunks for processing purposes */
      for (idx = 0; length > 0; length -= step)
        {  
		  step = (length < DATA_FRAME_LENGTH) ? length : DATA_FRAME_LENGTH;
          ret |= vadSubProcess(&data[idx], step);
          idx += step;
        }

  }
  else
    {
      ret = TRUE;
    }
    
  return ret;

}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -