⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 enc_dtx.c

📁 ffmpeg源码分析
💻 C
📖 第 1 页 / 共 3 页
字号:
/*
 *===================================================================
 *  3GPP AMR Wideband Floating-point Speech Codec
 *===================================================================
 */
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include "typedef.h"
#include "enc_lpc.h"
#include "enc_util.h"


/*
 * DTX / comfort-noise constants.
 * NOTE(review): DTX_HIST_SIZE itself is defined outside this file;
 * DTX_HIST_SIZE_MIN_ONE is presumably DTX_HIST_SIZE - 1 -- confirm.
 */
#define DTX_HIST_SIZE_MIN_ONE       7
#define DTX_HANG_CONST              7     /* yields eight frames of SP HANGOVER  */
#define DTX_ELAPSED_FRAMES_THRESH   (24 + 7 -1)
#define MED_THRESH                  2.25  /* max/min distance ratio for median replacement */
#define GAIN_THR                    1.406 /* energy-variation threshold for dithering      */
#define ORDER                       16    /* order of linear prediction filter   */
#define RANDOM_INITSEED             21845 /* own random init value               */
#define MRDTX                       10

/* Codebook sizes passed to the split-VQ of the noise ISF vector */
#define SIZE_BK_NOISE1  64
#define SIZE_BK_NOISE2  64
#define SIZE_BK_NOISE3  64
#define SIZE_BK_NOISE4  32
#define SIZE_BK_NOISE5  32

#define FRAME_LEN 256   /* Length (samples) of the input frame */
#define SCALE     128   /* (UNITY * UNITY) / 512               */
#define TONE_THR 0.65f  /* Threshold for tone detection        */

/* constants for speech level estimation */
#define SP_EST_COUNT       80
#define SP_ACTIVITY_COUNT  25
#define ALPHA_SP_UP     (1.0f - 0.85f)
#define ALPHA_SP_DOWN   (1.0f - 0.85f)

#define NOM_LEVEL          2050.0F              /* about -26 dBov                */
#define SPEECH_LEVEL_INIT  NOM_LEVEL
#define MIN_SPEECH_LEVEL1  (NOM_LEVEL * 0.063F) /* NOM_LEVEL -24 dB              */
#define MIN_SPEECH_LEVEL2  (NOM_LEVEL * 0.2F)   /* NOM_LEVEL -14 dB              */
#define MIN_SPEECH_SNR     0.125F               /* 0 dB, lowest SNR estimation   */

/* Constants for background spectrum update */
#define ALPHA_UP1   (1.0f - 0.95f)  /* Normal update, upwards:   */
#define ALPHA_DOWN1 (1.0f - 0.936f) /* Normal update, downwards  */
#define ALPHA_UP2   (1.0f - 0.985f) /* Forced update, upwards    */
#define ALPHA_DOWN2 (1.0f - 0.943f) /* Forced update, downwards  */
#define ALPHA3      (1.0f - 0.95f)  /* Update downwards          */
#define ALPHA4      (1.0f - 0.9f)   /* For stationary estimation */
#define ALPHA5      (1.0f - 0.5f)   /* For stationary estimation */

/* Constants for VAD threshold */
#define THR_MIN   (1.6F * SCALE) /* Minimum threshold                            */
#define THR_HIGH  (6.0F * SCALE) /* Highest threshold                            */
#define THR_LOW   (1.7F * SCALE) /* Lowest threshold                             */
#define NO_P1     31744.0F       /* ilog2(1), Noise level for highest threshold  */
#define NO_P2     19786.0F       /* ilog2(0.1), Noise level for lowest threshold */
#define NO_SLOPE  ((Float32)(THR_LOW - THR_HIGH) / (Float32)(NO_P2 - NO_P1))
#define SP_CH_MIN (-0.75F * SCALE)
#define SP_CH_MAX (0.75F * SCALE)
#define SP_P1     22527.0F       /* ilog2(NOM_LEVEL / 4)                         */
#define SP_P2     17832.0F       /* ilog2(NOM_LEVEL * 4)                         */
#define SP_SLOPE  ((Float32)(SP_CH_MAX - SP_CH_MIN) / (Float32)(SP_P2 - SP_P1))

/* Constants for hangover length */
#define HANG_HIGH 12          /* longest hangover                 */
#define HANG_LOW  2           /* shortest hangover                */
#define HANG_P1   THR_LOW     /* threshold for longest hangover   */
#define HANG_P2   (4 * SCALE) /* threshold for shortest hangover  */
#define HANG_SLOPE ((Float32)(HANG_LOW - HANG_HIGH) / (Float32)(HANG_P2 - HANG_P1))

/* Constants for burst length */
#define BURST_HIGH   8        /* longest burst length          */
#define BURST_LOW    3        /* shortest burst length         */
#define BURST_P1     THR_HIGH /* threshold for longest burst   */
#define BURST_P2     THR_LOW  /* threshold for shortest burst  */
#define BURST_SLOPE  ((Float32)(BURST_LOW - BURST_HIGH) / (Float32)(BURST_P2 - BURST_P1))

/* Parameters for background spectrum recovery function */
#define STAT_COUNT      20    /* threshold of stationary detection counter         */
#define STAT_THR_LEVEL  184   /* Threshold level for stationarity detection        */
#define STAT_THR        1000  /* Threshold for stationarity detection              */

/* Limits for background noise estimate */
#define NOISE_MIN    40    /* minimum */
#define NOISE_MAX    20000 /* maximum */
#define NOISE_INIT   150   /* initial */

/* Thresholds for signal power (now calculated on 2 frames) */
#define VAD_POW_LOW        30000.0f   /* If input power is lower than this, VAD is set to 0  */
#define POW_PITCH_TONE_THR 686080.0f  /* If input power is lower, pitch detection is ignored */

/* Constants for the filter bank */
#define COEFF3   0.407806f /* coefficient for the 3rd order filter     */
#define COEFF5_1 0.670013f /* 1st coefficient for the 5th order filter */
#define COEFF5_2 0.195007f /* 2nd coefficient for the 5th order filter */
/*
 * Constant tables defined in another translation unit.
 * In the code visible in this file, E_ROM_en_adjust is indexed by the codec
 * mode in E_DTX_buffer, and E_ROM_mean_isf_noise together with the five
 * E_ROM_dicoN_isf_noise codebooks are used by E_DTX_isf_q.
 */
extern const Float32 E_ROM_en_adjust[];
extern const Float32 E_ROM_mean_isf_noise[];
extern const Float32 E_ROM_dico1_isf_noise[];
extern const Float32 E_ROM_dico2_isf_noise[];
extern const Float32 E_ROM_dico3_isf_noise[];
extern const Float32 E_ROM_dico4_isf_noise[];
extern const Float32 E_ROM_dico5_isf_noise[];
extern const Float32 E_ROM_isf[];


/*
 * E_DTX_isf_history_aver
 *
 * Parameters:
 *    isf_old         I/O: ISF history, DTX_HIST_SIZE vectors of M values
 *                         stored back to back (temporarily patched, then
 *                         restored before returning)
 *    indices           I: indices[0], indices[1] = history slots to be
 *                         replaced for the summation (-1 = no replacement);
 *                         indices[2] = history slot used as the replacement
 *    isf_aver          O: per-coefficient SUM over the history
 *
 * Function:
 *    Accumulates the ISF history coefficient by coefficient, with up to
 *    two vectors substituted by the vector at indices[2]. The result is
 *    the plain sum; no division by DTX_HIST_SIZE is done here (the visible
 *    caller E_DTX_exe performs that division afterwards).
 *
 * Returns:
 *    void
 */
static void E_DTX_isf_history_aver(Float32 isf_old[], Word16 indices[],
                                   Float32 isf_aver[])
{
   Float32 saved[2 * M];      /* backup of the (at most two) replaced vectors */
   Float32 acc;
   Float32 *target;
   const Float32 *median;
   Word32 slot, frame, n;

   /* Back up each flagged vector and overwrite it with the replacement */
   for (slot = 0; slot < 2; slot++)
   {
      if (indices[slot] == -1)
      {
         continue;
      }

      target = &isf_old[indices[slot] * M];
      median = &isf_old[indices[2] * M];

      for (n = 0; n < M; n++)
      {
         saved[slot * M + n] = target[n];
         target[n] = median[n];
      }
   }

   /* Sum every coefficient across the whole history */
   for (n = 0; n < M; n++)
   {
      acc = 0;

      for (frame = 0; frame < DTX_HIST_SIZE; frame++)
      {
         acc += isf_old[frame * M + n];
      }

      isf_aver[n] = acc;
   }

   /* Put the backed-up vectors back so the history is left unchanged */
   for (slot = 0; slot < 2; slot++)
   {
      if (indices[slot] == -1)
      {
         continue;
      }

      target = &isf_old[indices[slot] * M];

      for (n = 0; n < M; n++)
      {
         target[n] = saved[slot * M + n];
      }
   }
}

/*
 * E_DTX_dithering_control
 *
 * Parameters:
 *    st                I: state struct (reads mem_distance_sum and
 *                         mem_log_en)
 *
 * Function:
 *    Decides whether comfort-noise dithering should be enabled, based on
 *    how much the background-noise spectrum and energy vary over the
 *    history:
 *      - spectrum: sum of the first 8 entries of mem_distance_sum,
 *        compared against 5147609.0
 *      - energy:   sum of absolute deviations of mem_log_en from its mean,
 *        compared against GAIN_THR
 *
 * Returns:
 *    1 if either measure exceeds its threshold, 0 otherwise
 */
static Word16 E_DTX_dithering_control(E_DTX_State * st)
{
   Float32 spectral_spread = 0.0F;
   Float32 energy_mean = 0.0f;
   Float32 energy_spread = 0.0f;
   Word32 n;

   /*
    * Spectral variation. The bound is the literal 8 here
    * (presumably equal to DTX_HIST_SIZE -- confirm).
    */
   for (n = 0; n < 8; n++)
   {
      spectral_spread += st->mem_distance_sum[n];
   }

   /* Mean of the logarithmic energies in the history */
   for (n = 0; n < DTX_HIST_SIZE; n++)
   {
      energy_mean += st->mem_log_en[n] / (Float32)DTX_HIST_SIZE;
   }

   /* Total absolute deviation of the energies from that mean */
   for (n = 0; n < DTX_HIST_SIZE; n++)
   {
      energy_spread += (Float32)fabs(st->mem_log_en[n] - energy_mean);
   }

   if ((spectral_spread > 5147609.0f) || (energy_spread > GAIN_THR))
   {
      return 1;
   }

   return 0;
}

/*
 * E_DTX_buffer
 *
 * Parameters:
 *    st           I/O: state struct (mem_hist_ptr, mem_isf and mem_log_en
 *                      are updated)
 *    isf_new        I: isf vector (M values)
 *    enr            I: residual energy (for L_FRAME)
 *    codec_mode     I: speech coder mode, used as index into
 *                      E_ROM_en_adjust
 *
 * Function:
 *    Advances the circular history position and stores, at the new
 *    position, the ISF vector and the adjusted base-2 logarithm of the
 *    per-sample energy.
 *
 * Returns:
 *    void
 */
void E_DTX_buffer(E_DTX_State *st, Float32 isf_new[], Float32 enr,
                  Word16 codec_mode)
{
   Float32 energy;
   Float32 log2_energy;

   /* step to the next history slot, wrapping at the end of the buffer */
   if (++st->mem_hist_ptr == DTX_HIST_SIZE)
   {
      st->mem_hist_ptr = 0;
   }

   /* store the new isf vector in that slot */
   memcpy(st->mem_isf + st->mem_hist_ptr * M, isf_new, M * sizeof(Float32));

   /* small bias keeps the logarithm argument strictly positive for enr == 0 */
   energy = enr + 1e-10F;

   /* log2 of the energy per sample, computed as a ratio of log10 values */
   log2_energy = (Float32)(log10(energy / ((Float64)L_FRAME)) / log10(2.0F));

   /*
    * Apply the mode-dependent adjustment from E_ROM_en_adjust
    * (described as "subtract ~ 3 dB" in the original comment; the table
    * contents are not visible in this file).
    */
   st->mem_log_en[st->mem_hist_ptr] = log2_energy + E_ROM_en_adjust[codec_mode];
}

/*
 * E_DTX_frame_indices_find
 *
 * Parameters:
 *    st           I/O: state struct; mem_distance and mem_distance_sum are
 *                      updated, mem_isf and mem_hist_ptr are read
 *    indices        O: three history-buffer slots (at least 3 elements):
 *                      indices[0] = frame with the largest distance sum,
 *                      indices[1] = frame with the second largest sum,
 *                      indices[2] = frame with the smallest distance sum.
 *                      indices[0] and/or indices[1] are set to -1 when the
 *                      corresponding sum divided by MED_THRESH does not
 *                      exceed the smallest sum.
 *
 * Function:
 *    Updates the pairwise squared distances between the ISF vectors in the
 *    history for the newest frame and finds the frames with the minimum,
 *    maximum and second-largest distance sums.
 *
 * Returns:
 *    void
 */
static void E_DTX_frame_indices_find(E_DTX_State * st, Word16 indices[])
{
   Float32 L_tmp, tmp, summin, summax, summax2nd;
   Word32 i, j, k;
   Word16 ptr;

   /*
    * Remove the effect of the oldest frame from the column
    * sum sumD[0..E_DTX_HIST_SIZE-1]. sumD[E_DTX_HIST_SIZE] is
    * not updated since it will be removed later.
    *
    * With DTX_HIST_SIZE_MIN_ONE == 7 the index j visits
    * 6, 12, 17, 21, 24, 26, 27 of mem_distance, i.e. the last entry of
    * successive runs of length 7, 6, 5, 4, 3, 2, 1.
    */

   k = DTX_HIST_SIZE_MIN_ONE;
   j = -1;

   for (i = 0; i < DTX_HIST_SIZE_MIN_ONE; i++)
   {
      j = j + k;
      st->mem_distance_sum[i] = st->mem_distance_sum[i] - st->mem_distance[j];
      k--;
   }

   /*
    * Shift the column sum sumD. The element sumD[E_DTX_HIST_SIZE-1]
    * corresponding to the oldest frame is removed. The sum of
    * the distances between the latest isf and other isfs,
    * i.e. the element sumD[0], will be computed during this call.
    * Hence this element is initialized to zero.
    */

   for (i = DTX_HIST_SIZE_MIN_ONE; i > 0; i--)
   {
      st->mem_distance_sum[i] = st->mem_distance_sum[i - 1];
   }
   st->mem_distance_sum[0] = 0.0F;

   /*
    * Remove the oldest frame from the distance matrix.
    * Note that the distance matrix is replaced by a one-
    * dimensional array to save static memory.
    *
    * The copy runs from the end of the array towards the front
    * (i = 27, 26, 24, 21, 17, 12), moving runs of 1, 2, ... 6 entries.
    * Entries 0..6 are not written here; they are filled with the new
    * distances in the next loop.
    */

   k = 0;

   for (i = 27; i >= 12; i = i - k)
   {
      k++;
      for (j = k; j > 0; j--)
      {
         st->mem_distance[i - j + 1] = st->mem_distance[i - j - k];
      }
   }

   /*
    * Compute the first column of the distance matrix D
    * (squared Euclidean distances from the newest ISF vector to each
    * of the older ISF vectors in the history).
    */

   ptr = st->mem_hist_ptr;

   for (i = 1; i < DTX_HIST_SIZE; i++)
   {
      /* Compute the distance between the latest isf and the other isfs. */
      /* Step back one slot in the circular history, wrapping below zero. */
      ptr--;

      if (ptr < 0)
      {
         ptr = DTX_HIST_SIZE_MIN_ONE;
      }
      L_tmp = 0;

      for (j = 0; j < M; j++)
      {
         tmp = st->mem_isf[st->mem_hist_ptr * M + j] - st->mem_isf[ptr * M + j];
         L_tmp += tmp * tmp;
      }

      st->mem_distance[i - 1] = L_tmp;

      /* Update also the column sums. */
      st->mem_distance_sum[0] += st->mem_distance[i - 1];
      st->mem_distance_sum[i] += st->mem_distance[i - 1];
   }

   /*
    * Find the minimum and maximum distance sums. At this point the
    * indices are positions in mem_distance_sum (0 = newest frame).
    */
   summax = st->mem_distance_sum[0];
   summin = st->mem_distance_sum[0];
   indices[0] = 0;
   indices[2] = 0;

   for (i = 1; i < DTX_HIST_SIZE; i++)
   {
      if (st->mem_distance_sum[i] > summax)
      {
         indices[0] = (Word16)i;
         summax = st->mem_distance_sum[i];
      }

      if (st->mem_distance_sum[i] < summin)
      {
         indices[2] = (Word16)i;
         summin = st->mem_distance_sum[i];
      }
   }

   /*
    * Find the second largest distance sum: largest value among all
    * positions except the one already chosen as the maximum.
    */
   summax2nd = -100000000.0;
   indices[1] = -1;
   for (i = 0; i < DTX_HIST_SIZE; i++)
   {
      if ((st->mem_distance_sum[i] > summax2nd) && (i != indices[0]))
      {
         indices[1] = (Word16)i;
         summax2nd = st->mem_distance_sum[i];
      }
   }

   /*
    * Convert the positions (counted back from the newest frame) into
    * slots of the circular history buffer.
    */
   for (i = 0; i < 3; i++)
   {
      indices[i] = (Word16)(st->mem_hist_ptr - indices[i]);
      if (indices[i] < 0)
      {
         indices[i] += DTX_HIST_SIZE;
      }
   }

   /*
    * If maximum distance / MED_THRESH is smaller than or equal to the
    * minimum distance then the median ISF vector replacement is not
    * performed
    */
   L_tmp = (Float32)(summax / MED_THRESH);

   if (L_tmp <= summin)
   {
      indices[0] = -1;
   }

   /*
    * If second largest distance/MED_THRESH is smaller than or equal to
    * the minimum distance then the median ISF vector replacement is
    * not performed
    */
   L_tmp = (Float32)(summax2nd / MED_THRESH);

   if (L_tmp <= summin)
   {
      indices[1] = -1;
   }

   return;
}

/*
 * E_DTX_isf_q
 *
 * Parameters:
 *    isf          I/O: ISF vector (ORDER values); the noise mean
 *                      E_ROM_mean_isf_noise is subtracted in place before
 *                      quantisation
 *    indice         O: pointer to the index array pointer; (*indice)[0..4]
 *                      receive the five quantisation indices. The pointer
 *                      itself is not advanced by this function.
 *
 * Function:
 *    The mean-removed ISF vector is quantized using VQ with split-by-5:
 *    sub-vectors of 2, 3, 3, 4 and 4 coefficients starting at offsets
 *    0, 2, 5, 8 and 12, each searched in its own noise codebook.
 *
 * Returns:
 *    void
 */
static void E_DTX_isf_q(Float32 *isf, Word16 **indice)
{
   Word16 *idx = *indice;
   Float32 unused;   /* last output of E_LPC_isf_sub_vq, not used here */
   Word32 n;

   /* remove the long-term noise mean */
   for (n = 0; n < ORDER; n++)
   {
      isf[n] -= E_ROM_mean_isf_noise[n];
   }

   /* one codebook search per sub-vector */
   idx[0] = E_LPC_isf_sub_vq(isf, E_ROM_dico1_isf_noise, 2,
      SIZE_BK_NOISE1, &unused);
   idx[1] = E_LPC_isf_sub_vq(isf + 2, E_ROM_dico2_isf_noise, 3,
      SIZE_BK_NOISE2, &unused);
   idx[2] = E_LPC_isf_sub_vq(isf + 5, E_ROM_dico3_isf_noise, 3,
      SIZE_BK_NOISE3, &unused);
   idx[3] = E_LPC_isf_sub_vq(isf + 8, E_ROM_dico4_isf_noise, 4,
      SIZE_BK_NOISE4, &unused);
   idx[4] = E_LPC_isf_sub_vq(isf + 12, E_ROM_dico5_isf_noise, 4,
      SIZE_BK_NOISE5, &unused);
}

/*
 * E_DTX_exe
 *
 * Parameters:
 *    st           I/O: state struct
 *    exc2           O: CN excitation
 *    pt_prms        O: analysis parameters
 *
 * Function:
 *    Comfort noise parameters are encoded for the SID frame
 *
 * Returns:
 *    void
 */
void E_DTX_exe(E_DTX_State *st, Float32 *exc2, Word16 **pt_prms)
{
   Float32 isf[M];
   Float32 log_en, level, gain, ener;
   Word32 i,j;
   Word16 isf_order[3];
   Word16 CN_dith;

   /* VOX mode computation of SID parameters */

   log_en = 0.0F;
   memset(isf, 0, M * sizeof(Float32));

   /* average energy and isf */
   for (i = 0; i < DTX_HIST_SIZE; i++)
   {
      log_en += st->mem_log_en[i] / (Float32)DTX_HIST_SIZE;
   }

   E_DTX_frame_indices_find(st, isf_order);
   E_DTX_isf_history_aver(st->mem_isf, isf_order, isf);

   for (j = 0; j < M; j++)
   {
      isf[j] = isf[j] / (Float32)DTX_HIST_SIZE;   /* divide by 8 */
   }

   /*  quantize logarithmic energy to 6 bits (-6 : 66 dB) */

   st->mem_log_en_index = (Word16)((log_en + 2.0F) * 2.625F);

   if(st->mem_log_en_index > 63)
   {
      st->mem_log_en_index = 63;
   }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -