⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sp_enc.c

📁 ffmpeg源码分析
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
 * ===================================================================
 *  TS 26.104
 *  REL-5 V5.4.0 2004-03
 *  REL-6 V6.1.0 2004-03
 *  3GPP AMR Floating-point Speech Codec
 * ===================================================================
 *
 */

/*
 * sp_enc.c
 *
 *
 * Project:
 *    AMR Floating-Point Codec
 *
 * Contains:
 *    This module contains all the functions needed for encoding 160
 *    16-bit speech samples to AMR encoder parameters.
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>
#include <math.h>
#include <float.h>
#include "sp_enc.h"
#include "rom_enc.h"

/*
 * Definition of structures used in encoding process
 */
/*
 * State kept by the input pre-processing stage between calls; it is held
 * through Speech_Encode_FrameState.pre_state.
 * NOTE(review): the names suggest the output history (y1, y2) and input
 * history (x0, x1) of a second-order recursive filter - confirm against the
 * pre-processing routine, which is not part of this section.
 */
typedef struct
{
   Float32 y2;
   Float32 y1;
   Float32 x0;
   Float32 x1;

}Pre_ProcessState;

#ifdef VAD2

/*
 * Constants for the VAD2 build
 */

/* Frame, delay and FFT lengths */
#define FRM_LEN1            80
#define DELAY0              24
#define FFT_LEN1            128

#define UPDATE_CNT_THLD1    50

#define INIT_FRAMES         4

/* Smoothing factors */
#define CNE_SM_FAC1         0.1
#define CEE_SM_FAC1         0.55

/* Forced update constants... */
#define HYSTER_CNT_THLD1    6
#define HIGH_ALPHA1         0.9
#define LOW_ALPHA1          0.7
#define ALPHA_RANGE1        ( HIGH_ALPHA1 - LOW_ALPHA1 )

/* Energy constants; NORM_ENRG accounts for the div by 2 by the HPF */
#define NORM_ENRG           ( 4.0 )
#define MIN_CHAN_ENRG       ( 0.0625 / NORM_ENRG )
#define INE                 ( 16.0 / NORM_ENRG )
#define NOISE_FLOOR         ( 1.0 / NORM_ENRG )

#define PRE_EMP_FAC1        ( -0.8 )

/* Channel layout and update threshold */
#define NUM_CHAN            16
#define LO_CHAN             0
#define HI_CHAN             15
#define UPDATE_THLD         35

#define SINE_START_CHAN     2
#define P2A_THRESH          10.0
#define DEV_THLD1           28.0

/*
 * Constants for the FFT function
 */
#define SIZE                128
#define SIZE_BY_TWO         64
#define NUM_STAGE           6

#define PI                  3.141592653589793

#define TRUE                1
#define FALSE               0

/*
 * Helper macros.  Each one evaluates its arguments more than once, so the
 * arguments must not have side effects.
 */
#define min(a,b)            ( (a) < (b) ? (a) : (b) )
#define max(a,b)            ( (a) > (b) ? (a) : (b) )
#define square(a)           ( (a) * (a) )

/* structures */
/*
 * Voice activity detector state for the VAD2 build (this definition is only
 * compiled when VAD2 is defined).  The per-channel arrays hold NUM_CHAN
 * entries each.
 * NOTE(review): the routines that read and update these fields are not part
 * of this section; the field notes below are inferred from the names only
 * and must be confirmed against the VAD2 code.
 */
typedef struct
{
  Float32 pre_emp_mem;   /* presumably pre-emphasis filter memory (cf. PRE_EMP_FAC1) */
  Word16  update_cnt;   /* presumably compared against UPDATE_CNT_THLD1 - confirm */
  Word16  hyster_cnt;   /* presumably compared against HYSTER_CNT_THLD1 - confirm */
  Word16  last_update_cnt;
  Float32 ch_enrg_long_db[NUM_CHAN];   /* one value per channel */
  Word32  Lframe_cnt;
  Float32 ch_enrg[NUM_CHAN];   /* one value per channel */
  Float32 ch_noise[NUM_CHAN];   /* one value per channel */
  Float32 tsnr;
  Word16  hangover;
  Word16  burstcount;
  Word16  fupdate_flag;
  Float32 negSNRvar;
  Float32 negSNRbias;
  Float32 R0;
  Float32 Rmax;
  Word16  LTP_flag;
}vadState;
#else
/*
 * Voice activity detector state for the default build (compiled when VAD2
 * is not defined).  The level arrays hold COMPLEN entries each.
 */
typedef struct
{
   Float32 bckr_est[COMPLEN];   /* background noise estimate */
   Float32 ave_level[COMPLEN];   /* averaged input components for stationary estimation */


   Float32 old_level[COMPLEN];   /* input levels of the previous frame */
   Float32 sub_level[COMPLEN];   /* input levels calculated at the end of a frame (lookahead) */


   Float32 a_data5[3][2];   /* memory for the filter bank */
   Float32 a_data3[5];   /* memory for the filter bank */
   Float32 best_corr_hp;   /* FIP filtered value */


   Float32 corr_hp_fast;   /* filtered value */
   Word32 vadreg;   /* flags for intermediate VAD decisions */
   Word32 pitch;   /* flags for pitch detection */
   Word32 oldlag_count, oldlag;   /* variables for pitch detection */
   Word32 complex_high;   /* flags for complex detection */
   Word32 complex_low;   /* flags for complex detection */
   Word32 complex_warning;   /* complex background warning */
   Word32 tone;   /* flags for tone detection */
   Word16 burst_count;   /* counts length of a speech burst */
   Word16 hang_count;   /* hangover counter */
   Word16 stat_count;   /* stationary counter */
   Word16 complex_hang_count;   /* complex hangover counter, used by VAD */
   Word16 complex_hang_timer;   /* hangover initiator, used by CAD */
   Word16 speech_vad_decision;   /* final decision */
   /*
    * NOTE(review): the comment "counts length of a speech burst incl HO
    * addition" stood detached above corr_hp_fast; it presumably describes
    * sp_burst_count - confirm.
    */
   Word16 sp_burst_count;


}vadState;
#endif
/* Depth of the DTX history buffers (sizes lsp_hist and log_en_hist below) */
#define DTX_HIST_SIZE 8
/* Evaluates to 30 */
#define DTX_ELAPSED_FRAMES_THRESH (24 + 7 -1)
#define DTX_HANG_CONST 7   /* yields eight frames of SP HANGOVER */
/*
 * DTX encoder state: parameter histories, quantization indices and the
 * counters used by the DTX handler.
 */
typedef struct
{
   Float32 lsp_hist[M * DTX_HIST_SIZE];   /* M values for each of DTX_HIST_SIZE history slots */
   Float32 log_en_hist[DTX_HIST_SIZE];   /* one value per history slot */
   Word32 init_lsf_vq_index;
   Word16 hist_ptr;   /* presumably the current position in the history buffers - confirm */
   Word16 log_en_index;
   Word16 lsp_index[3];


   /* DTX handler stuff */
   Word16 dtxHangoverCount;
   Word16 decAnaElapsedCount;


}dtx_encState;
/*
 * State held through cod_amrState.tonStabSt: a gain history of N_FRAME
 * entries plus a counter.
 */
typedef struct
{
   /* gain history */
   Float32 gp[N_FRAME];


   /* counters */
   Word16 count;


}tonStabState;
/*
 * Predictor memory used by the gain quantizer (see gainQuantState, which
 * holds two instances of this state).
 */
typedef struct
{
   /* normal MA predictor memory, (contains 20*log10(qua_err)) */
   Word32 past_qua_en[4];


}gc_predState;

/*
 * State of the gain adaptor, held through gainQuantState.adaptSt.
 */
typedef struct
{
   Float32 prev_alpha;   /* previous adaptor output, */
   Float32 prev_gc;   /* previous code gain, */
   Float32 ltpg_mem[LTPG_MEM_SIZE];   /* LTP coding gain history, (ltpg_mem[0] not used for history) */
   Word16 onset;   /* onset state, */


}gain_adaptState;
/*
 * Gain quantizer state: a group of sf0_* values, a pointer to a gain index
 * location, and pointers to the predictor and adaptor sub-states.
 * NOTE(review): the sf0_ prefix presumably marks values saved from the
 * first subframe for later use - confirm against the gain quantization
 * code, which is not part of this section.
 */
typedef struct
{

   Float32 sf0_target_en;
   Float32 sf0_coeff[5];
   Word32 sf0_gcode0_exp;
   Word32 sf0_gcode0_fra;
   Word16 *gain_idx_ptr;


   gc_predState * gc_predSt;   /* predictor state */
   gc_predState * gc_predUncSt;   /* second predictor state of the same type */
   gain_adaptState * adaptSt;   /* gain adaptor state */
}gainQuantState;
/*
 * Pitch search state, held through clLtpState.pitchSt.
 */
typedef struct
{
   Word32 T0_prev_subframe;   /* integer pitch lag of previous sub-frame */


}Pitch_frState;
/*
 * State held through cod_amrState.clLtpSt; it only wraps a pointer to the
 * pitch search state.
 */
typedef struct
{
   Pitch_frState * pitchSt;
}clLtpState;
/*
 * State held through cod_amrState.pitchOLWghtSt.
 * NOTE(review): the fields are not used in this section; their meaning
 * (a weight, a previous T0 median and a flag, judging by the names) should
 * be confirmed against the open-loop pitch code.
 */
typedef struct
{
   Float32 ada_w;
   Word32 old_T0_med;
   Word16 wght_flg;


}pitchOLWghtState;
/*
 * Quantizer memory, held through lspState.qSt.
 */
typedef struct
{
   Float32 past_rq[M];   /* Past quantized prediction error */


}Q_plsfState;
/*
 * LSP state: two vectors of M past LSP values and a pointer to the
 * quantizer state.
 */
typedef struct
{
   /* Past LSPs */
   Float32 lsp_old[M];
   Float32 lsp_old_q[M];   /* presumably the quantized counterpart of lsp_old - confirm */


   /* Quantization state */
   Q_plsfState * qSt;
}lspState;
/*
 * Memory for the Levinson stage, held through lpcState.LevinsonSt.
 */
typedef struct
{
   Float32 old_A[M + 1];   /* Last A(z) for case of unstable filter */


}LevinsonState;
/*
 * LP analysis state, held through cod_amrState.lpcSt; it only wraps a
 * pointer to the Levinson memory.
 */
typedef struct
{
   LevinsonState * LevinsonSt;
}lpcState;
/*
 * Main encoder state: signal buffers with working pointers, pointers to the
 * sub-module states, the DTX setting and the filter memories.
 * NOTE(review): each bare pointer declared next to a buffer (speech, wsp,
 * exc, zero, h1, error, ...) presumably points into that buffer; the
 * initialisation code is not part of this section - confirm there.
 */
typedef struct
{
   /* Speech vector */
   Float32 old_speech[L_TOTAL];
   Float32 *speech, *p_window, *p_window_12k2;
   Float32 *new_speech;   /* Global variable */


   /* Weight speech vector */
   Float32 old_wsp[L_FRAME + PIT_MAX];
   Float32 *wsp;


   /* OL LTP states */
   Word32 old_lags[5];
   Float32 ol_gain_flg[2];


   /* Excitation vector */
   Float32 old_exc[L_FRAME + PIT_MAX + L_INTERPOL];
   Float32 *exc;


   /* Zero vector */
   Float32 ai_zero[L_SUBFR + MP1];
   Float32 *zero;


   /* Impulse response vector */
   Float32 *h1;
   Float32 hvec[L_SUBFR * 2];


   /* Substates */
   lpcState * lpcSt;
   lspState * lspSt;
   clLtpState * clLtpSt;
   gainQuantState * gainQuantSt;
   pitchOLWghtState * pitchOLWghtSt;
   tonStabState * tonStabSt;
   vadState * vadSt;

   Word32 dtx;   /* presumably the DTX on/off setting - confirm */


   dtx_encState * dtxEncSt;

   /* Filter's memory */
   Float32 mem_syn[M], mem_w0[M], mem_w[M];
   Float32 mem_err[M + L_SUBFR], *error;
   Float32 sharp;


}cod_amrState;
/*
 * Top-level encoder state: pointers to the core encoder state and to the
 * pre-processing state, plus a DTX setting.
 */
typedef struct
{
   cod_amrState * cod_amr_state;
   Pre_ProcessState * pre_state;

   Word32 dtx;   /* presumably the DTX on/off setting - confirm */


}Speech_Encode_FrameState;


/*
 * Dotproduct40
 *
 *
 * Parameters:
 *    x                 I: First input, 40 values are read
 *    y                 I: Second input, 40 values are read
 * Function:
 *    Computes the dot product of two vectors of length 40.
 *    The products are summed four at a time and each group of four
 *    is then added to the accumulator.
 *
 * Returns:
 *    acc                dot product
 */
static Float64 Dotproduct40( Float32 *x, Float32 *y )
{
   Float64 acc;
   int k;


   /* first group of four initialises the accumulator */
   acc = x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3];

   /* remaining nine groups of four */
   for ( k = 4; k < 40; k += 4 ) {
      acc += x[k] * y[k] + x[k + 1] * y[k + 1] + x[k + 2] * y[k + 2] + x[k + 3] * y[k + 3];
   }
   return( acc );
}


/*
 * Autocorr
 *
 *
 * Parameters:
 *    x                 I: Input signal, L_WINDOW values are read
 *    r                 O: Autocorrelations, M + 1 values are written
 *    wind              I: Window for LPC analysis, L_WINDOW values are read
 * Function:
 *    Calculate autocorrelation with window, LPC order = M
 *
 *    The windowed signal is stored in a local buffer followed by M + 1
 *    zeros, so that the lagged dot products can run over full blocks
 *    of 40 values without reading uninitialised memory.
 *
 * Returns:
 *    void
 */
static void Autocorr( Float32 x[], Float32 r[], const Float32 wind[] )
{
   Word32 i, j;   /* Counters */
   Float32 y[L_WINDOW + M + 1];   /* Windowed signal */
   Float64 sum;   /* temp */


   /*
    * Windowing of signal
    */
   for ( i = 0; i < L_WINDOW; i++ ) {
      y[i] = x[i] * wind[i];
   }

   /*
    * Zero remaining memory.
    * The byte count is derived from the buffer layout; a literal count
    * would only be correct for one value of M and one element size.
    */
   memset( &y[L_WINDOW], 0, ( M + 1 ) * sizeof( y[0] ) );

   /*
    * Autocorrelation.
    * The signal is processed in blocks of 40 because Dotproduct40 always
    * consumes exactly 40 values.
    * NOTE(review): assumes L_WINDOW is a multiple of 40, otherwise the
    * last block would read past the zeroed tail - confirm.
    */
   for ( i = 0; i <= M; i++ ) {
      sum = 0;

      for ( j = 0; j < L_WINDOW; j += 40 ) {
         sum += Dotproduct40( &y[j], &y[j + i] );
      }
      r[i] = (Float32)sum;
   }
}


/*
 * Levinson
 *
 *
 * Parameters:
 *    old_A             O: Receives a copy of the new LP coefficients [M+1]
 *    r                 I: Vector of autocorrelations             [M+1]
 *    A                 O: LP coefficients                        [M+1]
 *    rc                O: First four reflection coefficients     [4]
 * Function:
 *    Levinson-Durbin algorithm
 *
 *    The prediction error is replaced by 0.01 whenever it is not
 *    positive. r[0] must be non-zero, it is used as a divisor.
 *
 * Returns:
 *    void
 *
 */
static void Levinson( Float32 *old_A, Float32 *r, Float32 *A, Float32 *rc )
{
   Float32 refl[M];   /* reflection coefficients of orders 1,...,M */
   Float32 acc, tmp, pred_err;
   Word32 order, k, mirror;


   /* order 1 */
   refl[0] = ( -r[1] ) / r[0];
   A[0] = 1.0F;
   A[1] = refl[0];
   pred_err = r[0] + r[1] * refl[0];

   if ( pred_err <= 0.0 ) {
      pred_err = 0.01F;
   }

   /* orders 2,...,M */
   order = 2;

   while ( order <= M ) {
      acc = 0.0F;

      for ( k = 0; k < order; k++ ) {
         acc += r[order - k] * A[k];
      }
      refl[order - 1] = ( -acc ) / ( pred_err );

      /* update the coefficients in place, pairing A[k] with A[order - k] */
      for ( k = 1; k <= ( order / 2 ); k++ ) {
         mirror = order - k;
         tmp = A[k] + refl[order - 1] * A[mirror];
         A[mirror] += refl[order - 1] * A[k];
         A[k] = tmp;
      }
      A[order] = refl[order - 1];
      pred_err += refl[order - 1] * acc;

      if ( pred_err <= 0.0 ) {
         pred_err = 0.01F;
      }
      order++;
   }

   /* hand back the first four reflection coefficients and keep a copy of A */
   memcpy( rc, refl, 4 * sizeof( Float32 ) );
   memcpy( old_A, A, MP1 * sizeof( Float32 ) );
}


/*
 * lpc
 *
 *
 * Parameters:
 *    old_A             O: Vector of old LP coefficients [M+1]
 *    x                 I: Input signal
 *    x_12k2            I: Input signal 12.2k
 *    a                 O: predictor coefficients
 *    mode              I: AMR mode
 * Function:
 *    LP analysis
 *
 *    In 12.2 kbit/s mode linear prediction (LP) analysis is performed
 *    twice per speech frame using the auto-correlation approach with
 *    30 ms asymmetric windows. No lookahead is used in
 *    the auto-correlation computation.
 *
 *    In other modes analysis is performed once per speech frame
 *    using the auto-correlation approach with 30 ms asymmetric windows.
 *    A lookahead of 40 samples (5 ms) is used in the auto-correlation computation.
 *
 *    The auto-correlations of windowed speech are converted to the LP
 *    coefficients using the Levinson-Durbin algorithm.
 *    Then the LP coefficients are transformed to the Line Spectral Pair
 *    (LSP) domain  for quantization and interpolation purposes.
 *    The interpolated quantized and unquantized filter coefficients
 *    are converted back to the LP filter coefficients
 *    (to construct the synthesis and weighting filters at each subframe).
 *
 * Returns:
 *    void
 *
 */
static void lpc( Float32 *old_A, Float32 x[], Float32 x_12k2[], Float32 a[], enum Mode
      mode )
{
   Word32 i;
   Float32 r[MP1];
   Float32 rc[4];


   if ( mode == MR122 ) {
      Autocorr( x_12k2, r, window_160_80 );

      /*
       * Lag windowing
       */
      for ( i = 1; i <= M; i++ ) {
         r[i] = r[i] * lag_wind[i - 1];
      }
      r[0] *= 1.0001F;

      if ( r[0] < 1.0F )
         r[0] = 1.0F;

      /*
       * Levinson Durbin
       */
      Levinson( old_A, r, &a[MP1], rc );

      /*
       * Autocorrelations
       */
      Autocorr( x_12k2, r, window_232_8 );

      /*
       * Lag windowing
       */
      for ( i = 1; i <= M; i++ ) {
         r[i] = r[i] * lag_wind[i - 1];
      }
      r[0] *= 1.0001F;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -