📄 dec_main.c

📁 关于AMR-WB+语音压缩编码的实现代码
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 *===================================================================
 *  3GPP AMR Wideband Floating-point Speech Codec
 *===================================================================
 */
#include <stdlib.h>
#include <math.h>
#include <memory.h>
#include <string.h>
#include "typedef.h"
#include "dec_main.h"
#include "dec_dtx.h"
#include "dec_acelp.h"
#include "dec_gain.h"
#include "dec_lpc.h"
#include "dec_util.h"
/* Largest positive value representable in 16 signed bits (32767). */
#define MAX_16 (Word16)0x7fff
/* NOTE(review): presumably meant as the most negative Word16 value; this
 * relies on how the cast of 0x8000 to Word16 behaves - confirm against the
 * Word16 definition. */
#define MIN_16 (Word16)0x8000
/* Frame geometry: one frame is NB_SUBFR subframes of L_SUBFR (4 * 64 = 256). */
#define L_FRAME            256   /* Frame size                             */
#define NB_SUBFR           4     /* Number of subframe per frame           */
#define L_SUBFR            64    /* Subframe size                          */
/* Mode indices compared against the "mode" argument of D_MAIN_decode.
 * NOTE(review): the names suggest the bit rate of each mode - confirm. */
#define MODE_7k            0     /* modes                                  */
#define MODE_9k            1
#define MODE_12k           2
#define MODE_14k           3
#define MODE_16k           4
#define MODE_18k           5
#define MODE_20k           6
#define MODE_23k           7
#define MODE_24k           8
/* Received frame types; D_MAIN_decode compares frame_type against these to
 * derive its bad-frame indicator (bfi) and unusable_frame flags. */
#define RX_SPEECH_PROBABLY_DEGRADED 1  /* rx types                         */
#define RX_SPEECH_LOST     2
#define RX_SPEECH_BAD      3
#define RX_NO_DATA         7
/* Q_MAX is the value D_MAIN_reset stores into mem_q and mem_subfr_q[]. */
#define Q_MAX              8     /* scaling max for signal                 */
#define PIT_SHARP          27853 /* pitch sharpening factor = 0.85 Q15     */
/* Pitch lag boundaries used when decoding the pitch index. */
#define PIT_MIN            34    /* Minimum pitch lag with resolution 1/4  */
#define PIT_FR2            128   /* Minimum pitch lag with resolution 1/2  */
#define PIT_FR1_9b         160   /* Minimum pitch lag with resolution 1    */
#define PIT_FR1_8b         92    /* Minimum pitch lag with resolution 1    */
/* Constant tables defined in another translation unit:
 *   D_ROM_isp           - copied into mem_isp on a full reset
 *   D_ROM_isf           - copied into mem_isf and every mem_isf_buf slot on a
 *                         full reset; also handed to the DTX init/reset calls
 *   D_ROM_interpol_frac - per-subframe weights used when interpolating
 *                         between the previous and the current ISF/ISP set */
extern const Word16 D_ROM_isp[];
extern const Word16 D_ROM_isf[];
extern const Word16 D_ROM_interpol_frac[];
/* NOTE(review): MSVC warning 4310 is "cast truncates constant value";
 * presumably silenced because of the (Word16)0x8000 cast above - confirm. */
#ifdef WIN32
#pragma warning( disable : 4310)
#endif
/*
 * D_MAIN_reset
 *
 * Parameters:
 *    st        I/O: pointer to the decoder state structure
 *    reset_all   I: non-zero requests a full reset; zero only clears the
 *                   excitation / pitch / scaling memories
 *
 * Function:
 *    Puts the decoder memories back into their initial state. The first
 *    part always runs; the second part (filter memories, ISF/ISP history,
 *    random seeds, DTX state and the "_plus" high-band memories) runs only
 *    when reset_all is non-zero.
 *
 * Returns:
 *    void
 */
void D_MAIN_reset(void *st, Word16 reset_all)
{
   Decoder_State *s = (Decoder_State*)st;
   Word32 k;

   /* --- memories cleared on every reset --- */
   memset(s->mem_exc, 0, sizeof(Word16) * (PIT_MAX + L_INTERPOL));
   memset(s->mem_isf_q, 0, sizeof(Word16) * M);
   memset(s->mem_ph_disp, 0, sizeof(Word16) * 8);

   /* old pitch value = 64.0 */
   s->mem_T0 = 64;
   s->mem_T0_frac = 0;

   s->mem_first_frame = 1;
   s->mem_gc_thres = 0;
   s->mem_tilt_code = 0;

   /* scaling memories for excitation */
   s->mem_q = Q_MAX;
   for(k = 0; k < 4; k++)
   {
      s->mem_subfr_q[k] = Q_MAX;
   }

   /* a partial reset stops here */
   if(reset_all == 0)
   {
      return;
   }

   /* --- memories cleared only on a full reset --- */

   /* sub-module initialisation */
   D_GAIN_init(s->mem_gain);
   memset(s->mem_oversamp, 0, sizeof(Word16) * (2 * 12));
   memset(s->mem_sig_out, 0, sizeof(Word16) * 6);
   memset(s->mem_hf, 0, sizeof(Word16) * (31 - 1));
   memset(s->mem_hf3, 0, sizeof(Word16) * (31 - 1));
   memset(s->mem_hp400, 0, sizeof(Word16) * 6);
   D_GAIN_lag_concealment_init(s->mem_lag);

   /* ISP / ISF memories start from the ROM tables */
   memcpy(s->mem_isp, D_ROM_isp, sizeof(Word16) * M);
   memcpy(s->mem_isf, D_ROM_isf, sizeof(Word16) * M);
   for(k = 0; k < L_MEANBUF; k++)
   {
      memcpy(s->mem_isf_buf + k * M, D_ROM_isf, sizeof(Word16) * M);
   }

   /* scalar state */
   s->mem_deemph = 0;
   s->mem_seed = 21845;    /* all three random seeds start at 21845 */
   s->mem_seed2 = 21845;
   s->mem_seed3 = 21845;
   s->mem_state = 0;
   s->mem_bfi = 0;

   /* synthesis filter memories */
   memset(s->mem_syn_hf, 0, sizeof(Word16) * M16k);
   memset(s->mem_syn_hi, 0, sizeof(Word16) * M);
   memset(s->mem_syn_lo, 0, sizeof(Word16) * M);

   D_DTX_reset(s->dtx_decSt, D_ROM_isf);
   s->mem_vad_hist = 0;

   /* "_plus" memories: float buffers plus a unit first LPC coefficient */
   memset(s->mem_syn_out, 0, sizeof(Word16) * (PIT_MAX + L_SUBFR));
   memset(s->mem_oversamp_hf_plus, 0, sizeof(float) * (2 * L_FILT));
   memset(s->mem_syn_hf_plus, 0, sizeof(float) * 8);
   memset(s->lpc_hf_plus + 1, 0, sizeof(float) * 8);
   s->lpc_hf_plus[0] = 1.0f;
   s->gain_hf_plus = 0.0f;
   s->threshold_hf = 0.0f;
   s->lp_amp_hf = 0.0f;
   s->ramp_state = 0;
}
/*
 * D_MAIN_init
 *
 * Parameters:
 *    spd_state         O: receives the pointer to the newly allocated
 *                         decoder state; left as NULL on failure
 *
 * Function:
 *    Allocates the decoder state and its DTX sub-state, then performs a
 *    full reset (D_MAIN_reset with reset_all = 1). The caller releases the
 *    state with D_MAIN_close.
 *
 * Returns:
 *    0 if successful, -1 if spd_state is NULL or an allocation failed
 */
Word32 D_MAIN_init(void **spd_state)
{
   /* Decoder states */
   Decoder_State *st;

   if(spd_state == NULL)
   {
      return(-1);
   }
   *spd_state = NULL;

   /*
    * Memory allocation for coder state.
    */
   st = (Decoder_State*)malloc(sizeof *st);
   if(st == NULL)
   {
      return(-1);
   }

   st->dtx_decSt = NULL;
   D_DTX_init(&st->dtx_decSt, D_ROM_isf);

   /*
    * The decoder dereferences dtx_decSt on every frame, so a DTX state that
    * is still NULL after initialisation is treated as an allocation failure
    * and the partially built state is released.
    */
   if(st->dtx_decSt == NULL)
   {
      free(st);
      return(-1);
   }

   D_MAIN_reset((void *)st, 1);
   *spd_state = (void *)st;
   return(0);
}
/*
 * Decoder_close
 *
 * Parameters:
 *    spd_state   I: pointer to state structure
 *
 * Function:
 *    Free coder memory.
 *
 * Returns:
 *    void
 */
void D_MAIN_close(void **spd_state)
{
   D_DTX_exit(&(((Decoder_State *)(*spd_state))->dtx_decSt));   
   free(*spd_state);                                            
   return;
}
/*
 * D_MAIN_decode
 *
 * Parameters:
 *    mode           I: used mode
 *    prms           I: parameter vector
 *    synth16k       O: synthesis speech
 *    spd_state      B: state structure
 *    frame_type     I: received frame type
 *
 * Function:
 *    Main decoder routine.
 *
 * Returns:
 *    0 if successful
 */
Word32 D_MAIN_decode(Word16 mode, Word16 prms[], Word16 synth16k[],
                     void *spd_state, UWord8 frame_type)
{
   Word32 code2[L_SUBFR];           /* algebraic codevector                */
   Word32 L_tmp, L_tmp2, L_gain_code, L_stab_fac;
   Word32 i, j, i_subfr, pit_flag;
   Word32 T0, T0_frac, T0_max, select, T0_min = 0;
   Word16 exc2[L_FRAME];            /* excitation vector                   */
   Word16 Aq[NB_SUBFR * (M + 1)];   /* A(z) quantized for the 4 subframes  */
   Word16 code[L_SUBFR];            /* algebraic codevector                */
   Word16 excp[L_SUBFR];            /* excitation vector                   */
   Word16 HfIsf[M16k];
   Word16 ispnew[M];                /* immittance spectral pairs at 4nd sfr*/
   Word16 isf[M];                   /* ISF (frequency domain) at 4nd sfr   */
   Word16 isf_tmp[M];               /* ISF tmp                             */
   Word16 ind[8];                   /* quantization indices                */
   Word16 index, fac, voice_fac, max, Q_new = 0;
   Word16 gain_pit, gain_code, gain_code_lo, tmp;
   Word16 corr_gain = 0;
   UWord16 pit_sharp = 0;
   Word16 *exc;                     /* Excitation vector                   */
   Word16 *p_Aq;                    /* ptr to A(z) for the 4 subframes     */
   Word16 *p_isf;                   /* prt to isf                          */
   Decoder_State *st;   /* Decoder states */
   UWord8 newDTXState, bfi, unusable_frame;
   UWord8 vad_flag;
   st = (Decoder_State*)spd_state;
   /* find the new  DTX state  SPEECH OR DTX */
   newDTXState = D_DTX_rx_handler(st->dtx_decSt, frame_type);
   if(newDTXState != SPEECH)
   {
      D_DTX_exe(st->dtx_decSt, exc2, newDTXState, isf, &prms);
   }
   /* SPEECH action state machine  */
   if((frame_type == RX_SPEECH_BAD) |
      (frame_type == RX_SPEECH_PROBABLY_DEGRADED))
   {
      /* bfi for all index, bits are not usable */
      bfi = 1;
      unusable_frame = 0;
   }
   else if((frame_type == RX_NO_DATA) | (frame_type == RX_SPEECH_LOST))
   {
      /* bfi only for lsf, gains and pitch period */
      bfi = 1;
      unusable_frame = 1;
   }
   else
   {                                                                        
      bfi = 0;
      unusable_frame = 0;
   }
   if(bfi != 0)
   {
      st->mem_state = (UWord8)(st->mem_state + 1);
      if(st->mem_state > 6)
      {
         st->mem_state = 6;
      }
   }
   else
   {
      st->mem_state = (UWord8)(st->mem_state >> 1);
   }
   /*
    * If this frame is the first speech frame after CNI period,
    * set the BFH state machine to an appropriate state depending
    * on whether there was DTX muting before start of speech or not
    * If there was DTX muting, the first speech frame is muted.
    * If there was no DTX muting, the first speech frame is not
    * muted. The BFH state machine starts from state 5, however, to
    * keep the audible noise resulting from a SID frame which is
    * erroneously interpreted as a good speech frame as small as
    * possible (the decoder output in this case is quickly muted)
    */
   if(st->dtx_decSt->mem_dtx_global_state == DTX)
   {
      st->mem_state = 5;
      st->mem_bfi = 0;
   }
   else if(st->dtx_decSt->mem_dtx_global_state == D_DTX_MUTE)
   {
      st->mem_state = 5;
      st->mem_bfi = 1;
   }
   if(newDTXState == SPEECH)
   {
      vad_flag = (UWord8)(*prms++);
      if(bfi == 0)
      {
         if(vad_flag == 0)
         {                                                              
            st->mem_vad_hist = (Word16)(st->mem_vad_hist + 1);
            if(st->mem_vad_hist > 32767)
            {
               st->mem_vad_hist = 32767;
            }
         }
         else
         {
            st->mem_vad_hist = 0;
         }
      }
   }
   /*
    * DTX-CNG
    */      
   if(newDTXState != SPEECH) /* CNG mode */
   {
      /*
       * increase slightly energy of noise below 200 Hz
       * Convert ISFs to the cosine domain
       */
      D_LPC_isf_isp_conversion(isf, ispnew, M);
      D_LPC_isp_a_conversion(ispnew, Aq, M);
      memcpy(isf_tmp, st->mem_isf, M * sizeof(Word16));
      for(i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
      {
         j = (i_subfr >> 6);
         for(i = 0; i < M; i++)
         {
            L_tmp = (isf_tmp[i] * (32767 - D_ROM_interpol_frac[j])) << 1;
            L_tmp = L_tmp + ((isf[i] * D_ROM_interpol_frac[j]) << 1);
            HfIsf[i] = (Word16)((L_tmp + 0x8000) >> 16);
         }
         D_UTIL_dec_synthesis(Aq, &exc2[i_subfr], 0, &synth16k[i_subfr * 5 /4], 
            (Word16) 1, HfIsf, mode, newDTXState, bfi, st);
      }
      /* reset speech coder memories */
      D_MAIN_reset(st, 0);
      memcpy(st->mem_isf, isf, M * sizeof(Word16));
      st->mem_bfi = bfi;
      st->dtx_decSt->mem_dtx_global_state = (UWord8)newDTXState;
      return(0);
   }
   /*
    * ACELP
    */
   exc = st->mem_exc + PIT_MAX + L_INTERPOL;
   /* Decode the ISFs */
   if(mode <= MODE_7k)
   {
      ind[0] = *prms++;
      ind[1] = *prms++;
      ind[2] = *prms++;
      ind[3] = *prms++;
      ind[4] = *prms++;
      D_LPC_isf_2s3s_decode(ind, isf, st->mem_isf_q, st->mem_isf,       
         st->mem_isf_buf, bfi);
   }
   else
   {
      ind[0] = *prms++;
      ind[1] = *prms++;
      ind[2] = *prms++;
      ind[3] = *prms++;
      ind[4] = *prms++;
      ind[5] = *prms++;
      ind[6] = *prms++;
      D_LPC_isf_2s5s_decode(ind, isf, st->mem_isf_q, st->mem_isf,
         st->mem_isf_buf, bfi);
   }
   /* Convert ISFs to the cosine domain */
   D_LPC_isf_isp_conversion(isf, ispnew, M);
   if(st->mem_first_frame != 0)
   {
      st->mem_first_frame = 0;
      memcpy(st->mem_isp, ispnew, M * sizeof(Word16));
   }
   /* Find the interpolated ISPs and convert to a[] for all subframes */
   D_LPC_int_isp_find(st->mem_isp, ispnew, D_ROM_interpol_frac, Aq);
   /* update isp memory for the next frame */
   memcpy(st->mem_isp, ispnew, M * sizeof(Word16));
   /* Check stability on isf : distance between old isf and current isf */
   L_tmp = 0;
   p_isf = st->mem_isf;
   for(i = 0; i < M - 1; i++)
   {
      tmp = (Word16)((isf[i] - p_isf[i]));
      L_tmp = L_tmp + (tmp * tmp);
   }
   if(L_tmp < 3276928)
   {
      L_tmp = L_tmp >> 7;
      L_tmp = (L_tmp * 26214) >> 15;   /* tmp = L_tmp*0.8/256        */
      L_tmp = 20480 - L_tmp;           /* 1.25 - tmp                 */
      L_stab_fac = L_tmp << 1;         /* Q14 -> Q15 with saturation */
      if(L_stab_fac > 0x7FFF)
      {
         L_stab_fac = 0x7FFF;
      }
   }
   else
   {
      L_stab_fac = 0x0;
   }
   memcpy(isf_tmp, st->mem_isf, M * sizeof(Word16));
   memcpy(st->mem_isf, isf, M * sizeof(Word16));
   /*
    * Loop for every subframe in the analysis frame
    *
    * The subframe size is L_SUBFR and the loop is repeated L_FRAME/L_SUBFR
    * times
    *   - decode the pitch delay and filter mode
    *   - decode algebraic code
    *   - decode pitch and codebook gains
    *   - find voicing factor and tilt of code for next subframe
    *   - find the excitation and compute synthesis speech
    */
   p_Aq = Aq;   /* pointer to interpolated LPC parameters */
   for(i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
   {
      pit_flag = i_subfr;
      if((i_subfr == (2 * L_SUBFR)) & (mode > MODE_7k))
      {
         pit_flag = 0;
      }
      /*
       * - Decode pitch lag
       * Lag indeces received also in case of BFI,
       * so that the parameter pointer stays in sync.
       */
      if(pit_flag == 0)
      {
         if(mode <= MODE_9k)
         {                                                              
            index = *prms++;
            if(index < ((PIT_FR1_8b - PIT_MIN) * 2))
            {
               T0 = (PIT_MIN + (index >> 1));
               T0_frac = (index - ((T0 - PIT_MIN) << 1));
               T0_frac = (T0_frac << 1);
            }
            else
            {
               T0 = index + (PIT_FR1_8b - ((PIT_FR1_8b - PIT_MIN) * 2));
               T0_frac = 0;
            }
         }
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -