📄 enc_main.c

📁 Linux 影片撥放解碼 Video DVD
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 *===================================================================
 *  3GPP AMR Wideband Floating-point Speech Codec
 *===================================================================
 */
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <float.h>
#include <string.h>
#include <stdio.h>

#include "enc_dtx.h"
#include "enc_acelp.h"
#include "enc_lpc.h"
#include "enc_main.h"
#include "enc_gain.h"
#include "enc_util.h"

#ifdef WIN32
#pragma warning( disable : 4310)
#endif

#include "typedef.h"

/*
 * Local constants of the encoder main module.
 *
 * MAX_16 / MIN_16 are the bit patterns 0x7fff / 0x8000 cast to Word16
 * (presumably the largest / smallest value of a 16-bit signed type
 * declared in typedef.h -- TODO confirm).
 */
#define MAX_16       (Word16)0x7fff
#define MIN_16       (Word16)0x8000
#define Q_MAX        8        /* scaling max for signal                 */
#define PREEMPH_FAC  0.68F    /* preemphasis factor                     */
#define GAMMA1       0.92F    /* Weighting factor (numerator)           */
#define TILT_FAC     0.68F    /* tilt factor (denominator)              */
#define PIT_MIN      34       /* Minimum pitch lag with resolution 1/4  */
#define PIT_FR2      128      /* Minimum pitch lag with resolution 1/2  */
/*
 * NOTE(review): the next two constants carry the same description; the
 * "_9b" / "_8b" suffixes presumably distinguish 9-bit and 8-bit lag
 * coding -- confirm against the pitch search code.
 */
#define PIT_FR1_9b   160      /* Minimum pitch lag with resolution 1    */
#define PIT_FR1_8b   92       /* Minimum pitch lag with resolution 1    */
#define PIT_MAX      231      /* Maximum pitch lag                      */
#define L_INTERPOL   (16+1)   /* Length of filter for interpolation     */
#define L_FRAME16k   320      /* Frame size at 16kHz                    */
#define L_SUBFR      64       /* Subframe size                          */
#define NB_SUBFR     4        /* Number of subframe per frame           */
#define L_FILT       12       /* Delay of up-sampling filter            */
#define L_NEXT       64       /* Overhead in LP analysis                */
/*
 * Codec mode indices (the names presumably abbreviate the bit rate in
 * kbit/s -- TODO confirm).
 */
#define MODE_7k      0        /* modes                                  */
#define MODE_9k      1
#define MODE_12k     2
#define MODE_14k     3
#define MODE_16k     4
#define MODE_18k     5
#define MODE_20k     6
#define MODE_23k     7
#define MODE_24k     8
/* Mode value compared against *mode in E_MAIN_encode after DTX handling */
#define MRDTX        10

/*
 * Constant tables defined outside this file.
 *
 * E_ROM_isp is copied into mem_isp_q by E_MAIN_reset.
 *
 * NOTE(review): E_ROM_isf is declared here with Word16 elements, but
 * E_MAIN_reset copies M * sizeof(Float32) bytes from it into mem_isf.
 * Confirm the element type of the table's definition and of mem_isf.
 */
extern const Word16 E_ROM_isp[];
extern const Word16 E_ROM_isf[];
extern const Word16 E_ROM_interpol_frac[];

/*
 * E_MAIN_reset
 *
 * Parameters:
 *    st        I/O: pointer to state structure (a Coder_State passed
 *                   as void *)
 *    reset_all   I: non-zero selects the full reset, zero the partial one
 *
 * Function:
 *    Re-initialisation of the coder state.
 *
 *    Always reset: excitation memory, quantised ISF memory, synthesis
 *    memory, mem_w0, mem_tilt_code, mem_first_frame, the pitch gain
 *    clipping state and mem_gc_threshold.
 *
 *    Only when reset_all is non-zero: speech / weighted speech buffers,
 *    filter memories, gain quantiser memory, ISP / ISF memories, scaling
 *    factors, open-loop pitch history, high-band memories, random seed
 *    and the DTX and VAD sub-states.
 *
 * Returns:
 *    void
 */
void E_MAIN_reset(void *st, Word16 reset_all)
{
   Coder_State *enc = (Coder_State *) st;
   Word32 k;

   /* ---- state that is cleared on every reset ---- */
   memset(enc->mem_exc, 0, (PIT_MAX + L_INTERPOL) * sizeof(Word16));
   memset(enc->mem_isf_q, 0, M * sizeof(Word16));
   memset(enc->mem_syn, 0, M * sizeof(Float32));

   enc->mem_first_frame = 1;
   enc->mem_tilt_code = 0;
   enc->mem_w0 = 0.0F;

   E_GAIN_clip_init(enc->mem_gp_clip);

   enc->mem_gc_threshold = 0.0F;

   /* a partial reset stops here */
   if (reset_all == 0)
   {
      return;
   }

   /* ---- signal buffers and filter memories ---- */
   memset(enc->mem_speech, 0, (L_TOTAL - L_FRAME) * sizeof(Float32));
   memset(enc->mem_wsp, 0, (PIT_MAX / OPL_DECIM) * sizeof(Float32));
   memset(enc->mem_decim2, 0, 3 * sizeof(Float32));
   memset(enc->mem_decim, 0, 2 * L_FILT16k * sizeof(Float32));
   memset(enc->mem_sig_in, 0, 4 * sizeof(Float32));

   E_ACELP_Gain2_Q_init(enc->mem_gain_q);

   memset(enc->mem_hf_wsp, 0, 8 * sizeof(Float32));

   /* ---- ISP memory: cos(pi * k / M) for k = 1 .. M-1, then 0.045 ---- */
   for (k = 1; k < M; k++)
   {
      enc->mem_isp[k - 1] =
         (Float32)cos(3.141592654 * (Float32)k / (Float32)M);
   }

   enc->mem_isp[M - 1] = 0.045F;
   memcpy(enc->mem_isp_q, E_ROM_isp, M * sizeof(Word16));

   /* ---- scalar state ---- */
   enc->mem_preemph = 0.0F;
   enc->mem_wsp_df = 0.0F;
   enc->mem_q = Q_MAX;

   for (k = 0; k < 4; k++)
   {
      enc->mem_subfr_q[k] = Q_MAX;
   }

   enc->mem_ada_w = 0.0F;
   enc->mem_ol_gain = 0.0F;
   enc->mem_ol_wght_flg = 0;

   /* open-loop lag history and its median all start at 40 */
   for (k = 0; k < 5; k++)
   {
      enc->mem_ol_lag[k] = 40;
   }

   enc->mem_T0_med = 40;

   memset(enc->mem_hp_wsp, 0,
      ( ( L_FRAME / 2 ) / OPL_DECIM + ( PIT_MAX / OPL_DECIM ) )
      * sizeof(Float32) );

   /* ---- synthesis and high-band memories ---- */
   memset(enc->mem_syn_hf, 0, M * sizeof(Float32));
   memset(enc->mem_syn2, 0, M * sizeof(Float32));

   memset(enc->mem_hp400, 0, 4 * sizeof(Float32));
   memset(enc->mem_sig_out, 0, 4 * sizeof(Float32));

   memset(enc->mem_hf, 0, 2 * L_FILT16k * sizeof(Float32));
   memset(enc->mem_hf2, 0, 2 * L_FILT16k * sizeof(Float32));
   memset(enc->mem_hf3, 0, 2 * L_FILT16k * sizeof(Float32));

   /*
    * NOTE(review): E_ROM_isf is declared in this file as const Word16[],
    * yet M * sizeof(Float32) bytes are copied from it. Confirm the
    * element types of the table and of mem_isf.
    */
   memcpy(enc->mem_isf, E_ROM_isf, M * sizeof(Float32));

   enc->mem_deemph = 0.0F;

   enc->mem_seed = 21845;
   enc->mem_gain_alpha = 1.0F;
   enc->mem_vad_hist = 0;

   /* ---- sub-module states ---- */
   E_DTX_reset(enc->dtx_encSt);
   E_DTX_vad_reset(enc->vadSt);
}

/*
 * E_MAIN_init
 *
 * Parameters:
 *    spe_state         O: receives the pointer to the newly allocated
 *                         state structure; set to NULL on failure
 *
 * Function:
 *    Memory allocation of the coder state and of its DTX and VAD
 *    sub-states, followed by a full reset (E_MAIN_reset, reset_all = 1).
 *    On failure everything allocated so far is released again.
 *
 * Returns:
 *    0 on success, -1 if spe_state is NULL or an allocation failed
 */
Word16 E_MAIN_init(void **spe_state)
{
   Coder_State *st;

   if (spe_state == NULL)
   {
      return(-1);
   }

   *spe_state = NULL;

   /* allocate memory */
   if ((st = (Coder_State *) malloc(sizeof(Coder_State))) == NULL)
   {
      return(-1);
   }

   st->vadSt = NULL;
   st->dtx_encSt = NULL;

   E_DTX_init(&(st->dtx_encSt));
   E_DTX_vad_init(&(st->vadSt));

   /*
    * Both pointers were NULL before the init calls, so a pointer that is
    * still NULL means the sub-state was not created (assumes the init
    * routines leave the pointer NULL on failure). Resetting such a state
    * would hand a NULL sub-state to E_DTX_reset / E_DTX_vad_reset, so
    * undo the partial construction and report the failure instead.
    */
   if ((st->dtx_encSt == NULL) || (st->vadSt == NULL))
   {
      if (st->dtx_encSt != NULL)
      {
         E_DTX_exit(&(st->dtx_encSt));
      }

      if (st->vadSt != NULL)
      {
         E_DTX_vad_exit(&(st->vadSt));
      }

      free(st);

      return(-1);
   }

   E_MAIN_reset((void *) st, 1);

   *spe_state = (void*)st;

   return(0);
}

/*
 * E_MAIN_close
 *
 *
 * Parameters:
 *    spe_state   I: pointer to state structure
 *
 * Function:
 *    Free coder memory.
 *
 *
 * Returns:
 *    void
 */
void E_MAIN_close(void **spe_state)
{
   E_DTX_exit(&( ( (Coder_State *)(*spe_state) )->dtx_encSt));
   E_DTX_vad_exit(&( ( (Coder_State *) (*spe_state) )->vadSt));
   free(*spe_state);

   return;
}

/*
 * E_MAIN_parm_store
 *
 * Parameters:
 *    value       I: parameter value (stored after conversion to Word16)
 *    prms      I/O: address of the write pointer into the output
 *                   parameter array; advanced by one element
 *
 * Function:
 *    Write one parameter at the current output position and move the
 *    output position to the next element.
 *
 * Returns:
 *    void
 */
static void E_MAIN_parm_store(Word32 value, Word16 **prms)
{
   Word16 *slot = *prms;

   *slot = (Word16)value;
   *prms = slot + 1;
}


/*
 * E_MAIN_encode
 *
 * Parameters:
 *    mode        I: used mode
 *    speech16k   I: 320 new speech samples (at 16 kHz)
 *    prms        O: output parameters
 *    spe_state   B: state structure
 *    allow_dtx   I: DTX ON/OFF
 *
 * Function:
 *    Main coder routine.
 *
 * Returns:
 *    void
 */
Word16 E_MAIN_encode(Word16 * mode, Word16 speech16k[], Word16 prms[],
                    void *spe_state, Word16 allow_dtx)
{

   /* Float32 */
   Float32 f_speech16k[L_FRAME16k];    /* Speech vector                          */
   Float32 f_old_exc[(L_FRAME + 1) + PIT_MAX + L_INTERPOL]; /* Excitation vector */
   Float32 f_exc2[L_FRAME];            /* excitation vector                      */
   Float32 error[M + L_SUBFR];         /* error of quantization                  */
   Float32 A[NB_SUBFR * (M + 1)];      /* A(z) unquantized for the 4 subframes   */
   Float32 Aq[NB_SUBFR * (M + 1)];     /* A(z)   quantized for the 4 subframes   */
   Float32 xn[L_SUBFR];                /* Target vector for pitch search         */
   Float32 xn2[L_SUBFR];               /* Target vector for codebook search      */
   Float32 dn[L_SUBFR];                /* Correlation between xn2 and h1         */
   Float32 cn[L_SUBFR];                /* Target vector in residual domain       */
   Float32 h1[L_SUBFR];                /* Impulse response vector                */
   Float32 f_code[L_SUBFR];            /* Fixed codebook excitation              */
   Float32 y1[L_SUBFR];                /* Filtered adaptive excitation           */
   Float32 y2[L_SUBFR];                /* Filtered adaptive excitation           */
   Float32 synth[L_SUBFR];             /* 12.8kHz synthesis vector               */
   Float32 r[M + 1];                   /* Autocorrelations of windowed speech    */
   Float32 Ap[M + 1];                  /* A(z) with spectral expansion           */
   Float32 ispnew[M];                  /* immittance spectral pairs at 4nd sfr   */
   Float32 isf[M];                     /* ISF (frequency domain) at 4nd sfr      */
   Float32 g_coeff[5], g_coeff2[2];    /* Correlations                           */
   Float32 gain_pit;
   Float32 f_tmp, gain1, gain2;
   Float32 stab_fac = 0.0F, fac;
   Float32 *new_speech, *speech;       /* Speech vector                          */
   Float32 *wsp;                       /* Weighted speech vector                 */
   Float32 *f_exc;                     /* Excitation vector                      */
   Float32 *p_A, *p_Aq;                /* ptr to A(z) for the 4 subframes        */
   Float32 *f_pt_tmp;

   /* Word32 */
   Word32 indice[8];                   /* quantization indices                   */
   Word32 vad_flag, clip_gain;
   Word32 T_op, T_op2, T0, T0_frac;
   Word32 T0_min, T0_max;
   Word32 voice_fac, Q_new = 0;
   Word32 L_gain_code, l_tmp;
   Word32 i, i_subfr, pit_flag;

   /* Word16 */
   Word16 exc2[L_FRAME];               /* excitation vector                      */
   Word16 s_Aq[NB_SUBFR * (M + 1)];    /* A(z) quantized for the 4 subframes     */
   Word16 s_code[L_SUBFR];             /* Fixed codebook excitation              */
   Word16 ispnew_q[M];                 /* quantized ISPs at 4nd subframe         */
   Word16 isfq[M];                     /* quantized ISPs                         */
   Word16 select, codec_mode;
   Word16 index;
   Word16 s_gain_pit, gain_code;
   Word16 s_tmp, s_max;
   Word16 corr_gain;
   Word16 *exc;                        /* Excitation vector                      */

   /* Other */
   Coder_State *st;                    /* Coder states                           */

   st = (Coder_State *)spe_state;
   codec_mode = *mode;

   /*
    * Initialize pointers to speech vector.
    *
    *
    *         |-------|-------|-------|-------|-------|-------|
    *          past sp   sf1     sf2     sf3     sf4    L_NEXT
    *         <-------  Total speech buffer (L_TOTAL)   ------>
    *   old_speech
    *         <-------  LPC analysis window (L_WINDOW)  ------>
    *                 <-- present frame (L_FRAME) ---->
    *                 |       <----- new speech (L_FRAME) ---->
    *                 |       |
    *               speech    |
    *                      new_speech
    */

   new_speech = st->mem_speech + L_TOTAL - L_FRAME - L_FILT;     /* New speech     */
   speech     = st->mem_speech + L_TOTAL - L_FRAME - L_NEXT;     /* Present frame  */

   exc = st->mem_exc + PIT_MAX + L_INTERPOL;
   f_exc = f_old_exc + PIT_MAX + L_INTERPOL;
   wsp = st->mem_wsp + (PIT_MAX / OPL_DECIM);

   for(i = 0; i < L_FRAME16k; i++)
   {
      f_speech16k[i] = (Float32)speech16k[i];
   }

   Q_new = -st->mem_q;

   for(i = 0; i < (PIT_MAX + L_INTERPOL); i++)
   {
      f_old_exc[i] = (Float32)(st->mem_exc[i] * pow(2, Q_new));
   }

   /*
    * Down sampling signal from 16kHz to 12.8kHz
    */
   E_UTIL_decim_12k8(f_speech16k, L_FRAME16k, new_speech, st->mem_decim);

   /* decimate with zero-padding to avoid delay of filter */
   memcpy(f_code, st->mem_decim, 2 * L_FILT16k * sizeof(Float32));
   memset(error, 0, L_FILT16k * sizeof(Float32));

   E_UTIL_decim_12k8(error, L_FILT16k, new_speech + L_FRAME, f_code);

   /*
    * Perform 50Hz HP filtering of input signal.
    * Perform fixed preemphasis through 1 - g z^-1
    */
   E_UTIL_hp50_12k8(new_speech, L_FRAME, st->mem_sig_in);

   memcpy(f_code, st->mem_sig_in,  4 * sizeof(Float32) );

   E_UTIL_hp50_12k8(new_speech + L_FRAME, L_FILT, f_code);

   E_UTIL_f_preemph(new_speech, PREEMPH_FAC, L_FRAME, &(st->mem_preemph));

   /* last L_FILT samples for autocorrelation window */
   f_tmp = st->mem_preemph;
   E_UTIL_f_preemph(new_speech + L_FRAME, PREEMPH_FAC, L_FILT, &f_tmp);

   /*
    *  Call VAD
    *  Preemphasis scales down the signal at low frequencies and keeps the
    *  dynamics in HF.
    *  The VAD works slightly in the future
    *  (new_speech = speech + L_NEXT - L_FILT).
    */

   vad_flag = E_DTX_vad(st->vadSt, new_speech);

   if (vad_flag == 0)
   {
      st->mem_vad_hist = 1;
   }
   else
   {
      st->mem_vad_hist = 0;
   }

   /* DTX processing */
   if (allow_dtx)
   {
      /* Note that mode may change here */
      E_DTX_tx_handler(st->dtx_encSt, vad_flag, mode);
   }
   else
   {
      E_DTX_reset(st->dtx_encSt);
   }

   if(*mode != MRDTX)
   {
      E_MAIN_parm_store(vad_flag, &prms);
   }

   /*
    *  Perform LPC analysis
    *  --------------------
    *   - autocorrelation + lag windowing
    *   - Levinson-durbin algorithm to find a[]
    *   - convert a[] to isp[]
    *   - convert isp[] to isf[] for quantization
    *   - quantize and code the isf[]
    *   - convert isf[] to isp[] for interpolation
    *   - find the interpolated isps and convert to a[] for the 4 subframes
    */

   /* LP analysis centered at 3rd subframe */
   E_UTIL_autocorr(st->mem_speech, r);
   E_LPC_lag_wind(r + 1, M);  /* Lag windowing    */

   E_LPC_lev_dur(A, r, M);

   E_LPC_a_isp_conversion(A, ispnew, st->mem_isp, M);  /* From A(z) to isp */

   /* Find the interpolated isps and convert to a[] for all subframes */
   E_LPC_f_int_isp_find(st->mem_isp, ispnew, A, NB_SUBFR, M);

   /* update isp memory for the next frame */
   memcpy(st->mem_isp, ispnew, M * sizeof(Float32));

   /* Convert isps to frequency domain 0..6400 */
   E_LPC_isp_isf_conversion(ispnew, isf, M);

   /* check resonance for pitch clipping algorithm */
   E_GAIN_clip_isf_test(isf, st->mem_gp_clip);


   /*
    *  Perform PITCH_OL analysis
    *  -------------------------
    * - Find the residual res[] for the whole speech frame
    * - Find the weighted input speech wsp[] for the whole speech frame
    * - Find the 2 open-loop pitch estimate
    * - Set the range for searching closed-loop pitch in 1st subframe
    */

   p_A = A;

   for (i_subfr = 0;  i_subfr < L_FRAME; i_subfr += L_SUBFR)
   {
      E_LPC_a_weight(p_A, Ap, GAMMA1, M);

      E_UTIL_residu(Ap, &speech[i_subfr], &wsp[i_subfr], L_SUBFR);

      p_A += (M + 1);
   }

   E_UTIL_deemph(wsp, TILT_FAC, L_FRAME, &(st->mem_wsp_df));

   /* decimation of wsp[] to search pitch in LF and to reduce complexity */

   E_GAIN_lp_decim2(wsp, L_FRAME, st->mem_decim2);
12 3 下一页
💿 文件大小 390 K
👤 上传用户 Tonic2009
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#Linux #Video #DVD
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -