📄 enc_main.c

📁 Linux 影片撥放解碼 Video DVD
💻 C
📖 第 1 页 / 共 3 页
字号:

   /* Find open loop pitch lag: whole frame for MODE_7k, first half frame otherwise */

   if (*mode == MODE_7k)
   {
      /* Find open loop pitch lag for whole speech frame */

      T_op = E_GAIN_open_loop_search(wsp, PIT_MIN / OPL_DECIM,
         PIT_MAX / OPL_DECIM, L_FRAME / OPL_DECIM, st->mem_T0_med,
         &(st->mem_ol_gain), st->mem_hf_wsp, st->mem_hp_wsp,
         st->mem_ol_wght_flg);
   }
   else
   {
      /* Find open loop pitch lag for first 1/2 frame */

      T_op = E_GAIN_open_loop_search(wsp, PIT_MIN / OPL_DECIM,
         PIT_MAX / OPL_DECIM, (L_FRAME / 2) / OPL_DECIM, st->mem_T0_med,
         &(st->mem_ol_gain), st->mem_hf_wsp, st->mem_hp_wsp,
         st->mem_ol_wght_flg);
   }

   if (st->mem_ol_gain > 0.6)
   {
      st->mem_T0_med = E_GAIN_olag_median(T_op, st->mem_ol_lag);
      st->mem_ada_w = 1.0F;
   }
   else
   {
      st->mem_ada_w = st->mem_ada_w * 0.9F;
   }

   if (st->mem_ada_w < 0.8)
   {
      st->mem_ol_wght_flg = 0;
   }
   else
   {
      st->mem_ol_wght_flg = 1;
   }

   E_DTX_pitch_tone_detection(st->vadSt, st->mem_ol_gain);

   T_op *= OPL_DECIM;

   if (*mode != MODE_7k)
   {
      /* Find open loop pitch lag for second 1/2 frame */
      T_op2 = E_GAIN_open_loop_search(wsp + ((L_FRAME / 2) / OPL_DECIM),
         PIT_MIN / OPL_DECIM, PIT_MAX / OPL_DECIM, (L_FRAME / 2) / OPL_DECIM,
         st->mem_T0_med, &st->mem_ol_gain, st->mem_hf_wsp, st->mem_hp_wsp,
         st->mem_ol_wght_flg);

      if (st->mem_ol_gain > 0.6)
      {
          st->mem_T0_med = E_GAIN_olag_median(T_op2, st->mem_ol_lag);
          st->mem_ada_w = 1.0F;
      }
      else
      {
          st->mem_ada_w = st->mem_ada_w * 0.9F;
      }

      if (st->mem_ada_w < 0.8)
      {
         st->mem_ol_wght_flg = 0;
      }
      else
      {
         st->mem_ol_wght_flg = 1;
      }

      E_DTX_pitch_tone_detection(st->vadSt, st->mem_ol_gain);

      T_op2 *= OPL_DECIM;
   }
   else
   {
      T_op2 = T_op;
   }

   /*
    * DTX-CNG
    */
   if(*mode == MRDTX)
   {
      /* Buffer isf's and energy */
      E_UTIL_residu(&A[3 * (M + 1)], speech, f_exc, L_FRAME);
      f_tmp = 0.0;

      for(i = 0; i < L_FRAME; i++)
      {
         f_tmp += f_exc[i] * f_exc[i];
      }

      E_DTX_buffer(st->dtx_encSt, isf, f_tmp, codec_mode);

      /* Quantize and code the isfs */
      E_DTX_exe(st->dtx_encSt, f_exc2, &prms);

      /* reset speech coder memories */
      E_MAIN_reset(st, 0);

      /*
       * Update signal for next frame.
       * -> save past of speech[] and wsp[].
       */
      memcpy(st->mem_speech, &st->mem_speech[L_FRAME],
         (L_TOTAL - L_FRAME) * sizeof(Float32));
      memcpy(st->mem_wsp, &st->mem_wsp[L_FRAME / OPL_DECIM],
         (PIT_MAX / OPL_DECIM) * sizeof(Float32));

      return(0);
   }

   /*
    *   ACELP
    */

   /* Quantize and code the isfs */

   if (*mode <= MODE_7k)
   {
      E_LPC_isf_2s3s_quantise(isf, isfq, st->mem_isf_q, indice, 4);
      E_MAIN_parm_store((Word16)indice[0], &prms);
      E_MAIN_parm_store((Word16)indice[1], &prms);
      E_MAIN_parm_store((Word16)indice[2], &prms);
      E_MAIN_parm_store((Word16)indice[3], &prms);
      E_MAIN_parm_store((Word16)indice[4], &prms);
   }
   else
   {
      E_LPC_isf_2s5s_quantise(isf, isfq, st->mem_isf_q, indice, 4);
      E_MAIN_parm_store((Word16)indice[0], &prms);
      E_MAIN_parm_store((Word16)indice[1], &prms);
      E_MAIN_parm_store((Word16)indice[2], &prms);
      E_MAIN_parm_store((Word16)indice[3], &prms);
      E_MAIN_parm_store((Word16)indice[4], &prms);
      E_MAIN_parm_store((Word16)indice[5], &prms);
      E_MAIN_parm_store((Word16)indice[6], &prms);
   }

   /* Convert isfs to the cosine domain */
   E_LPC_isf_isp_conversion(isfq, ispnew_q, M);

   if (*mode == MODE_24k)
   {
      /* Check stability on isf : distance between old isf and current isf */

      f_tmp = 0.0F;
      f_pt_tmp = st->mem_isf;

      for (i=0; i < M - 1; i++)
      {
         f_tmp += (isf[i] - f_pt_tmp[i]) * (isf[i] - f_pt_tmp[i]);
      }

      stab_fac = (Float32)(1.25F - (f_tmp / 400000.0F));

      if (stab_fac > 1.0F)
      {
         stab_fac = 1.0F;
      }

      if (stab_fac < 0.0F)
      {
         stab_fac = 0.0F;
      }

      memcpy(f_pt_tmp, isf, M * sizeof(Float32));
   }


   if (st->mem_first_frame == 1)
   {
      st->mem_first_frame = 0;
      memcpy(st->mem_isp_q, ispnew_q, M * sizeof(Word16));
   }

   /* Find the interpolated isps and convert to a[] for all subframes */
   E_LPC_int_isp_find(st->mem_isp_q, ispnew_q, E_ROM_interpol_frac, s_Aq);


   for (i = 0; i < (NB_SUBFR * (M + 1)); i++)
   {
      Aq[i] = s_Aq[i] * 0.000244140625F; /* 1/4096 */
   }

   /* update isp memory for the next frame */
   memcpy(st->mem_isp_q, ispnew_q, M * sizeof(Word16));

   /*
    * Find the best interpolation for quantized ISPs
    */

   p_Aq = Aq;

   for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
   {
      E_UTIL_residu(p_Aq, &speech[i_subfr], &f_exc[i_subfr], L_SUBFR);
      p_Aq += (M + 1);
   }


   /* Buffer isf's and energy for dtx on non-speech frame */

   if(vad_flag == 0)
   {
      f_tmp = 0.0F;

      for(i = 0; i < L_FRAME; i++)
      {
         f_tmp += f_exc[i] * f_exc[i];
      }

      E_DTX_buffer(st->dtx_encSt, isf, f_tmp, codec_mode);
   }

   /* range for closed loop pitch search in 1st subframe */

   T0_min = T_op - 8;

   if (T0_min < PIT_MIN)
   {
      T0_min = PIT_MIN;
   }

   T0_max = T0_min + 15;

   if (T0_max > PIT_MAX)
   {
      T0_max = PIT_MAX;
      T0_min = T0_max - 15;
   }

   /*
    *          Loop for every subframe in the analysis frame
    *          ---------------------------------------------
    *  To find the pitch and innovation parameters. The subframe size is
    *  L_SUBFR and the loop is repeated L_FRAME/L_SUBFR times.
    *     - compute the target signal for pitch search
    *     - compute impulse response of weighted synthesis filter (h1[])
    *     - find the closed-loop pitch parameters
    *     - encode the pitch delay
    *     - find 2 lt prediction (with / without LP filter for lt pred)
    *     - find 2 pitch gains and choose the best lt prediction.
    *     - find target vector for codebook search
    *     - update the impulse response h1[] for codebook search
    *     - correlation between target vector and impulse response
    *     - codebook search and encoding
    *     - VQ of pitch and codebook gains
    *     - find voicing factor and tilt of code for next subframe.
    *     - update states of weighting filter
    *     - find excitation and synthesis speech
    */

   p_A = A;
   p_Aq = Aq;

   for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
   {
      pit_flag = i_subfr;

      if ((i_subfr == (2 * L_SUBFR)) & (*mode > MODE_7k))
      {
         pit_flag = 0;

         /* range for closed loop pitch search in 3rd subframe */

         T0_min = T_op2 - 8;

         if (T0_min < PIT_MIN)
         {
            T0_min = PIT_MIN;
         }

         T0_max = T0_min + 15;

         if (T0_max > PIT_MAX)
         {
            T0_max = PIT_MAX;
            T0_min = T0_max - 15;
         }

      }

      /*
       *
       *        Find the target vector for pitch search:
       *        ---------------------------------------
       *
       *             |------|  res[n]
       * speech[n]---| A(z) |--------
       *             |------|       |   |--------| error[n]  |------|
       *                   zero -- (-)--| 1/A(z) |-----------| W(z) |-- target
       *                   exc          |--------|           |------|
       *
       * Instead of subtracting the zero-input response of filters from
       * the weighted input speech, the above configuration is used to
       * compute the target vector.
       *
       */

      for (i = 0; i < M; i++)
      {
         error[i] = (Float32)(speech[i + i_subfr - 16] - st->mem_syn[i]);
      }

      E_UTIL_residu(p_Aq, &speech[i_subfr], &f_exc[i_subfr], L_SUBFR);

      E_UTIL_synthesis(p_Aq, &f_exc[i_subfr], error + M, L_SUBFR, error, 0);

      E_LPC_a_weight(p_A, Ap, GAMMA1, M);

      E_UTIL_residu(Ap, error + M, xn, L_SUBFR);

      E_UTIL_deemph(xn, TILT_FAC, L_SUBFR, &(st->mem_w0));

      /*
       * Find target in residual domain (cn[]) for innovation search.
       */

      /* first half: xn[] --> cn[] */
      memset(f_code, 0, M * sizeof(Float32));

      memcpy(f_code + M, xn, (L_SUBFR / 2) * sizeof(Float32));

      f_tmp = 0.0F;

      E_UTIL_f_preemph(f_code + M, TILT_FAC, L_SUBFR / 2, &f_tmp);

      E_LPC_a_weight(p_A, Ap, GAMMA1, M);

      E_UTIL_synthesis(Ap, f_code + M, f_code + M, L_SUBFR / 2, f_code, 0);

      E_UTIL_residu(p_Aq, f_code + M, cn, L_SUBFR / 2);

      /* second half: res[] --> cn[] (approximated and faster) */
      for(i = (L_SUBFR / 2); i < L_SUBFR; i++)
      {
         cn[i] = f_exc[i_subfr + i];
      }

      /*
       * Compute impulse response, h1[], of weighted synthesis filter
       */

      E_LPC_a_weight(p_A, Ap, GAMMA1, M);

      memset(h1, 0, L_SUBFR * sizeof(Float32));
      memcpy(h1, Ap, (M + 1) * sizeof(Float32));
      E_UTIL_synthesis(p_Aq, h1, h1, L_SUBFR, h1 + (M + 1), 0);

      f_tmp = 0.0;
      E_UTIL_deemph(h1, TILT_FAC, L_SUBFR, &f_tmp);

      /*
       * Closed-loop fractional pitch search
       */

      /* find closed loop fractional pitch  lag */

      if (*mode <= MODE_9k)
      {
         T0 = E_GAIN_closed_loop_search(&f_exc[i_subfr], xn, h1,
            T0_min, T0_max, &T0_frac,
            pit_flag, PIT_MIN, PIT_FR1_8b);

         /* encode pitch lag */

         if (pit_flag == 0)   /* if 1st/3rd subframe */
         {

            /*
             * The pitch range for the 1st/3rd subframe is encoded with
             * 8 bits and is divided as follows:
             *   PIT_MIN to PIT_FR1-1  resolution 1/2 (frac = 0 or 2)
             *   PIT_FR1 to PIT_MAX    resolution 1   (frac = 0)
             */
            if (T0 < PIT_FR1_8b)
            {
               index = (Word16)(T0 * 2 + (T0_frac >> 1) - (PIT_MIN * 2));
            }
            else
            {
               index = (Word16)(T0 - PIT_FR1_8b + ((PIT_FR1_8b - PIT_MIN) * 2));
            }

            E_MAIN_parm_store(index, &prms);

            /* find T0_min and T0_max for subframe 2 and 4 */

            T0_min = T0 - 8;

            if (T0_min < PIT_MIN)
            {
               T0_min = PIT_MIN;
            }

            T0_max = T0_min + 15;

            if (T0_max > PIT_MAX)
            {
               T0_max = PIT_MAX;
               T0_min = T0_max - 15;
            }
         }
         else  /* if subframe 2 or 4 */
         {
            /*
             * The pitch range for subframe 2 or 4 is encoded with 6 bits:
             *   T0_min  to T0_max     resolution 1/2 (frac = 0 or 2)
             */
            i = T0 - T0_min;
            index = (Word16)(i * 2 + (T0_frac >> 1));

            E_MAIN_parm_store(index, &prms);
         }
      }
      else
      {
         T0 = E_GAIN_closed_loop_search(&f_exc[i_subfr], xn, h1,
            T0_min, T0_max, &T0_frac,
            pit_flag, PIT_FR2, PIT_FR1_9b);

         /* encode pitch lag */

         if (pit_flag == 0)   /* if 1st/3rd subframe */
         {
            /*
             * The pitch range for the 1st/3rd subframe is encoded with
             * 9 bits and is divided as follows:
             *   PIT_MIN to PIT_FR2-1  resolution 1/4 (frac = 0,1,2 or 3)
             *   PIT_FR2 to PIT_FR1-1  resolution 1/2 (frac = 0 or 2)
             *   PIT_FR1 to PIT_MAX    resolution 1   (frac = 0)
             */
            if (T0 < PIT_FR2)
            {
               index = (Word16)(T0 * 4 + T0_frac - (PIT_MIN * 4));
            }
            else if (T0 < PIT_FR1_9b)
            {
               index = (Word16)(T0 * 2 + (T0_frac >> 1) - (PIT_FR2 * 2) + ((PIT_FR2 - PIT_MIN) * 4));
            }
            else
            {
               index = (Word16)(T0 - PIT_FR1_9b + ((PIT_FR2 - PIT_MIN) * 4) + ((PIT_FR1_9b - PIT_FR2) * 2));
            }

            E_MAIN_parm_store(index, &prms);

            /* find T0_min and T0_max for subframe 2 and 4 */

            T0_min = T0 - 8;

            if (T0_min < PIT_MIN)
            {
               T0_min = PIT_MIN;
            }

            T0_max = T0_min + 15;

            if (T0_max > PIT_MAX)
            {
               T0_max = PIT_MAX;
               T0_min = T0_max - 15;
            }
         }
         else      /* if subframe 2 or 4 */
         {
            /*
             * The pitch range for subframe 2 or 4 is encoded with 6 bits:
             *   T0_min  to T0_max     resolution 1/4 (frac = 0,1,2 or 3)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -