/* File: enc_main.c  (code-viewer page residue: "Font size:") */
/*
*===================================================================
* 3GPP AMR Wideband Floating-point Speech Codec
*===================================================================
*/
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <float.h>
#include <string.h>
#include <stdio.h>
#include "enc_dtx.h"
#include "enc_acelp.h"
#include "enc_lpc.h"
#include "enc_main.h"
#include "enc_gain.h"
#include "enc_util.h"
#ifdef WIN32
#pragma warning( disable : 4310)
#endif
#include "typedef.h"
/*
 * 16-bit limits, cast to Word16. The replacement lists are fully
 * parenthesized so each macro behaves as a single operand wherever it is
 * expanded (for example directly after sizeof).
 */
#define MAX_16 ((Word16)0x7fff)
#define MIN_16 ((Word16)0x8000)

/* Signal scaling and filter factors */
#define Q_MAX 8               /* scaling max for signal */
#define PREEMPH_FAC 0.68F     /* preemphasis factor */
#define GAMMA1 0.92F          /* Weighting factor (numerator) */
#define TILT_FAC 0.68F        /* tilt factor (denominator) */

/* Pitch lag limits */
#define PIT_MIN 34            /* Minimum pitch lag with resolution 1/4 */
#define PIT_FR2 128           /* Minimum pitch lag with resolution 1/2 */
#define PIT_FR1_9b 160        /* Minimum pitch lag with resolution 1 */
#define PIT_FR1_8b 92         /* Minimum pitch lag with resolution 1 */
#define PIT_MAX 231           /* Maximum pitch lag */
#define L_INTERPOL (16+1)     /* Length of filter for interpolation */

/* Frame layout */
#define L_FRAME16k 320        /* Frame size at 16kHz */
#define L_SUBFR 64            /* Subframe size */
#define NB_SUBFR 4            /* Number of subframe per frame */
#define L_FILT 12             /* Delay of up-sampling filter */
#define L_NEXT 64             /* Overhead in LP analysis */

/* Codec mode indices */
#define MODE_7k 0
#define MODE_9k 1
#define MODE_12k 2
#define MODE_14k 3
#define MODE_16k 4
#define MODE_18k 5
#define MODE_20k 6
#define MODE_23k 7
#define MODE_24k 8
#define MRDTX 10
/*
 * Constant tables; only declared here, no definition is visible in this file.
 *
 * NOTE(review): E_ROM_isf is declared as Word16 here, but E_MAIN_reset copies
 * M * sizeof(Float32) bytes out of it into mem_isf. Confirm the element type
 * of the table's definition and of Coder_State.mem_isf agree with that copy.
 */
extern const Word16 E_ROM_isp[]; /* copied into mem_isp_q by E_MAIN_reset on a full reset */
extern const Word16 E_ROM_isf[]; /* copied into mem_isf by E_MAIN_reset on a full reset */
extern const Word16 E_ROM_interpol_frac[]; /* not referenced in the reviewed part of this file */
/*
 * E_MAIN_reset
 *
 * Parameters:
 *    st          I/O: coder state (a Coder_State handed over as void *)
 *    reset_all   I:   0        - reset only the memories in the first part
 *                     non-zero - additionally reset every other memory,
 *                                including the DTX and VAD sub-states
 *
 * Function:
 *    Bring the coder state back to its start-up values.
 *
 * Returns:
 *    void
 */
void E_MAIN_reset(void *st, Word16 reset_all)
{
   Coder_State *enc = (Coder_State *) st;
   Word16 k;

   /* ---- part reset on every call ---- */
   memset(enc->mem_exc, 0, (PIT_MAX + L_INTERPOL) * sizeof(Word16));
   memset(enc->mem_isf_q, 0, M * sizeof(Word16));
   memset(enc->mem_syn, 0, M * sizeof(Float32));

   enc->mem_w0 = 0.0F;
   enc->mem_tilt_code = 0;
   enc->mem_first_frame = 1;

   E_GAIN_clip_init(enc->mem_gp_clip);

   enc->mem_gc_threshold = 0.0F;

   if (reset_all == 0)
   {
      return;
   }

   /* ---- full reset only ---- */

   /* signal buffers */
   memset(enc->mem_speech, 0, (L_TOTAL - L_FRAME + 460) * sizeof(Float32));
   memset(enc->mem_wsp, 0, (PIT_MAX / OPL_DECIM) * sizeof(Float32));
   memset(enc->mem_decim2, 0, 3 * sizeof(Float32));

   /* filter and quantizer memories */
   memset(enc->mem_decim, 0, 2 * L_FILT16k * sizeof(Float32));
   memset(enc->mem_sig_in, 0, 4 * sizeof(Float32));
   E_ACELP_Gain2_Q_init(enc->mem_gain_q);
   memset(enc->mem_hf_wsp, 0, 8 * sizeof(Float32));

   /* ISP start values: cos(pi * (k + 1) / M) for all but the last entry */
   for (k = 0; k < M - 1; k++)
   {
      enc->mem_isp[k] =
         (Float32)cos(3.141592654 * (Float32)(k + 1) / (Float32)M);
   }
   enc->mem_isp[M - 1] = 0.045F;
   memcpy(enc->mem_isp_q, E_ROM_isp, M * sizeof(Word16));

   /* scalar state */
   enc->mem_preemph = 0.0F;
   enc->mem_wsp_df = 0.0F;
   enc->mem_q = Q_MAX;
   for (k = 0; k < 4; k++)
   {
      enc->mem_subfr_q[k] = Q_MAX;
   }
   enc->mem_ada_w = 0.0F;
   enc->mem_ol_gain = 0.0F;
   enc->mem_ol_wght_flg = 0;

   /* open-loop lag history and its median start at 40 */
   for (k = 0; k < 5; k++)
   {
      enc->mem_ol_lag[k] = 40;
   }
   enc->mem_T0_med = 40;

   memset(enc->mem_hp_wsp, 0,
      ( ( L_FRAME / 2 ) / OPL_DECIM + ( PIT_MAX / OPL_DECIM ) )
      * sizeof(Float32) );
   memset(enc->mem_syn_hf, 0, M * sizeof(Float32));
   memset(enc->mem_syn2, 0, M * sizeof(Float32));
   memset(enc->mem_hp400, 0, 4 * sizeof(Float32));
   memset(enc->mem_sig_out, 0, 4 * sizeof(Float32));
   memset(enc->mem_hf, 0, 2 * L_FILT16k * sizeof(Float32));
   memset(enc->mem_hf2, 0, 2 * L_FILT16k * sizeof(Float32));
   memset(enc->mem_hf3, 0, 2 * L_FILT16k * sizeof(Float32));

   /*
    * NOTE(review): E_ROM_isf is declared in this file as const Word16[],
    * yet M * sizeof(Float32) bytes are copied from it. Confirm the real
    * element type of the table and of mem_isf match this byte count.
    */
   memcpy(enc->mem_isf, E_ROM_isf, M * sizeof(Float32));

   enc->mem_deemph = 0.0F;
   enc->mem_seed = 21845;
   enc->mem_gain_alpha = 1.0F;
   enc->mem_vad_hist = 0;

   /* sub-module states */
   E_DTX_reset(enc->dtx_encSt);
   E_DTX_vad_reset(enc->vadSt);
}
/*
 * E_MAIN_init
 *
 * Parameters:
 *    spe_state   O: receives the newly allocated coder state;
 *                   set to NULL when initialisation fails
 *
 * Function:
 *    Memory allocation and initialisation of the coder state, including
 *    the DTX and VAD sub-states. On any failure everything that was
 *    allocated so far is released again.
 *
 * Returns:
 *    0 on success, -1 on failure (NULL argument or allocation failure)
 */
Word16 E_MAIN_init(void **spe_state)
{
   Coder_State *st;

   if (spe_state == NULL)
   {
      return(-1);
   }

   *spe_state = NULL;

   /* allocate memory */
   if ((st = (Coder_State *) malloc(sizeof(Coder_State))) == NULL)
   {
      return(-1);
   }

   /* Start from NULL so a sub-state that was not created can be detected */
   st->vadSt = NULL;
   st->dtx_encSt = NULL;

   E_DTX_init(&(st->dtx_encSt));
   E_DTX_vad_init(&(st->vadSt));

   if ((st->dtx_encSt == NULL) || (st->vadSt == NULL))
   {
      /* Partial initialisation: release whatever was created */
      if (st->dtx_encSt != NULL)
      {
         E_DTX_exit(&(st->dtx_encSt));
      }

      if (st->vadSt != NULL)
      {
         E_DTX_vad_exit(&(st->vadSt));
      }

      free(st);
      return(-1);
   }

   E_MAIN_reset((void *) st, 1);

   *spe_state = (void*)st;

   return(0);
}
/*
 * E_MAIN_close
 *
 * Parameters:
 *    spe_state   I/O: address of the coder state pointer; the pointer is
 *                     set to NULL once the state has been released
 *
 * Function:
 *    Free coder memory (DTX sub-state, VAD sub-state, then the coder
 *    state itself). A NULL argument or an already cleared pointer is
 *    ignored.
 *
 * Returns:
 *    void
 */
void E_MAIN_close(void **spe_state)
{
   Coder_State *st;

   if ((spe_state == NULL) || (*spe_state == NULL))
   {
      return;
   }

   st = (Coder_State *)(*spe_state);

   E_DTX_exit(&(st->dtx_encSt));
   E_DTX_vad_exit(&(st->vadSt));

   free(st);

   /* Do not leave a dangling pointer behind for the caller */
   *spe_state = NULL;

   return;
}
/*
 * E_MAIN_parm_store
 *
 * Parameters:
 *    value    I:   parameter value; converted to Word16 when stored
 *    prms     I/O: address of the write position in the parameter buffer;
 *                  the position is moved forward by one element
 *
 * Function:
 *    Write one parameter at the current position and advance the position.
 *
 * Returns:
 *    void
 */
static void E_MAIN_parm_store(Word32 value, Word16 **prms)
{
   Word16 *slot = *prms;

   *slot = (Word16)value;
   *prms = slot + 1;
}
/*
 * E_MAIN_encode_first
 *
 * Parameters:
 *    speech16k   I:   L_FRAME16k (320) input samples at 16 kHz
 *    spe_state   I/O: coder state (a Coder_State handed over as void *)
 *
 * Function:
 *    Run only the input pre-processing chain on one frame: conversion to
 *    float, down-sampling to 12.8 kHz, 50 Hz high-pass filtering and
 *    pre-emphasis. The result is written into the speech buffer of the
 *    state; no parameters are produced.
 *
 *    The frame is written one L_FRAME earlier in mem_speech than the
 *    position E_MAIN_encode uses for its new speech.
 *
 * Returns:
 *    void
 */
void E_MAIN_encode_first(Word16 speech16k[], void *spe_state)
{
   Coder_State *enc = (Coder_State *)spe_state;

   Float32 in16k[L_FRAME16k];       /* input frame as float */
   Float32 zero_in[M + L_SUBFR];    /* zero input used to flush the decimator */
   Float32 mem_copy[L_SUBFR];       /* throw-away copy of a filter memory */
   Float32 preemph_copy;            /* throw-away copy of the preemphasis memory */
   Float32 *frame;                  /* where the new frame is written */
   Float32 *lookahead;              /* samples right after the new frame */
   Word32 n;

   frame = enc->mem_speech + L_TOTAL - 2*L_FRAME - L_FILT + 460;
   lookahead = frame + L_FRAME;

   n = 0;
   while (n < L_FRAME16k)
   {
      in16k[n] = (Float32)speech16k[n];
      n++;
   }

   /*
    * Down sampling signal from 16kHz to 12.8kHz.
    * The persistent decimator memory is updated by the frame only.
    */
   E_UTIL_decim_12k8(in16k, L_FRAME16k, frame, enc->mem_decim);

   /*
    * Decimate a run of zeros through a copy of the decimator memory to get
    * the samples after the frame (avoids the delay of the filter) while
    * leaving the persistent memory untouched.
    */
   memset(zero_in, 0, L_FILT16k * sizeof(Float32));
   memcpy(mem_copy, enc->mem_decim, 2 * L_FILT16k * sizeof(Float32));
   E_UTIL_decim_12k8(zero_in, L_FILT16k, lookahead, mem_copy);

   /*
    * 50Hz high-pass: the frame goes through the persistent memory,
    * the L_FILT samples after it through a copy.
    */
   E_UTIL_hp50_12k8(frame, L_FRAME, enc->mem_sig_in);
   memcpy(mem_copy, enc->mem_sig_in, 4 * sizeof(Float32) );
   E_UTIL_hp50_12k8(lookahead, L_FILT, mem_copy);

   /*
    * Fixed preemphasis through 1 - g z^-1, same split: persistent memory
    * for the frame, a local copy for the last L_FILT samples that are
    * needed for the autocorrelation window.
    */
   E_UTIL_f_preemph(frame, PREEMPH_FAC, L_FRAME, &(enc->mem_preemph));
   preemph_copy = enc->mem_preemph;
   E_UTIL_f_preemph(lookahead, PREEMPH_FAC, L_FILT, &preemph_copy);
}
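/*
 * E_MAIN_encode
 *
 * Parameters:
 *    mode        I/O: used mode (may be changed by the DTX handler)
 *    speech16k   I:   320 new speech samples (at 16 kHz)
 *    prms        O:   output parameters
 *    spe_state   B:   state structure
 *    allow_dtx   I:   DTX ON/OFF
 *
 * Function:
 *    Main coder routine.
 *
 * Returns:
 *    Word16 (this header previously said "void"; TODO confirm the meaning
 *    of the returned value against the end of the function)
 */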
Word16 E_MAIN_encode(Word16 * mode, Word16 speech16k[], Word16 prms[],
void *spe_state, Word16 allow_dtx)
{
/* Float32 */
Float32 f_speech16k[L_FRAME16k]; /* Speech vector */
Float32 f_old_exc[(L_FRAME + 1) + PIT_MAX + L_INTERPOL]; /* Excitation vector */
Float32 f_exc2[L_FRAME]; /* excitation vector */
Float32 error[M + L_SUBFR]; /* error of quantization */
Float32 A[NB_SUBFR * (M + 1)]; /* A(z) unquantized for the 4 subframes */
Float32 Aq[NB_SUBFR * (M + 1)]; /* A(z) quantized for the 4 subframes */
Float32 xn[L_SUBFR]; /* Target vector for pitch search */
Float32 xn2[L_SUBFR]; /* Target vector for codebook search */
Float32 dn[L_SUBFR]; /* Correlation between xn2 and h1 */
Float32 cn[L_SUBFR]; /* Target vector in residual domain */
Float32 h1[L_SUBFR]; /* Impulse response vector */
Float32 f_code[L_SUBFR]; /* Fixed codebook excitation */
Float32 y1[L_SUBFR]; /* Filtered adaptive excitation */
Float32 y2[L_SUBFR]; /* Filtered adaptive excitation */
Float32 synth[L_SUBFR]; /* 12.8kHz synthesis vector */
Float32 r[M + 1]; /* Autocorrelations of windowed speech */
Float32 Ap[M + 1]; /* A(z) with spectral expansion */
Float32 ispnew[M]; /* immittance spectral pairs at 4nd sfr */
Float32 isf[M]; /* ISF (frequency domain) at 4nd sfr */
Float32 g_coeff[5], g_coeff2[2]; /* Correlations */
Float32 gain_pit;
Float32 f_tmp, gain1, gain2;
Float32 stab_fac = 0.0F, fac;
Float32 *new_speech, *speech; /* Speech vector */
Float32 *wsp; /* Weighted speech vector */
Float32 *f_exc; /* Excitation vector */
Float32 *p_A, *p_Aq; /* ptr to A(z) for the 4 subframes */
Float32 *f_pt_tmp;
/* Word32 */
Word32 indice[8]; /* quantization indices */
Word32 vad_flag, clip_gain;
Word32 T_op, T_op2, T0, T0_frac;
Word32 T0_min, T0_max;
Word32 voice_fac, Q_new = 0;
Word32 L_gain_code, l_tmp;
Word32 i, i_subfr, pit_flag;
/* Word16 */
Word16 exc2[L_FRAME]; /* excitation vector */
Word16 s_Aq[NB_SUBFR * (M + 1)]; /* A(z) quantized for the 4 subframes */
Word16 s_code[L_SUBFR]; /* Fixed codebook excitation */
Word16 ispnew_q[M]; /* quantized ISPs at 4nd subframe */
Word16 isfq[M]; /* quantized ISPs */
Word16 select, codec_mode;
Word16 index;
Word16 s_gain_pit, gain_code;
Word16 s_tmp, s_max;
Word16 corr_gain;
Word16 *exc; /* Excitation vector */
/* Other */
Coder_State *st; /* Coder states */
/* Memory Usage eval */
st = (Coder_State *)spe_state;
codec_mode = *mode;
/*
* Initialize pointers to speech vector.
*
*
* |-------|-------|-------|-------|-------|-------|
* past sp sf1 sf2 sf3 sf4 L_NEXT
* <------- Total speech buffer (L_TOTAL) ------>
* old_speech
* <------- LPC analysis window (L_WINDOW) ------>
* <-- present frame (L_FRAME) ---->
* | <----- new speech (L_FRAME) ---->
* | |
* speech |
* new_speech
*/
new_speech = st->mem_speech + L_TOTAL - L_FRAME - L_FILT + 460; /* New speech */
speech = st->mem_speech + L_TOTAL - L_FRAME - L_NEXT; /* Present frame */
exc = st->mem_exc + PIT_MAX + L_INTERPOL;
f_exc = f_old_exc + PIT_MAX + L_INTERPOL;
wsp = st->mem_wsp + (PIT_MAX / OPL_DECIM);
for(i = 0; i < L_FRAME16k; i++)
{
f_speech16k[i] = (Float32)speech16k[i];
}
Q_new = -st->mem_q;
for(i = 0; i < (PIT_MAX + L_INTERPOL); i++)
{
f_old_exc[i] = (Float32)(st->mem_exc[i] * pow(2, Q_new));
}
/*
* Down sampling signal from 16kHz to 12.8kHz
*/
E_UTIL_decim_12k8(f_speech16k, L_FRAME16k, new_speech, st->mem_decim);
/* decimate with zero-padding to avoid delay of filter */
memcpy(f_code, st->mem_decim, 2 * L_FILT16k * sizeof(Float32));
memset(error, 0, L_FILT16k * sizeof(Float32));
E_UTIL_decim_12k8(error, L_FILT16k, new_speech + L_FRAME, f_code);
/*
* Perform 50Hz HP filtering of input signal.
* Perform fixed preemphasis through 1 - g z^-1
*/
E_UTIL_hp50_12k8(new_speech, L_FRAME, st->mem_sig_in);
memcpy(f_code, st->mem_sig_in, 4 * sizeof(Float32) );
E_UTIL_hp50_12k8(new_speech + L_FRAME, L_FILT, f_code);
E_UTIL_f_preemph(new_speech, PREEMPH_FAC, L_FRAME, &(st->mem_preemph));
/* last L_FILT samples for autocorrelation window */
f_tmp = st->mem_preemph;
E_UTIL_f_preemph(new_speech + L_FRAME, PREEMPH_FAC, L_FILT, &f_tmp);
/*
* Call VAD
* Preemphesis scale down signal in low frequency and keep dynamic in HF.
* Vad work slightly in future (new_speech = speech + L_NEXT - L_FILT).
*/
vad_flag = E_DTX_vad(st->vadSt, speech + L_NEXT - L_FILT);
if (vad_flag == 0)
{
st->mem_vad_hist = 1;
}
else
{
st->mem_vad_hist = 0;
}
/* DTX processing */
if (allow_dtx)
{
/* Note that mode may change here */
E_DTX_tx_handler(st->dtx_encSt, vad_flag, mode);
}
else
{
E_DTX_reset(st->dtx_encSt);
}
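/*
 * [Code-viewer page residue - not part of the original source file.
 *  The listing of E_MAIN_encode is cut off at this point.]
 *
 * Keyboard shortcuts:
 *    Copy code            Ctrl + C
 *    Search code          Ctrl + F
 *    Full-screen mode     F11
 *    Toggle theme         Ctrl + Shift + D
 *    Show shortcuts       ?
 *    Increase font size   Ctrl + =
 *    Decrease font size   Ctrl + -
 */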