/* File: dec_main.c  (header line and "Font size:" label left by the source viewer) */
/*
*===================================================================
* 3GPP AMR Wideband Floating-point Speech Codec
*===================================================================
*/
#include <stdlib.h>
#include <math.h>
#include <memory.h>
#include <string.h>
#include "typedef.h"
#include "dec_main.h"
#include "dec_dtx.h"
#include "dec_acelp.h"
#include "dec_gain.h"
#include "dec_lpc.h"
#include "dec_util.h"
/* Extreme bit patterns cast to Word16.
 * NOTE(review): presumably Word16 is a signed 16-bit type declared in
 * typedef.h, making these the saturation limits -- confirm. */
#define MAX_16 (Word16)0x7fff
#define MIN_16 (Word16)0x8000
/* Frame layout: L_FRAME == NB_SUBFR * L_SUBFR.  D_MAIN_decode derives the
 * subframe index with (i_subfr >> 6), which relies on L_SUBFR being 64. */
#define L_FRAME 256 /* Frame size */
#define NB_SUBFR 4 /* Number of subframes per frame */
#define L_SUBFR 64 /* Subframe size */
/* Mode indices.  D_MAIN_decode compares them with <= and > (e.g.
 * "mode <= MODE_7k", "mode <= MODE_9k"), so their numeric order matters.
 * NOTE(review): the names suggest approximate bit rates -- confirm. */
#define MODE_7k 0 /* modes */
#define MODE_9k 1
#define MODE_12k 2
#define MODE_14k 3
#define MODE_16k 4
#define MODE_18k 5
#define MODE_20k 6
#define MODE_23k 7
#define MODE_24k 8
/* Received frame types tested by D_MAIN_decode:
 *   RX_SPEECH_BAD, RX_SPEECH_PROBABLY_DEGRADED -> bfi = 1, unusable_frame = 0
 *   RX_NO_DATA, RX_SPEECH_LOST                 -> bfi = 1, unusable_frame = 1
 *   any other value                            -> bfi = 0, unusable_frame = 0 */
#define RX_SPEECH_PROBABLY_DEGRADED 1 /* rx types */
#define RX_SPEECH_LOST 2
#define RX_SPEECH_BAD 3
#define RX_NO_DATA 7
/* D_MAIN_reset initialises mem_q and every mem_subfr_q[] entry to Q_MAX. */
#define Q_MAX 8 /* scaling max for signal */
#define PIT_SHARP 27853 /* pitch sharpening factor = 0.85 Q15 */
/* Pitch-lag range boundaries.  In the 8-bit lag decoding of D_MAIN_decode,
 * indices below (PIT_FR1_8b - PIT_MIN) * 2 carry a fractional part and
 * the remaining indices decode to an integer lag (T0_frac = 0). */
#define PIT_MIN 34 /* Minimum pitch lag with resolution 1/4 */
#define PIT_FR2 128 /* Minimum pitch lag with resolution 1/2 */
#define PIT_FR1_9b 160 /* Minimum pitch lag with resolution 1 */
#define PIT_FR1_8b 92 /* Minimum pitch lag with resolution 1 */
/* Constant tables defined in another translation unit. */
extern const Word16 D_ROM_isp[]; /* copied into mem_isp on a full reset */
extern const Word16 D_ROM_isf[]; /* copied into mem_isf / mem_isf_buf on a full reset; also passed to the DTX init/reset */
extern const Word16 D_ROM_interpol_frac[]; /* per-subframe interpolation weights, indexed by subframe number */
#ifdef WIN32
#pragma warning( disable : 4310)
#endif
/*
 * D_MAIN_reset
 *
 * Parameters:
 *    st          I/O: decoder state, passed as an opaque pointer
 *    reset_all     I: non-zero requests a full reset; zero clears only the
 *                     excitation-related memories
 *
 * Function:
 *    Puts the decoder state back to its initial values.  The first part
 *    runs on every call; the second part (gain/lag helpers, filter
 *    memories, ISP/ISF history, random seeds, bad-frame state, DTX state
 *    and the *_hf_plus memories) runs only when reset_all is non-zero.
 *
 * Returns:
 *    void
 */
void D_MAIN_reset(void *st, Word16 reset_all)
{
    Decoder_State *s = (Decoder_State *)st;
    Word32 k;

    /* ---- memories cleared on every reset ---- */
    memset(s->mem_exc, 0, (PIT_MAX + L_INTERPOL) * sizeof(Word16));
    memset(s->mem_isf_q, 0, M * sizeof(Word16));
    memset(s->mem_ph_disp, 0, 8 * sizeof(Word16));

    /* old pitch value = 64.0 (integer part 64, fraction 0) */
    s->mem_T0 = 64;
    s->mem_T0_frac = 0;

    s->mem_first_frame = 1;
    s->mem_gc_thres = 0;
    s->mem_tilt_code = 0;

    /* excitation scaling: current value and the four per-subframe values */
    s->mem_q = Q_MAX;
    for(k = 0; k < 4; k++)
    {
        s->mem_subfr_q[k] = Q_MAX;
    }

    if(reset_all == 0)
    {
        /* partial reset requested: everything below is left untouched */
        return;
    }

    /* ---- full reset only ---- */

    /* gain decoder state */
    D_GAIN_init(s->mem_gain);

    /* filter memories */
    memset(s->mem_oversamp, 0, (2 * 12) * sizeof(Word16));
    memset(s->mem_sig_out, 0, 6 * sizeof(Word16));
    memset(s->mem_hf, 0, (31 - 1) * sizeof(Word16));
    memset(s->mem_hf3, 0, (31 - 1) * sizeof(Word16));
    memset(s->mem_hp400, 0, 6 * sizeof(Word16));

    /* pitch-lag concealment state */
    D_GAIN_lag_concealment_init(s->mem_lag);

    /* ISP / ISF memories start from the ROM tables */
    memcpy(s->mem_isp, D_ROM_isp, M * sizeof(Word16));
    memcpy(s->mem_isf, D_ROM_isf, M * sizeof(Word16));
    /* every one of the L_MEANBUF slots of mem_isf_buf gets a copy of D_ROM_isf */
    for(k = 0; k < L_MEANBUF; k++)
    {
        memcpy(s->mem_isf_buf + k * M, D_ROM_isf, M * sizeof(Word16));
    }

    /* scalar state */
    s->mem_deemph = 0;
    s->mem_seed = 21845; /* all three random seeds start at 21845 */
    s->mem_seed2 = 21845;
    s->mem_seed3 = 21845;
    s->mem_state = 0;
    s->mem_bfi = 0;

    /* synthesis filter memories */
    memset(s->mem_syn_hf, 0, M16k * sizeof(Word16));
    memset(s->mem_syn_hi, 0, M * sizeof(Word16));
    memset(s->mem_syn_lo, 0, M * sizeof(Word16));

    /* DTX sub-state */
    D_DTX_reset(s->dtx_decSt, D_ROM_isf);
    s->mem_vad_hist = 0;

    memset(s->mem_syn_out, 0, (PIT_MAX + L_SUBFR) * sizeof(Word16));

    /* floating-point "*_hf_plus" memories */
    memset(s->mem_oversamp_hf_plus, 0, (2 * L_FILT) * sizeof(float));
    memset(s->mem_syn_hf_plus, 0, 8 * sizeof(float));
    /* lpc_hf_plus becomes { 1.0, 0, 0, 0, 0, 0, 0, 0, 0 } */
    s->lpc_hf_plus[0] = 1.0f;
    memset(&s->lpc_hf_plus[1], 0, 8 * sizeof(float));
    s->gain_hf_plus = 0.0f;
    s->threshold_hf = 0.0f;
    s->lp_amp_hf = 0.0f;
    s->ramp_state = 0;
}
/*
 * D_MAIN_init
 *
 * Parameters:
 *    spd_state     O: receives the pointer to the newly allocated state
 *
 * Function:
 *    Allocates the decoder state, creates its DTX sub-state and performs a
 *    full reset (D_MAIN_reset with reset_all = 1).
 *    On failure *spd_state is left NULL and no memory stays allocated by
 *    this function.
 *
 * Returns:
 *    0 if successful, -1 if spd_state is NULL or an allocation failed
 */
Word32 D_MAIN_init(void **spd_state)
{
    /* Decoder states */
    Decoder_State *st;

    if(spd_state == NULL)
    {
        return(-1);
    }
    *spd_state = NULL;

    /*
     * Memory allocation for decoder state.
     */
    st = (Decoder_State *)malloc(sizeof(Decoder_State));
    if(st == NULL)
    {
        return(-1);
    }

    st->dtx_decSt = NULL;
    D_DTX_init(&st->dtx_decSt, D_ROM_isf);
    if(st->dtx_decSt == NULL)
    {
        /*
         * The DTX sub-state was not created.  D_MAIN_reset would hand the
         * NULL pointer to D_DTX_reset and the decoder dereferences
         * st->dtx_decSt on every frame, so give up here instead of
         * returning a half-built state.
         */
        free(st);
        return(-1);
    }

    D_MAIN_reset((void *)st, 1);
    *spd_state = (void *)st;
    return(0);
}
/*
* Decoder_close
*
* Parameters:
* spd_state I: pointer to state structure
*
* Function:
* Free coder memory.
*
* Returns:
* void
*/
void D_MAIN_close(void **spd_state)
{
D_DTX_exit(&(((Decoder_State *)(*spd_state))->dtx_decSt));
free(*spd_state);
return;
}
/*
 * D_MAIN_decode (Decoder_exe)
 *
 * Parameters:
 *    mode          I: used mode
 *    prms          I: parameter vector
 *    synth16k      O: synthesis speech
 *    spd_state   I/O: state structure
 *    frame_type    I: received frame type
 *
 * Function:
 *    Main decoder routine.
 *
 * Returns:
 *    0 if successful
 */
Word32 D_MAIN_decode(Word16 mode, Word16 prms[], Word16 synth16k[],
void *spd_state, UWord8 frame_type)
{
Word32 code2[L_SUBFR]; /* algebraic codevector */
Word32 L_tmp, L_tmp2, L_gain_code, L_stab_fac;
Word32 i, j, i_subfr, pit_flag;
Word32 T0, T0_frac, T0_max, select, T0_min = 0;
Word16 exc2[L_FRAME]; /* excitation vector */
Word16 Aq[NB_SUBFR * (M + 1)]; /* A(z) quantized for the 4 subframes */
Word16 code[L_SUBFR]; /* algebraic codevector */
Word16 excp[L_SUBFR]; /* excitation vector */
Word16 HfIsf[M16k];
Word16 ispnew[M]; /* immittance spectral pairs at 4nd sfr*/
Word16 isf[M]; /* ISF (frequency domain) at 4nd sfr */
Word16 isf_tmp[M]; /* ISF tmp */
Word16 ind[8]; /* quantization indices */
Word16 index, fac, voice_fac, max, Q_new = 0;
Word16 gain_pit, gain_code, gain_code_lo, tmp;
Word16 corr_gain = 0;
UWord16 pit_sharp = 0;
Word16 *exc; /* Excitation vector */
Word16 *p_Aq; /* ptr to A(z) for the 4 subframes */
Word16 *p_isf; /* prt to isf */
Decoder_State *st; /* Decoder states */
UWord8 newDTXState, bfi, unusable_frame;
UWord8 vad_flag;
st = (Decoder_State*)spd_state;
/* find the new DTX state SPEECH OR DTX */
newDTXState = D_DTX_rx_handler(st->dtx_decSt, frame_type);
if(newDTXState != SPEECH)
{
D_DTX_exe(st->dtx_decSt, exc2, newDTXState, isf, &prms);
}
/* SPEECH action state machine */
if((frame_type == RX_SPEECH_BAD) |
(frame_type == RX_SPEECH_PROBABLY_DEGRADED))
{
/* bfi for all index, bits are not usable */
bfi = 1;
unusable_frame = 0;
}
else if((frame_type == RX_NO_DATA) | (frame_type == RX_SPEECH_LOST))
{
/* bfi only for lsf, gains and pitch period */
bfi = 1;
unusable_frame = 1;
}
else
{
bfi = 0;
unusable_frame = 0;
}
if(bfi != 0)
{
st->mem_state = (UWord8)(st->mem_state + 1);
if(st->mem_state > 6)
{
st->mem_state = 6;
}
}
else
{
st->mem_state = (UWord8)(st->mem_state >> 1);
}
/*
* If this frame is the first speech frame after CNI period,
* set the BFH state machine to an appropriate state depending
* on whether there was DTX muting before start of speech or not
* If there was DTX muting, the first speech frame is muted.
* If there was no DTX muting, the first speech frame is not
* muted. The BFH state machine starts from state 5, however, to
* keep the audible noise resulting from a SID frame which is
* erroneously interpreted as a good speech frame as small as
* possible (the decoder output in this case is quickly muted)
*/
if(st->dtx_decSt->mem_dtx_global_state == DTX)
{
st->mem_state = 5;
st->mem_bfi = 0;
}
else if(st->dtx_decSt->mem_dtx_global_state == D_DTX_MUTE)
{
st->mem_state = 5;
st->mem_bfi = 1;
}
if(newDTXState == SPEECH)
{
vad_flag = (UWord8)(*prms++);
if(bfi == 0)
{
if(vad_flag == 0)
{
st->mem_vad_hist = (Word16)(st->mem_vad_hist + 1);
if(st->mem_vad_hist > 32767)
{
st->mem_vad_hist = 32767;
}
}
else
{
st->mem_vad_hist = 0;
}
}
}
/*
* DTX-CNG
*/
if(newDTXState != SPEECH) /* CNG mode */
{
/*
* increase slightly energy of noise below 200 Hz
* Convert ISFs to the cosine domain
*/
D_LPC_isf_isp_conversion(isf, ispnew, M);
D_LPC_isp_a_conversion(ispnew, Aq, M);
memcpy(isf_tmp, st->mem_isf, M * sizeof(Word16));
for(i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
{
j = (i_subfr >> 6);
for(i = 0; i < M; i++)
{
L_tmp = (isf_tmp[i] * (32767 - D_ROM_interpol_frac[j])) << 1;
L_tmp = L_tmp + ((isf[i] * D_ROM_interpol_frac[j]) << 1);
HfIsf[i] = (Word16)((L_tmp + 0x8000) >> 16);
}
D_UTIL_dec_synthesis(Aq, &exc2[i_subfr], 0, &synth16k[i_subfr * 5 /4],
(Word16) 1, HfIsf, mode, newDTXState, bfi, st);
}
/* reset speech coder memories */
D_MAIN_reset(st, 0);
memcpy(st->mem_isf, isf, M * sizeof(Word16));
st->mem_bfi = bfi;
st->dtx_decSt->mem_dtx_global_state = (UWord8)newDTXState;
return(0);
}
/*
* ACELP
*/
exc = st->mem_exc + PIT_MAX + L_INTERPOL;
/* Decode the ISFs */
if(mode <= MODE_7k)
{
ind[0] = *prms++;
ind[1] = *prms++;
ind[2] = *prms++;
ind[3] = *prms++;
ind[4] = *prms++;
D_LPC_isf_2s3s_decode(ind, isf, st->mem_isf_q, st->mem_isf,
st->mem_isf_buf, bfi);
}
else
{
ind[0] = *prms++;
ind[1] = *prms++;
ind[2] = *prms++;
ind[3] = *prms++;
ind[4] = *prms++;
ind[5] = *prms++;
ind[6] = *prms++;
D_LPC_isf_2s5s_decode(ind, isf, st->mem_isf_q, st->mem_isf,
st->mem_isf_buf, bfi);
}
/* Convert ISFs to the cosine domain */
D_LPC_isf_isp_conversion(isf, ispnew, M);
if(st->mem_first_frame != 0)
{
st->mem_first_frame = 0;
memcpy(st->mem_isp, ispnew, M * sizeof(Word16));
}
/* Find the interpolated ISPs and convert to a[] for all subframes */
D_LPC_int_isp_find(st->mem_isp, ispnew, D_ROM_interpol_frac, Aq);
/* update isp memory for the next frame */
memcpy(st->mem_isp, ispnew, M * sizeof(Word16));
/* Check stability on isf : distance between old isf and current isf */
L_tmp = 0;
p_isf = st->mem_isf;
for(i = 0; i < M - 1; i++)
{
tmp = (Word16)((isf[i] - p_isf[i]));
L_tmp = L_tmp + (tmp * tmp);
}
if(L_tmp < 3276928)
{
L_tmp = L_tmp >> 7;
L_tmp = (L_tmp * 26214) >> 15; /* tmp = L_tmp*0.8/256 */
L_tmp = 20480 - L_tmp; /* 1.25 - tmp */
L_stab_fac = L_tmp << 1; /* Q14 -> Q15 with saturation */
if(L_stab_fac > 0x7FFF)
{
L_stab_fac = 0x7FFF;
}
}
else
{
L_stab_fac = 0x0;
}
memcpy(isf_tmp, st->mem_isf, M * sizeof(Word16));
memcpy(st->mem_isf, isf, M * sizeof(Word16));
/*
* Loop for every subframe in the analysis frame
*
* The subframe size is L_SUBFR and the loop is repeated L_FRAME/L_SUBFR
* times
* - decode the pitch delay and filter mode
* - decode algebraic code
* - decode pitch and codebook gains
* - find voicing factor and tilt of code for next subframe
* - find the excitation and compute synthesis speech
*/
p_Aq = Aq; /* pointer to interpolated LPC parameters */
for(i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
{
pit_flag = i_subfr;
if((i_subfr == (2 * L_SUBFR)) & (mode > MODE_7k))
{
pit_flag = 0;
}
/*
 * - Decode pitch lag
 * Lag indices are received also in case of BFI,
 * so that the parameter pointer stays in sync.
 */
if(pit_flag == 0)
{
if(mode <= MODE_9k)
{
index = *prms++;
if(index < ((PIT_FR1_8b - PIT_MIN) * 2))
{
T0 = (PIT_MIN + (index >> 1));
T0_frac = (index - ((T0 - PIT_MIN) << 1));
T0_frac = (T0_frac << 1);
}
else
{
T0 = index + (PIT_FR1_8b - ((PIT_FR1_8b - PIT_MIN) * 2));
T0_frac = 0;
}
}
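/*
 * [The listing is cut off here.  The text that followed in this copy is the
 *  source viewer's keyboard-shortcut help, not part of dec_main.c:
 *    Copy code:          Ctrl + C
 *    Search code:        Ctrl + F
 *    Full-screen mode:   F11
 *    Toggle theme:       Ctrl + Shift + D
 *    Show shortcuts:     ?
 *    Increase font size: Ctrl + =
 *    Decrease font size: Ctrl + -]
 */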