📄 g723_codec.c
字号:
/******************************************************************************// INTEL CORPORATION PROPRIETARY INFORMATION// This software is supplied under the terms of a license agreement or// nondisclosure agreement with Intel Corporation and may not be copied// or disclosed except in accordance with the terms of that agreement.// Copyright (c) 2001-03 Intel Corporation. All Rights Reserved.//// Filename:// g723_codec.c //// Description:// Intel Integrated Performance Primitives (IPP)// G.723.1 encoder and decoder codec API// Constructed from the IPP building blocks for ITU G.723.1//// Revision:// 1.0//// References:// [1] ITU-T Rec. G.723.1, "Dual Rate Speech Coder for Multimedia// Communications Transmitting at 5.3 and 6.3 kbit/s," // Telecommunications Standardization Sector of ITU,// March, 1996. // [2] ITU-T Rec. G.723.1 fixed-point C reference code ("96_03"),// March, 1996.// [3] ITU-T Rec. G.723.1 Annex A, "Annex A: Silence Compression Scheme,"// Telecommunications Standardization Sector of ITU,// November, 1996. 
******************************************************************************//***************************************************************************************//// The following functions are defined in this file:// // EncoderInit_G723 (IppG723EncoderState *pDstEncoderState);//// Encode_G723_16s8u (const IppSound *pSrcSpeech, IppBitstream *pDstBitstream, // IppSpchBitRate bitRate, int enableVad, // int enableHighpassFilter, IppG723EncoderState *pEncoderState);//// EncodeBlock_G723_16s8u (IppPcmStream *speech, IppBitstream *bitstream, // Ipp16s dtxEnable, IppSpchBitRate rate,// IppG723EncoderState *state);//// DecoderInit_G723 (IppG723DecoderState *pDstDecoderState);//// Decode_G723_8u16s (const IppBitstream *pSrcBitstream, IppSound *pDstSpeech, // int erasureFrame, IppG723DecoderState *pDecoderState);//// DecodeBlock_G723_8u16s (IppBitstream *bitstream, IppPcmStream *speech,// IppG723DecoderState *state);//******************************************************************************************//******************************** Standard definitions ********************************/#include <g723_def.h>#include <g723_codec.h>#include <g723_func.h>/******************************** OS adaptation layer ********************************/#include <g723_api.h>/***************************************** Compact names for IPP G.723.1 constants *****************************************/#define FLEN IPP_G723_FRAME_LEN /* G.723.1/A frame length, in terms of PCM samples */#define SFLEN IPP_G723_SUBFRAME_LEN /* G.723.1/A subframe length, in terms of PCM samples */#define LPC IPP_G723_LPC_ORDER /* G.723.1/A LPC analysis order */#define LPC1 (IPP_G723_LPC_ORDER+1) /* G.723.1/A LPC analysis order+1 */#define LPCWIN IPP_G723_LPCWIN_LEN /* G.723.1/A LPC analysis Hamming window length */ #define SFNUM IPP_G723_NUM_SUBFRAME /* G.723.1/A number of subframes contained in one frame */#define MAXLAG IPP_G723_MAXLAG /* G.723.1/A longest possible pitch lag (55 Hz) */#define 
TAMING_PARAMS IPP_G723_TAMING_PARAMS /* G.723.1/A error taming parameter vector length */#define COVMATDIM IPP_G723_COVMATDIM /* G.723.1/A size of Toeplitz covariance matrix for ACELP CB search *//******************************************************************************//// Name:// EncoderInit_G723//// Description:// Initialize G.723.1 encoder state //// Input Arguments: // pDstEncoderState - Pointer to uninitialized encoder state //// Output Arguments: // pDstEncoderState - Pointer to initialized encoder state//// Return Values: // 1 - No Error// 0 - Error//******************************************************************************/IppStatus EncoderInit_G723(IppG723EncoderState *pDstEncoderState){ int i; /* Clear the speech preprocessor highpass filter memory (one pole, one zero) */ pDstEncoderState->highpassFilterZfir = 0; pDstEncoderState->highpassFilterZiir = 0; /* Clear speech history buffer */ for ( i = 0; i < LPCWIN-SFLEN; i ++ ) pDstEncoderState->prevSpch[i] = 0; /* Clear sine detection flag */ pDstEncoderState->sineDtct = 0; /* Init LSF history to long-term DC component vector (Pdc) */ for ( i = 0; i < LPC; i ++ ) pDstEncoderState->prevLsf[i] = lsfDcInitTable[i]; /* Initialize perceptual weighting and combined filters */ for ( i = 0; i < LPC; i ++ ) { /* Clear perceptual weighting filter non-recursive or FIR memory */ pDstEncoderState->perceptualWeightFilterZfir[i] = 0; /* Clear perceptual weighting filter recursive or IIR memory */ pDstEncoderState->perceptualWeightFilterZiir[i] = 0; /* Clear combined filter, Si(z) non-recursive or FIR memory */ pDstEncoderState->combinedFilterZfir[i] = 0; } /* Initialize various speech buffers and filter memories */ for ( i = 0; i < MAXLAG; i ++ ) { /* Clear perceptually weighted speech history, maintained for OLPS */ pDstEncoderState->prevWgtSpch[i] = 0; /* Clear combined filter, Si(z) recursive ("IIR") memory */ pDstEncoderState->combinedFilterZiir[i] = 0; /* Clear the history of previous excitation sequences */ 
pDstEncoderState->prevExcitation[i] = 0; } /* Initialize error taming parameters */ for ( i = 0; i < TAMING_PARAMS; i ++ ) pDstEncoderState->errorTamingParams[i] = 4; /* Initialize OL pitch lag history used by VAD for voicing classification. If all 4 lags lie within +/-3 lag neighborhood of the minimum (or sine detected) then the frame is declared voiced */ pDstEncoderState->openLoopPitchLag[0] = 1; pDstEncoderState->openLoopPitchLag[1] = 1; pDstEncoderState->openLoopPitchLag[2] = 60; pDstEncoderState->openLoopPitchLag[3] = 60; /* Initialize VAD LPC inverse filter coefficients; these are updated by the CNG at runtime to estimate the (long-term) synthesis filter associated with the current background noise. */ for (i=0; i<LPC;i ++) pDstEncoderState->vadLpc[i] = 0; /* Initialize elements of the VAD state */ /* VAD LPC filter adaptation enable flag ([3], p. 3); used to ensure that VAD noise level (LPC filter) is updated only in the absence of speech. */ pDstEncoderState->vadState.adaptEnableFlag = 0; /* VAD previous frame (residual) energy ([3], p. 3) */ pDstEncoderState->vadState.prevFltEnergy = 1024; /* VAD previous frame noise level ([3], p. 3) */ pDstEncoderState->vadState.prevNoiseLevel = 1024; /* VAD "hangover" - number of frames to artificially declare voice detection after parametric detection has ended */ pDstEncoderState->vadState.hangoverCount = 3; /* VAD speech burst detector - incremented during VAD=1 frames; decremented during VAD=0 frames; VAD hangover is decremented by one when burst detector==0; hangover set to 6 frames whenever burst detector>=2. Bounded [0,3]. */ pDstEncoderState->vadState.vadVoiceCount = 0; /* Initialize CNG excitation generator random seed; many CNG excitation parameters are generated randomly, including LTP lag, LTP gain, fixed CB grid, pulse signs, and pulse positions. 
*/ pDstEncoderState->randomSeedCNG = 12345; /* Initialize DTX frame type history; treat most recent frame as VAD=1 */ pDstEncoderState->prevDTXFrameType = IPP_G723_FRAMETYPE_VOICE; /* Summary of gain parameters G~sid - inverse quantized CNG SID gain parameter, quantized using 6-bit pseudo-log quantizer. G~t - target excitation gain; smoothed version of G~sid, i.e., G~t = G~sid, for VAD=1, 7/8 G~t-1 + 1/8 G~sid for VAD=0 Gf - fixed excitation scaling gain, computed to satisfy G~t */ /* Initialize quantized CNG excitation gain (G~sid, [3], p. 9); G~sid is the decoded value of the quantized CNG gain; it is used to derive the target excitation gain, G~t ([3], p. 10). */ pDstEncoderState->sidGain = 0; /* Initialize CNG target excitation gain (G~t, [3], p. 10); G~t defines the square root of the average energy required for the current frame synthetic excitation; it is used to derive the fixed excitation scaling gain, Gf. */ pDstEncoderState->targetExcitationGain = 0; /* Initialize frame (summation) autocorrelation histories (VAD) A history is maintained of four frame autocorrelation lag sequences. Frame autocorrelations are computed as the sum of the four subframe autocorrelations. */ for ( i = 0; i < (LPC1*SFNUM); i ++ ) pDstEncoderState->frameAutoCorr[i] = 0; /* Initialize frame (summation) autocorrelation exponent histories (VAD). Each exponent is associated with one frame autocorrelation. */ for ( i = 0; i < SFNUM; i ++ ) pDstEncoderState->frameAutoCorrExp[i] = 40; /* Initialize SID LSF vector (p~t associated with Asid(z)); computed by CNG module, the synthesis filter associated with this coefficient vector is used to generate comfort noise ([3], A.4.4, p. 9). The vector is updated by the CNG module only during SID frames; for non-TX frames, it is maintained as part of the DTX state in order to provide coherent frame processing. 
Asid(z) is derived from either At(z) or Aavgp(z), depending on the distance between At(z) and Aavgp(z), i.e., large spectral changes induce a change in SID spectral coefficients; small spectral changes are ignored. */ for ( i = 0; i < LPC; i ++ ) pDstEncoderState->sidLsf[i] = 0; /* Initialize elements of the DTX state */ /* Initialize autocorrelation of the SID LPC vector (Ra[j], [3], sec. A.4.2, p. 7, Eq. A-11). The sequence Ra[j] contains the autocorrelation of the SID LPC coefficients from the most recent SID frame, which is used in the Itakura distance calculation for estimating spectral distance/similarity between two LPC synthesis filters. The vector Ra[j] is maintained in the DTX state to allow comparison during DTX analysis of At(z) vs. Asid_t-1(z), i.e., to allow an evaluation of the similarity between the SID filter associated with the most recent SID frame (could be more than 1 frame earlier if several non-TX frames have elapsed) and the frame LPC filter associated with the current frame, At(z). At the end of SID frame parameter estimation, the vector (Ra[j]) is updated with the autocorrelation of new SID LPC synthesis filter, which is either At(z) for a large spectral change since the most recent SID frame, or Aavg_p(z) (average over several frames) for very small spectral changes since the most recent SID frame. Both the SID LPC autocorrelation sequence and its exponent are initialized to 0. */ pDstEncoderState->dtxState.sidLpcAutoCorrExp = 0; for ( i = 0; i < LPC1; i ++ ) pDstEncoderState->dtxState.sidLpcAutoCorr[i] = 0; /* Initialize the index of the quantized SID gain parameter, Gind_t; this parameter is maintained in the DTX state in order to allow evaluation of the change in background noise energy from one silence
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -