📄 g723_codec.c
字号:
/******************************************************************************
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2001-03 Intel Corporation. All Rights Reserved.
//
// Filename:
// g723_codec.c
//
// Description:
// Intel Integrated Performance Primitives (IPP)
// G.723.1 encoder and decoder codec API
// Constructed from the IPP building blocks for ITU G.723.1
//
// Revision:
// 1.0
//
// References:
// [1] ITU-T Rec. G.723.1, "Dual Rate Speech Coder for Multimedia
// Communications Transmitting at 5.3 and 6.3 kbit/s,"
// Telecommunications Standardization Sector of ITU,
// March, 1996.
// [2] ITU-T Rec. G.723.1 fixed-point C reference code ("96_03"),
// March, 1996.
// [3] ITU-T Rec. G.723.1 Annex A, "Annex A: Silence Compression Scheme,"
// Telecommunications Standardization Sector of ITU,
// November, 1996.
******************************************************************************/
/***************************************************************************************
//
// The following functions are defined in this file:
//
// EncoderInit_G723 (IppG723EncoderState *pDstEncoderState);
//
// Encode_G723_16s8u (const IppSound *pSrcSpeech, IppBitstream *pDstBitstream,
// IppSpchBitRate bitRate, int enableVad,
// int enableHighpassFilter, IppG723EncoderState *pEncoderState);
//
// EncodeBlock_G723_16s8u (IppPcmStream *speech, IppBitstream *bitstream,
// Ipp16s dtxEnable, IppSpchBitRate rate,
// IppG723EncoderState *state);
//
// DecoderInit_G723 (IppG723DecoderState *pDstDecoderState);
//
// Decode_G723_8u16s (const IppBitstream *pSrcBitstream, IppSound *pDstSpeech,
// int erasureFrame, IppG723DecoderState *pDecoderState);
//
// DecodeBlock_G723_8u16s (IppBitstream *bitstream, IppPcmStream *speech,
// IppG723DecoderState *state);
//
******************************************************************************************/
/********************************
Standard definitions
********************************/
#include <g723_platform_config.h>
#include <g723_codec.h>
#include <g723_aux.h>
/********************************
OS adaptation layer
********************************/
#ifdef OS_LINUX
#include <g723_oal_linux.h>
#else
#include <g723_oal_win32.h>
#endif
/*****************************************
Compact names for IPP G.723.1 constants
*****************************************/
/* Short local aliases for the IPP_G723_* constants; the underlying values are
   not defined in this file (presumably they come from the g723 headers
   included above -- confirm against g723_codec.h). */
#define FLEN IPP_G723_FRAME_LEN /* G.723.1/A frame length, in PCM samples */
#define SFLEN IPP_G723_SUBFRAME_LEN /* G.723.1/A subframe length, in PCM samples */
#define LPC IPP_G723_LPC_ORDER /* G.723.1/A LPC analysis order */
#define LPC1 (IPP_G723_LPC_ORDER+1) /* G.723.1/A LPC analysis order plus one */
#define LPCWIN IPP_G723_LPCWIN_LEN /* G.723.1/A LPC analysis Hamming window length */
#define SFNUM IPP_G723_NUM_SUBFRAME /* G.723.1/A number of subframes contained in one frame */
#define MAXLAG IPP_G723_MAXLAG /* G.723.1/A longest possible pitch lag (55 Hz) */
#define TAMING_PARAMS IPP_G723_TAMING_PARAMS /* G.723.1/A error taming parameter vector length */
#define COVMATDIM IPP_G723_COVMATDIM /* G.723.1/A size of the Toeplitz covariance matrix for the ACELP codebook search */
/******************************************************************************
//
// Name:
// EncoderInit_G723
//
// Description:
// Initialize G.723.1 encoder state
//
// Input Arguments:
// pDstEncoderState - Pointer to uninitialized encoder state
//
// Output Arguments:
// pDstEncoderState - Pointer to initialized encoder state
//
// Return Values:
// 1 - No Error
// 0 - Error
//
******************************************************************************/
IppStatus EncoderInit_G723(IppG723EncoderState *pDstEncoderState)
{
int i;
/* Clear the speech preprocessor highpass filter memory (one pole, one zero) */
pDstEncoderState->highpassFilterZfir = 0;
pDstEncoderState->highpassFilterZiir = 0;
/* Clear speech history buffer */
for ( i = 0; i < LPCWIN-SFLEN; i ++ )
pDstEncoderState->prevSpch[i] = 0;
/* Clear sine detection flag */
pDstEncoderState->sineDtct = 0;
/* Init LSF history to long-term DC component vector (Pdc) */
for ( i = 0; i < LPC; i ++ )
pDstEncoderState->prevLsf[i] = lsfDcInitTable[i];
/* Initialize perceptual weighting and combined filters */
for ( i = 0; i < LPC; i ++ )
{
/* Clear perceptual weighting filter non-recursive or FIR memory */
pDstEncoderState->perceptualWeightFilterZfir[i] = 0;
/* Clear perceptual weighting filter recursive or IIR memory */
pDstEncoderState->perceptualWeightFilterZiir[i] = 0;
/* Clear combined filter, Si(z) non-recursive or FIR memory */
pDstEncoderState->combinedFilterZfir[i] = 0;
}
/* Initialize various speech buffers and filter memories */
for ( i = 0; i < MAXLAG; i ++ )
{
/* Clear perceptually weighted speech history, maintained for OLPS */
pDstEncoderState->prevWgtSpch[i] = 0;
/* Clear combined filter, Si(z) recursive ("IIR") memory */
pDstEncoderState->combinedFilterZiir[i] = 0;
/* Clear the history of previous excitation sequences */
pDstEncoderState->prevExcitation[i] = 0;
}
/* Initialize error taming parameters */
for ( i = 0; i < TAMING_PARAMS; i ++ )
pDstEncoderState->errorTamingParams[i] = 4;
/* Initialize OL pitch lag history used by VAD for voicing classification.
If all 4 lags lie within +/-3 lag neighborhood of the minimum
(or sine detected) then the frame is declared voiced */
pDstEncoderState->openLoopPitchLag[0] = 1;
pDstEncoderState->openLoopPitchLag[1] = 1;
pDstEncoderState->openLoopPitchLag[2] = 60;
pDstEncoderState->openLoopPitchLag[3] = 60;
/* Initialize VAD LPC inverse filter coefficients; these are updated by
the CNG at runtime to estimate the (long-term) synthesis filter
associated with the current background noise. */
for (i=0; i<LPC;i ++)
pDstEncoderState->vadLpc[i] = 0;
/* Initialize elements of the VAD state */
/* VAD LPC filter adaptation enable flag ([3], p. 3);
used to ensure that VAD noise level (LPC filter)
is updated only in the absence of speech. */
pDstEncoderState->vadState.adaptEnableFlag = 0;
/* VAD previous frame (residual) energy ([3], p. 3) */
pDstEncoderState->vadState.prevFltEnergy = 1024;
/* VAD previous frame noise level ([3], p. 3) */
pDstEncoderState->vadState.prevNoiseLevel = 1024;
/* VAD "hangover" - number of frames to artificially declare
voice detection after parametric detection has ended */
pDstEncoderState->vadState.hangoverCount = 3;
/* VAD speech burst detector - incremented during VAD=1 frames;
decremented during VAD=0 frames; VAD hangover is decremented
by one when burst detector==0; hangover set to 6 frames
whenever burst detector>=2. Bounded [0,3]. */
pDstEncoderState->vadState.vadVoiceCount = 0;
/* Initialize CNG excitation generator random seed; many CNG excitation
parameters are generated randomly, including
LTP lag, LTP gain, fixed CB grid, pulse signs, and
pulse positions. */
pDstEncoderState->randomSeedCNG = 12345;
/* Initialize DTX frame type history; treat most recent frame as VAD=1 */
pDstEncoderState->prevDTXFrameType = IPP_G723_FRAMETYPE_VOICE;
/* Summary of gain parameters
G~sid - inverse quantized CNG SID gain parameter, quantized using
6-bit pseudo-log quantizer.
G~t - target excitation gain; smoothed version of G~sid, i.e.,
G~t = G~sid, for VAD=1, 7/8 G~t-1 + 1/8 G~sid for VAD=0
Gf - fixed excitation scaling gain, computed to satisfy G~t
*/
/* Initialize quantized CNG excitation gain (G~sid, [3], p. 9);
G~sid is the decoded value of the quantized CNG gain; it is
used to derive the target excitation gain, G~t ([3], p. 10). */
pDstEncoderState->sidGain = 0;
/* Initialize CNG target excitation gain (G~t, [3], p. 10);
G~t defines the square root of the average energy required for
the current frame synthetic excitation; it is used to derive
the fixed excitation scaling gain, Gf. */
pDstEncoderState->targetExcitationGain = 0;
/* Initialize frame (summation) autocorrelation histories (VAD)
A history is maintained of four frame autocorrelation
lag sequences. Frame autocorrelations are computed as the
sum of the four subframe autocorrelations. */
for ( i = 0; i < (LPC1*SFNUM); i ++ )
pDstEncoderState->frameAutoCorr[i] = 0;
/* Initialize frame (summation) autocorrelation exponent histories (VAD).
Each exponent is associated with one frame autocorrelation. */
for ( i = 0; i < SFNUM; i ++ )
pDstEncoderState->frameAutoCorrExp[i] = 40;
/* Initialize SID LSF vector (p~t associated with Asid(z)); computed by CNG module,
the synthesis filter associated with this coefficient vector is used to generate comfort
noise ([3], A.4.4, p. 9). The vector is updated by the CNG module only during SID frames; for
non-TX frames, it is maintained as part of the DTX state in order to provide
coherent frame processing. Asid(z) is derived from either At(z) or Aavgp(z), depending on the
distance between At(z) and Aavgp(z), i.e., large spectral changes induce a change in SID
spectral coefficients; small spectral changes are ignored. */
for ( i = 0; i < LPC; i ++ )
pDstEncoderState->sidLsf[i] = 0;
/* Initialize elements of the DTX state */
/* Initialize autocorrelation of the SID LPC vector (Ra[j], [3], sec. A.4.2, p. 7, Eq. A-11).
The sequence Ra[j] contains the autocorrelation of the SID LPC coefficients from the most recent
SID frame, which is used in the Itakura distance calculation for estimating spectral
distance/similarity between two LPC synthesis filters. The vector Ra[j] is maintained in
the DTX state to allow comparison during DTX analysis of At(z) vs. Asid_t-1(z), i.e., to allow an
evaluation of the similarity between the SID filter associated with the most recent SID frame
(could be more than 1 frame earlier if several non-TX frames have elapsed) and the frame LPC filter
associated with the current frame, At(z). At the end of SID frame parameter estimation, the vector
(Ra[j]) is updated with the autocorrelation of new SID LPC synthesis filter, which is either
At(z) for a large spectral change since the most recent SID frame, or Aavg_p(z) (average over
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -