/* g723_codec.c */
                                    &(pDecoderState->targetExcitationGain));

        /* Apply inverse quantization to the LSFs ([1], section 2.6, p. 5; [2], LSP.c, Lsp_Inq()).
           LSFs are decoded into the vector Lsf[]. For invalid frames (frame erasures), the VQ table
           entries are zeroed, and a different fixed predictor and minimum frequency separation are used. */
        if ( frameType == IPP_G723_FRAMETYPE_SID )
            appsLSFQuantInv_G723_32s16s(QLsfIndex, pDecoderState->prevLsf, pDecoderState->sidLsf, 0);

        /* Synthesize CNG excitation using the CNG procedure described in [3], section A4.5, pp. 10-11. */
        appsGenerateCNGExcitation_G723_16s(pDecoderState->targetExcitationGain,
                                           pDecoderState->prevExcitation, EstimatedPitchLag,
                                           ClosedLoopPitchLagOffset, AdaptGainIndex, Excitation+MAXLAG,
                                           &(pDecoderState->randomSeedCNG), pDecoderState->bitRate);

        /* Interpolate the LSFs, transform them to quantized interpolated LPCs, and update the LSF
           history in the decoder state variable */
        appsLSFInterp_G723_16s(pDecoderState->sidLsf, InterpLsf, QLpc, pDecoderState->prevLsf);
    }
    /* Process voice activity (VAD==1) frames */
    else {
        /* Apply inverse quantization to the LSFs ([1], section 2.6, p. 5; [2], LSP.c, Lsp_Inq()).
           LSFs are decoded into the vector Lsf[]. For invalid frames (frame erasures), the VQ table
           entries are zeroed, and a different fixed predictor and minimum frequency separation are used. */
        appsLSFQuantInv_G723_32s16s(QLsfIndex, pDecoderState->prevLsf, Lsf, erasureFrame);

        /* Interpolate the LSFs on each subframe ([1], section 2.7, p. 6; also [2], LSP.C, Lsp_Int()
           and LsptoA()). Interpolated LSFs are stored in InterpLsf[]; they are also converted to LPCs,
           and the resulting interpolated LPCs are stored in QLpc[]. */
        appsLSFInterp_G723_16s(Lsf, InterpLsf, QLpc, pDecoderState->prevLsf);

        /* Generate the excitation vector by combining contributions from the fixed and adaptive
           codebooks using the seven steps outlined below. */

        /* 1. Excitation buffer setup: initialize the first 145 samples of the new excitation buffer
              with the excitation history */
        for ( i = 0; i < MAXLAG; i ++ )
            Excitation[i] = pDecoderState->prevExcitation[i];

        /* Valid frame excitation generator */
        if ( pDecoderState->consecutiveFrameErasures == 0 ) {

            for ( i = 0; i < SFNUM; i ++ ) {
                SubframeOffset = (Ipp16s)(i * SFLEN);

                /* 2. Synthesize the fixed codebook vector */
                if ( pDecoderState->bitRate == IPP_SPCHBR_6300 )
                    /* Generate the MP-MLQ fixed codebook vector. As described in [1], section 2.17, p. 13,
                       the pulse signs (FixedCBPulseSign), pulse positions (FixedCBPulsePos), the even/odd
                       grid specifier (FixedCBGrid), the gain (FixedCBQGainIndex), and the Dirac train on/off
                       state are combined to generate the 6.3 kbps fixed codebook vector. EstimatedPitchLag
                       is used only if the Dirac train is enabled; otherwise it is ignored. Upon return, the
                       synthesized codeword is returned in FixedCBVect[]. */
                    appsDecodeMPMLQVector_G723_16s(EstimatedPitchLag[i], FixedCBPulseSign[i],
                                                   FixedCBQGainIndex[i], FixedCBGrid[i], FixedCBPulsePos[i],
                                                   FixedCBVect, DiracTrainEnable[i], (Ipp16s)i);
                else
                    /* Generate the ACELP fixed codebook vector. As described in [1], section 2.17, p. 13,
                       the pulse signs (FixedCBPulseSign), pulse positions (FixedCBPulsePos), the even/odd
                       grid specifier (FixedCBGrid), and the fixed codebook gain (FixedCBQGainIndex) are used
                       to generate the codeword for pitch lags greater than 60. For pitch lags less than 60,
                       a pitch contribution is synthesized and combined with the base fixed codebook vector.
                       The fixed-codebook pitch contribution is derived from the pitch lag
                       (EstimatedPitchLag+ClosedLoopPitchLagOffset) and the adaptive codebook gain
                       (AdaptGainIndex). Upon return from the decoder, the synthesized vector is stored
                       in FixedCBVect. */
                    appsDecodeACELPVector_G723_16s(EstimatedPitchLag[i], ClosedLoopPitchLagOffset[i],
                                                   FixedCBPulseSign[i], FixedCBQGainIndex[i], FixedCBGrid[i],
                                                   AdaptGainIndex[i], FixedCBPulsePos[i], FixedCBVect);

                /* 3. Synthesize the adaptive codebook vector as described in [1], section 2.14, p. 9.
                      Upon entry, EstimatedPitchLag+ClosedLoopPitchLagOffset give the closed-loop pitch lag,
                      AdaptGainIndex gives the adaptive codebook gain, bitRate specifies the bit rate, and
                      Excitation contains the excitation history. Upon return, AdaptCBVect contains the
                      synthesized adaptive codebook vector. */
                ippsDecodeAdaptiveVector_G723_16s(EstimatedPitchLag[i], ClosedLoopPitchLagOffset[i],
                                                  AdaptGainIndex[i], Excitation+SubframeOffset, AdaptCBVect,
                                                  pDecoderState->bitRate);

                /* 4. Combine the fixed and adaptive vectors and store the combined vector in Excitation[] */
                appsGenerateExcitation_G723_16s(FixedCBVect, AdaptCBVect,
                                                &(Excitation[MAXLAG+SubframeOffset]));
            }

            /* 5. Normalize the combined excitation vector by the element of largest magnitude for the
                  interpolation search and pitch postfilter parameter extraction. Also, update the decoder
                  excitation history. */
            appsNormalizeExcitation_G723_16s(Excitation, ExcitationNorm, pDecoderState->prevExcitation,
                                             &(pDecoderState->targetExcitationGain));

            /* Update the interpolation-frame adaptive codebook index that is used to synthesize erasure
               frames. The interpolated codebook index is actually used only during interpolation (erasure)
               frames. This routine incorporates a voicing classifier which makes a voicing decision on the
               basis of cross-correlation maximization: the last 120 samples of the frame are cross-correlated
               with lags of L2+/-3, and the prediction gain of the best candidate vector is tested. If it
               exceeds 0.58 dB, the frame is declared voiced; otherwise it is unvoiced and the classifier
               returns 0. The procedure is described in [1], section 3.10.2, p. 22. */
            appsUpdateErasureInterpIndex_G723A_16s(EstimatedPitchLag[2], ExcitationNorm+MAXLAG,
                                                   &(pDecoderState->interpolationIndex),
                                                   &(pDecoderState->sidGain));

            /* 6. Compute the pitch postfilter parameters as described in [1], section 3.6, pp. 18-20.
                  Upon input, EstimatedPitchLag defines the pitch postfilter search window, ExcitationNorm
                  contains the normalized excitation vector, bitRate specifies the bit rate, and i the
                  subframe index. Upon return, Delay gives the pitch postfilter delay parameter (Mb or Mf),
                  Gain gives the pitch postfilter gain parameter (gammaltp*g_f or gammaltp*g_b), and
                  ScalingGain gives the pitch postfilter scaling gain, g_p (Eq. 47). */
            for ( i = 0; i < SFNUM; i ++ )
                ippsPitchPostFilter_G723_16s(EstimatedPitchLag[i], ExcitationNorm+MAXLAG, &Delay[i],
                                             &Gain[i], &ScalingGain[i], (Ipp16s)i, pDecoderState->bitRate);

            /* Update SID LSFs */
            for ( i = 0; i < LPC; i ++ )
                pDecoderState->sidLsf[i] = pDecoderState->prevLsf[i];

            /* 7. Apply the pitch postfilter to the excitation vector, as described in [1], section 3.6,
                  p. 18. The postfiltered excitation, ppf[n], is stored in the vector ExcitationPPF.
                  Note that because of the weighting terms wb, wf (= 0,1; 1,0; or 0,0), only a single delay
                  and gain term are required to parameterize the pitch postfilter. */
            if (enablePostFilter)
                for ( i = 0; i < SFNUM; i ++ )
                    appsApplyPitchPostFilter_G723_16s(Delay[i], Gain[i], ScalingGain[i],
                                                      &(Excitation[MAXLAG+i*SFLEN]),
                                                      &(ExcitationPPF[i*SFLEN]));
        } /* End valid frame excitation generator */

        /* Invalid (erasure/interpolated) frame excitation generator */
        else {
            /* Generate erasure excitation, update the excitation history */
            if (enablePostFilter)
                appsGenerateErasureExcitation_G723_16s(ExcitationPPF, pDecoderState->prevExcitation,
                                                       &(pDecoderState->randomSeed),
                                                       pDecoderState->interpolationIndex,
                                                       pDecoderState->consecutiveFrameErasures,
                                                       pDecoderState->interpolationGain);
            else
                appsGenerateErasureExcitation_G723_16s(&(Excitation[MAXLAG]), pDecoderState->prevExcitation,
                                                       &(pDecoderState->randomSeed),
                                                       pDecoderState->interpolationIndex,
                                                       pDecoderState->consecutiveFrameErasures,
                                                       pDecoderState->interpolationGain);
        } /* End of excitation generation for both valid and erasure frames */

        /* Reset the comfort noise PRNG seed */
        pDecoderState->randomSeedCNG = 12345;
    } /* End voice activity (VAD==1) frame processing */

    /* Maintain the frame type history */
    pDecoderState->prevDTXFrameType = frameType;

    /* Apply the LPC synthesis filter and the synthesis postfilters to the excitation sequence */
    for ( i = 0; i < SFNUM; i ++ ) {
        SubframeOffset = (Ipp16s)(i * SFLEN);
        outputSpeech = (Ipp16s *)pDstSpeech->pBuf;
        outputSpeech += SubframeOffset;

        /* Apply the LPC synthesis filter as described in [1], section 3.7, p. 20. */
        if ( (enablePostFilter) && (frameType == IPP_G723_FRAMETYPE_VOICE) )
            ippsSynthesisFilter_G723_16s(QLpc[i], ExcitationPPF+SubframeOffset,
                                         pDecoderState->synthesisFilterZiir, outputSpeech);
        else
            ippsSynthesisFilter_G723_16s(QLpc[i], Excitation+SubframeOffset+MAXLAG,
                                         pDecoderState->synthesisFilterZiir, outputSpeech);

        /* Apply the formant postfilter, F(z), as described in [1], Eq. 49.3, section 3.8, p. 20.
           Upon input, QLpc contains the quantized, subframe-interpolated LPCs,
           pDecoderState->formantPostfilterZiir contains the recursive filter memory,
           pDecoderState->formantPostfilterZfir contains the non-recursive filter memory,
           outputSpeech contains the input speech, and pDecoderState->autoCorr contains the
           autocorrelation coefficient R(1) as given by Eq. 49.1. Upon return, outputSpeech contains
           the postfiltered output, and SpeechEnergyEstimate contains the input speech energy
           (sum sy^2). */
        if (enablePostFilter)
            appsApplyFormantPostFilter_G723_16s(QLpc[i], outputSpeech,
                                                pDecoderState->formantPostfilterZfir,
                                                pDecoderState->formantPostfilterZiir,
                                                &SpeechEnergyEstimate, &(pDecoderState->autoCorr));

        /* Apply the gain scaling unit as described in [1], section 3.9, p. 21. */
        appsApplyGainScale_G723_16s_I(outputSpeech, enablePostFilter, &(pDecoderState->prevGain),
                                      SpeechEnergyEstimate);
    } /* End synthesis and postfiltering loop */

    /* Return status OK */
    return(1);

} /* Decode_G723_8u16s */
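/* Illustrative note on the excitation work buffer used in the decode path above: the first
   MAXLAG (145) samples hold the excitation history carried over from the previous frame, and the
   following SFNUM*SFLEN samples receive the newly synthesized excitation, one SFLEN-sample
   subframe at a time. SFNUM == 4 and SFLEN == 60 (a 240-sample, 30 ms frame) are the standard
   G.723.1 values and are assumed here rather than taken from this excerpt.

       Excitation[0 .. MAXLAG-1]                                  excitation history
       Excitation[MAXLAG + i*SFLEN .. MAXLAG + (i+1)*SFLEN - 1]   subframe i, i = 0 .. SFNUM-1

   so the buffer holds MAXLAG + SFNUM*SFLEN = 145 + 240 = 385 samples in total. */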
/********************************************************************************************
//  Name:
//    EncodeBlock_G723_16s8u
//
//  Description:
//    Compress multiple 30 ms input speech frames (240 samples of 16-bit linear PCM per
//    frame) into a 189/158/32/8-bit per-frame compressed bitstream. The compressed
//    bitstream is represented using 24/20/4/1 bytes per frame, in accordance with ITU-T
//    Recommendation G.723.1 and G.723.1 Annex A.
//
//    Rates and stream size with VAD disabled (G.723.1):
//    - 5.3 kbps bit rate generates a 160-bit/20-byte output bitstream
//    - 6.3 kbps bit rate generates a 192-bit/24-byte output bitstream
//
//    Rates and stream size with VAD enabled (G.723.1 Annex A):
//    - SID (silence insertion descriptor) generates a 32-bit/4-byte output bitstream
//    - NONTX (untransmitted, VAD==0) generates an 8-bit/1-byte output bitstream
//
//    A performance timer tracks the average clock ticks per block.
//
//  Input Arguments:
//    s         - Pointer to the input speech buffer, of length FRAMES*IPP_G723_FRAME_LEN
//    dtxEnable - VAD/DTX control (Annex A); 1 = enable, 0 = disable
//    rate      - Bit rate specifier, IPP_SPCHBR_xx, where xx = 53 or 63 denotes,
//                respectively, 5.3 and 6.3 kbps
//    state     - Pointer to the encoder state
//
//  Output Arguments:
//    bits          - Compressed bitstream buffer pointer, of length 20/24 bytes per frame
//    pEncoderState - Pointer to the updated encoder state
//
//  Returns:
//    none
//
********************************************************************************************/
void EncodeBlock_G723_16s8u(IppPcmStream *speech, IppBitstream
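/* A minimal illustrative helper summarizing the per-frame output sizes listed in the header
   comment above: active speech frames are 24 bytes (192 bits) at 6.3 kbps or 20 bytes (160 bits)
   at 5.3 kbps, SID frames are 4 bytes (32 bits), and untransmitted frames are 1 byte (8 bits).
   The helper name is hypothetical, and treating every non-SID, non-voice frame type as
   untransmitted is an assumption; only IPP_G723_FRAMETYPE_SID, IPP_G723_FRAMETYPE_VOICE, and
   IPP_SPCHBR_6300 appear elsewhere in this file. */
static int G723OutputFrameSizeBytes(int frameType, int bitRate)
{
    if (frameType == IPP_G723_FRAMETYPE_SID)
        return 4;                                    /* 32-bit SID frame (Annex A) */
    if (frameType != IPP_G723_FRAMETYPE_VOICE)
        return 1;                                    /* 8-bit untransmitted (VAD==0) frame */
    return (bitRate == IPP_SPCHBR_6300) ? 24 : 20;   /* 192-bit (6.3 kbps) or 160-bit (5.3 kbps) frame */
}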