⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 g723_codec.c

📁 G.723在ARM上的实现。实现平台为Linux 2.4.8 + Intel XScale。包括源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
       frame to the next.  Large inter-frame changes (>3 quantizer indices) imply significant changes 	   in background noise energy; the CNG module responds by inserting a SID frame; small inter-frame changes       allow the current frame to remain a candidate for non-transmission, provided the inter-frame       spectral changes are also small. */	pDstEncoderState->dtxState.qSidGainIndex = 0;	/* Initialize the SID prediction residual energy history, the elements of which are interpreted 	   as estimates of the energy associated with each CNG frame excitation. Depending on the number of        Up to 3 subframes are considered for any given average computation (see below). */	pDstEncoderState->dtxState.residualEnergy[0] = 0;	pDstEncoderState->dtxState.residualEnergy[1] = 0;	pDstEncoderState->dtxState.residualEnergy[2] = 0;	/* Initialize silence frame counter; this counter, incremented up to a maximum of value of three, tracks        the number of consecutive silence frames following a speech interval.  The counter is set to 1 at the        start of an "inactive zone", i.e., during the first silence frame following a period of speech       activity, and is incremented during each consecutive SID or non-TX frame up to a maximum value of 3.       This counter is used to determine the number of frame energy estimates over which to compute an       average during SID gain quantization. */	pDstEncoderState->dtxState.sumFrame = 0;	return(1);} /* EncoderInit_G723 *//******************************************************************************************//// Name://		Encode_G723_16s8u//// Description://		Compress one 30 ms input speech frame (240 samples, 16-bit //		linear PCM) into a 189/158/32/8-bit compressed bitstream. 
//      Represent the compressed bitstream using 24/20/4/1 bytes, in accordance //      with ITU Recommendations G.723.1, and G.723.1 Annex A////      Rates and stream size with VAD disabled (G.723.1)://			- 5.3 kbps bit rate generates a 160-bit/20 byte output bitstream//          - 6.3 kbps bit rate generates a 192-bit/24 byte output bitstream////      Rates and stream size with VAD enabled (G.723.1 Annex A)://			- SID (silence interval description) generates a 32-bit/4 byte output bitstream//          - NONTX (untransmitted VAD==0) generates a 8-bit/1 byte output bitstream//// Input Arguments:  //		pSrcSpeech		       - Pointer to input speech vector, of length 240 samples//		bitRate				   - Bit rate specifier, IPP_SPCHBR_xx, where xx=53 or 63//                               denotes, respectively, 5.3 and 6.3 kbps//		enableVad              - VAD/DTX control (Annex A); 1=enable, 0=disable//		enableHighpassFilter   - Preprocessor HPF control; 1=enable, 0=disable; switch//                               provided for test vector compliance verificaiton//		pEncoderState		   - Pointer to encoder state//// Output Arguments: //		pDstBitstream		   - Compressed bitstream pointer, of length 20/24 bytes//		pEncoderState		   - Pointer to updated encoder state//// Returns: //		1	   - No Error.//      0      - Bad Arguments.//********************************************************************************************/IppStatus Encode_G723_16s8u(IppPcmStream *pSrcSpeech,							IppBitstream *pDstBitstream,                             IppSpchBitRate bitRate, 							int enableVad, 							int enableHighpassFilter,                            IppG723EncoderState *pEncoderState){	int		i,j;								   /* general purpose loop indices */	Ipp16s	SpchAnalysisBuf[LPCWIN+FLEN-SFLEN];	   /* multi-purpose speech and excitation analysis buffer */	Ipp16s	PrcptWghtSpchBuf[FLEN+MAXLAG];		   /* perceptually weighted speech buffer */	Ipp16s	OLPSAnalysisBuf[FLEN+MAXLAG];		   /* open-loop pitch search 
analysis buffer */	Ipp16s	autoCorrelation[(LPC1+1)*SFNUM];	   /* subframe autocorrelation vectors (4xLPC1) for AC summation computation */	Ipp16s	Lpc[SFNUM][LPC];					   /* LPC coefficient vector */	Ipp16s	Lsf[LPC], InterpLsf[LPC];			   /* LSF and interpolated LSF vectors */	Ipp32s	QLsfIndex;							   /* LSF quantization index */ 	Ipp16s	InvalidFrame;						   /* LSF quantization flag */	Ipp16s	QLpc[SFNUM][LPC];					   /* quantized interpolated LPC vectors */	Ipp16s	PerceptLpc[SFNUM<<1][LPC];			   /* perceptually weighted LPC vector */	Ipp16s	OpenLoopPitchLag[SFNUM>>1];			   /* open-loop pitch estimates */	Ipp16s	HNSLag[SFNUM];						   /* harmonic noise shaping lags */	Ipp16s	HNSGain[SFNUM];						   /* harmonic noise shaping gains */	Ipp16s	ImpulseResp[SFLEN];					   /* combined filter impulse responses */	Ipp16s	AdaptGainIndex[SFNUM];				   /* adaptive codebook gain indices */	Ipp16s	EstimatedPitchLag[SFNUM];			   /* closed-loop pitch search neighboorhood (OLPS-based) */	Ipp16s	ClosedLoopPitchLagOffset[SFNUM];	   /* closed-loop pitch search results - offsets to OLPS */	Ipp16s	AdaptCBVect[SFLEN];                    /* adaptive codebook vector */  	Ipp16s	FixedCBVect[SFLEN];					   /* fixed codebook vector */	Ipp16s	FixedCBGrid[SFNUM];					   /* fixed codebook search grid */	Ipp16s	DiracTrainEnable[SFNUM];			   /* MP-MLQ pulse train usage flag: 0=off, 1=on */	Ipp16s	ImpRespCovarMatrix[COVMATDIM];		   /* Toepliz covariance matrix for the impulse response of Si(z) */	Ipp16s	CrossCorrACELP[SFLEN];				   /* ACELP codebook search cross correlations (target x h(n)) */	Ipp16s	PulseSignsACELP[SFNUM][4];			   /* unpacked ACELP codevector pulse signs */	Ipp16s	PulsePosACELP[SFNUM][4];			   /* unpacked ACELP codevector pulse positions */	Ipp16s	FixedCBQGain[SFNUM];				   /* quantized fixed codebook gains */	Ipp16s	FixedCBQGainIndex[SFNUM];		   	   /* quantized fixed codebook gain indices */	Ipp16s	FixedCBPulseSign[SFNUM];			   /* packed/encoded fixed CB pulse signs */	Ipp32s	
FixedCBPulsePos[SFNUM];				   /* packed/encoded fixed CB pulse positions */	Ipp16s  PitchSyncIndex, PitchSyncGain;		   /* ACELP pitch synchronous filter parameters */	Ipp16s	ACELPInnerLoopMaxEntry;				   /* ACELP codebook search control parameter */	Ipp16s	SubframeOffset;						   /* speech analysis buffer subframe base address */	Ipp16s  residualEnergy;						   /* Levinson-Durbin residual energy parameter (unused) */	Ipp16s  vad=1;                                 /* VAD result; 1=voice present; 0=voice absent */	Ipp16s	frameType = IPP_G723_FRAMETYPE_VOICE;  /* frame type: 0=nonTX, 1=active speech (VAD==1), 2=SID (VAD==0) */	/* Apply highpass filter to the input speech; eliminate any DC offset.	   The preprocessing highpass filter is given in [1], Eq. 1, p. 3.	   The HPF prevents artificical increases in R(0) during        autocorrelation analysis.  After filtering, prepare an input 	   speech buffer for LPC autocorrelation analysis (Levinson-Durbin).	   Upon return, the buffer SpchAnalysisBuf contains highpass filtered 	   input speech suitable for autocorrelation analysis.  The HPF filter        memory is maintained in the encoder state variable.  A switch is	   provided to disable the highpass filter during test vector compliance	   procedures as required in [1].	
*/	appsPreprocess_G723_I((Ipp16s *)pSrcSpeech->pBuf,                           SpchAnalysisBuf,                          enableHighpassFilter,						  &(pEncoderState->highpassFilterZfir), 						  &(pEncoderState->highpassFilterZiir), 						  pEncoderState->prevSpch);		/* Perform autocorrelation analysis, estimate LPC parameters using Levinson-Durbin */	for ( i=j=0; i<SFNUM; i++, j+=(LPC1+1) )	{		ippsAutoCorr_G723_16s(SpchAnalysisBuf+i*SFLEN, autoCorrelation+j+LPC1, autoCorrelation+j);		ippsLevinsonDurbin_G723_16s(autoCorrelation+j, &(pEncoderState->sineDtct), &residualEnergy, Lpc[i]);	}	/* Compute summation autocorrelation over 4 subframes, then	   update summation autocorrelation history in the encoder state (4 frame history),	   as well as the summation autocorrelation scaling history (exp) */	appsAutoCorrSum_G723_16s(autoCorrelation,                              pEncoderState->frameAutoCorr, 							 pEncoderState->frameAutoCorrExp);	/* Detect voice activity	   using the algorithm described in [3], section A.2, pp. 2-4.	   Upon return from the VAD analysis, VAD==1 indicates voice present, and	   VAD==0 indicates voice absent */	if (enableVad)		appsVAD_G723_16s(pEncoderState->sineDtct,                         pEncoderState->openLoopPitchLag, 					     pEncoderState->vadLpc, 					     SpchAnalysisBuf+(SFLEN<<1), 					     &vad, 					     &(pEncoderState->vadState));	/* Update sine detector */	appsSinDetect_G723_I(&(pEncoderState->sineDtct));	/* LPC quantization  	   	   1. LPC->LSF transformation on subframe 3 (last subframe)	   2. 
Quantize LSFs on subframe 3 	*/	/* Convert LPCs to LSFs */	ippsLPCToLSF_G723_16s(Lpc[3], pEncoderState->prevLsf, Lsf);	/* Quantize LSFs */	ippsLSFQuant_G723_16s32s(Lsf, pEncoderState->prevLsf, &QLsfIndex);	/* Update speech analysis buffer */	for ( i = 0; i < LPCWIN-SFLEN; i ++ )		pEncoderState->prevSpch[i] = SpchAnalysisBuf[FLEN+i];	for ( i = 0; i < FLEN; i ++ ) 		SpchAnalysisBuf[i] = SpchAnalysisBuf[((LPCWIN-SFLEN)>>1)+i];		/* Construct perceptual weighting filter 	   (Eq. 11 of [1], sec. 2.8, p. 7). Using the scaling property 	   of the Z-transform, shift the poles and zeros radially	   inwards towards the center of the unit circle, to affect 	   prediction residual (excitation) matching emphasis in the        most audible regions during the codebook search procedures.	   After constructing the filter, apply perceptual weighting 	   to the input speech, i.e., compute f(n) ([1], p. 7) 	   Upon return from the PWF function, PrcptWghtSpchBuf contains the 	   sequence f(n). 	*/	appsPerceptualWeightingFilter_G723_16s(SpchAnalysisBuf, Lpc, PerceptLpc, PrcptWghtSpchBuf,  										   pEncoderState->perceptualWeightFilterZfir, 										   pEncoderState->perceptualWeightFilterZiir,										   pEncoderState->prevWgtSpch);	/* Prepare an analysis buffer for the open loop pitch search as follows: 	   1. Load last MAXLAG (145) weighted samples from the previous frame into	      the first MAXLAG samples of the current frame's OLPS analysis buffer.        2. The remaining 240 samples of the OLPS analysis buffer were already generated 	      by the PWF, above.	   3. Identify the element of largest magnitude in the entire OLPS buffer to 	      perform normalization 	   4. For VAD==1, maintain a history of the perceptually weighted speech in the encoder state 	      to be used for the OLPS during the next frame (see analysis buffer           construction)       5. 
For VAD==0, perceptually weighted speech history is 	*/ 	appsOpenLoopPitchSearchPreprocess_G723_16s(PrcptWghtSpchBuf, OLPSAnalysisBuf, pEncoderState->prevWgtSpch);	/* Perform open-loop pitch search  	   The OLPS primitive computes the cross-correlation criterion (Eq. 12 of [1], sec. 2.9, p. 7) 	   and performs the maximization search described in [1].  The maximizing index, j, is returned	   for each of two half-frames (two subframes in each half-frame).	*/	for ( i = 0; i < SFNUM>>1; i ++ )	{		/* Perform open-loop pitch search */		ippsOpenLoopPitchSearch_G723_16s(OLPSAnalysisBuf+MAXLAG+(i*SFLEN<<1),                                          &OpenLoopPitchLag[i]);		/* Update OLPS history used in VAD processing */		pEncoderState->openLoopPitchLag[i] = pEncoderState->openLoopPitchLag[i+2];		pEncoderState->openLoopPitchLag[i+2] = OpenLoopPitchLag[i];	}	/* Process VAD==0 frame; i.e., silence or voice activity absent. 	   DTX decision processing classifies the frame using one of two categories:            1) SID (silence interval description) -- silence "reference"               frames are parameterized in terms of LPCs and a gain.			2) non-transmitted silence (NonTX) -- uses previous SID parameters.	*/	if (vad==0)	{		/* Reset quantized LSF index and frame type indicator */		QLsfIndex = 0;		frameType = IPP_G723_FRAMETYPE_NONTX;		/* Classify the frame as SID or NonTX by analyzing input speech according 		   to the DTX procedure described in [3], sections A3 and A4, pp. 4-9. 
*/		appsDTXDecision_G723_16s(pEncoderState->prevDTXFrameType,                                  pEncoderState->prevLsf, 							     pEncoderState->frameAutoCorr, 							     pEncoderState->frameAutoCorrExp, 								 pEncoderState->vadLpc, 								 FixedCBQGainIndex, 								 &(pEncoderState->sidGain), 								 &QLsfIndex,								 pEncoderState->sidLsf, 								 &frameType, 								 &(pEncoderState->targetExcitationGain),								 &(pEncoderState->vadState), 								 &(pEncoderState->dtxState));				/* Synthesize CNG excitation using CNG procedure described in [3], 		   section A4.5, pp. 10-11. */		appsGenerateCNGExcitation_G723_16s(pEncoderState->targetExcitationGain,                                            pEncoderState->prevExcitation,								           EstimatedPitchLag, 								           ClosedLoopPitchLagOffset, 								           AdaptGainIndex, 								           PrcptWghtSpchBuf+MAXLAG, 								           &(pEncoderState->randomSeedCNG), 								           bitRate); 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -