📄 owng729fp.c
字号:
msdState->fMeanPitchGain =0.5f;
msdState->lPFlagCounter=0;
msdState->fMeanPFlagCounter=0.0;
msdState->lConscPFlagCounter=0;
msdState->lRCCounter=0;
msdState->fMeanFullBandEnergy =0.0f;
return;
}
/*
 * MusicDetection_G729E_32f
 *
 * Updates the music-detector state stored in msdMem from the current frame and,
 * when codecType is G729E_CODEC, may override *VadDecision with VAD_VOICE.
 * *VadDecision is never changed to any other value here.
 *
 * encoderObj   - encoder state; this function reads LagBuffer[0..4],
 *                PitchGainBuffer[0..4], prevVADDec, prevLPCMode and sFrameCounter.
 * codecType    - selects the mean-pitch-gain threshold (G729D_CODEC vs. any other)
 *                and gates the final decision override (G729E_CODEC only).
 * Energy       - frame energy term; it is scaled by fError and divided by 240
 *                before being converted with 10*log10.
 * ReflectCoeff - reflection coefficients; 10 elements are read.
 * VadDecision  - in/out VAD decision for the current frame.
 * LLenergy     - compared against the fixed levels 50 and 7 in the final decision.
 *                NOTE(review): presumably a log-domain energy - confirm with caller.
 * msdMem       - caller-owned memory, reinterpreted as MusDetectMemory.
 * pExtBuff     - scratch buffer; the first 10 floats are overwritten.
 */
void MusicDetection_G729E_32f(G729FPEncoder_Obj *encoderObj, G729Codec_Type codecType, float Energy,
float *ReflectCoeff, int *VadDecision, float LLenergy, char *msdMem,float *pExtBuff)
{
int i;
float fSum1, fSum2,fStandartDeviation;
short VoicingStrenght1, VoicingStrenght2, VoicingStrenght;
float fError, fEnergy , fSpectralDifference, *pTmpVec;
float fThreshold;
MusDetectMemory *msdState = (MusDetectMemory *)msdMem;
pTmpVec = &pExtBuff[0]; /*10 elements*/
/* fError = product of (1 - k^2) over the first four reflection coefficients. */
fError = 1.0f;
for (i=0; i< 4; i++) fError *= (1.0f - ReflectCoeff[i]*ReflectCoeff[i]);
/* Squared Euclidean distance between the running mean of the reflection
   coefficients and the current ones (all 10 elements). */
ippsSub_32f(msdState->MeanRC, ReflectCoeff, pTmpVec, 10);
ippsDotProd_32f(pTmpVec, pTmpVec, 10, &fSpectralDifference);
/* Energy in dB; IPP_MINABS_32F keeps the log10 argument away from zero. */
fEnergy = 10.0f*(float)log10(fError*Energy/240.0f +IPP_MINABS_32F);
/* The running means are only adapted on frames currently classified as noise. */
if( *VadDecision == VAD_NOISE ){
ippsInterpolateC_G729_32f(msdState->MeanRC, 0.9f, ReflectCoeff, 0.1f, msdState->MeanRC, 10);
msdState->fMeanFullBandEnergy = 0.9f * msdState->fMeanFullBandEnergy + 0.1f * fEnergy;
}
/* Mean of the 5 buffered pitch lags (fSum1) and of the 5 buffered pitch gains (fSum2). */
fSum1 = 0.0f;
fSum2 = 0.0f;
for(i=0; i<5; i++){
fSum1 += (float) encoderObj->LagBuffer[i];
fSum2 += encoderObj->PitchGainBuffer[i];
}
fSum1 = fSum1/5.0f;
fSum2 = fSum2/5.0f;
/* Sample standard deviation of the 5 lags (divisor n-1 = 4). */
fStandartDeviation =0.0f;
for(i=0; i<5; i++) fStandartDeviation += sqr(((float) encoderObj->LagBuffer[i] - fSum1));
fStandartDeviation = (float)sqrt(fStandartDeviation/4.0f);
/* Smoothed pitch gain: 0.8 of the old value plus 0.2 of the current 5-entry mean. */
msdState->fMeanPitchGain = 0.8f * msdState->fMeanPitchGain + 0.2f * fSum2;
/* See I.5.1.1 Pitch lag smoothness and voicing strength indicator. */
if ( codecType == G729D_CODEC)
fThreshold = 0.73f;
else
fThreshold = 0.63f;
/* VoicingStrenght2: smoothed pitch gain above the codec-dependent threshold. */
if ( msdState->fMeanPitchGain > fThreshold)
VoicingStrenght2 = 1;
else
VoicingStrenght2 = 0;
/* VoicingStrenght1: low lag spread together with a moderate smoothed pitch gain. */
if ( fStandartDeviation < 1.30f && msdState->fMeanPitchGain > 0.45f )
VoicingStrenght1 = 1;
else
VoicingStrenght1 = 0;
/* Combined flag = (prevVADDec & (V1 | V2)) | V2: always set when V2 is set;
   V1 alone only counts when the low bit of prevVADDec is set. */
VoicingStrenght= (short)( ((short)encoderObj->prevVADDec & (short)(VoicingStrenght1 | VoicingStrenght2))| (short)(VoicingStrenght2));
/* lRCCounter counts consecutive calls with ReflectCoeff[1] in [0, 0.45] and a
   smoothed pitch gain below 0.5; any other frame resets it. */
if( ReflectCoeff[1] <= 0.45f && ReflectCoeff[1] >= 0.0f && msdState->fMeanPitchGain < 0.5f)
msdState->lRCCounter++;
else
msdState->lRCCounter =0;
/* lMusicCounter counts frames with prevLPCMode == 1 that are also VAD_VOICE. */
if( encoderObj->prevLPCMode== 1 && (*VadDecision == VAD_VOICE))
msdState->lMusicCounter++;
/* Every 64th frame the count is folded into fMusicCounter: copied directly at
   frame 64, otherwise smoothed with weights 0.9 / 0.1. */
if ((encoderObj->sFrameCounter%64) == 0 ){
if( encoderObj->sFrameCounter == 64)
msdState->fMusicCounter = (float)msdState->lMusicCounter;
else
msdState->fMusicCounter = 0.9f*msdState->fMusicCounter + 0.1f*(float)msdState->lMusicCounter;
}
/* lZeroMusicCounter counts consecutive calls on which lMusicCounter is still 0.
   The check runs before the periodic reset below, so the order matters. */
if( msdState->lMusicCounter == 0)
msdState->lZeroMusicCounter++;
else
msdState->lZeroMusicCounter = 0;
/* A long run without music-like frames, or a long lRCCounter run, clears the smoothed counter. */
if( msdState->lZeroMusicCounter > 500 || msdState->lRCCounter > 150) msdState->fMusicCounter = 0.0f;
if ((encoderObj->sFrameCounter%64) == 0)
msdState->lMusicCounter = 0;
/* Same scheme for the voicing flag: count, fold every 64 frames, then reset. */
if( VoicingStrenght== 1 )
msdState->lPFlagCounter++;
if ((encoderObj->sFrameCounter%64) == 0 ){
if( encoderObj->sFrameCounter == 64)
msdState->fMeanPFlagCounter = (float)msdState->lPFlagCounter;
else{
/* The larger the new count, the smaller the weight it receives in the update. */
if( msdState->lPFlagCounter > 25)
msdState->fMeanPFlagCounter = 0.98f * msdState->fMeanPFlagCounter + 0.02f * msdState->lPFlagCounter;
else if( msdState->lPFlagCounter > 20)
msdState->fMeanPFlagCounter = 0.95f * msdState->fMeanPFlagCounter + 0.05f * msdState->lPFlagCounter;
else
msdState->fMeanPFlagCounter = 0.90f * msdState->fMeanPFlagCounter + 0.10f * msdState->lPFlagCounter;
}
}
/* lConscPFlagCounter counts consecutive calls on which lPFlagCounter is still 0
   (checked before the periodic reset below). */
if( msdState->lPFlagCounter == 0)
msdState->lConscPFlagCounter++;
else
msdState->lConscPFlagCounter = 0;
if( msdState->lConscPFlagCounter > 100 || msdState->lRCCounter > 150) msdState->fMeanPFlagCounter = 0.0f;
if ((encoderObj->sFrameCounter%64) == 0)
msdState->lPFlagCounter = 0;
/* Decision override, G729E_CODEC only. Each branch can only force VAD_VOICE:
   1) spectral difference and energy rise both large, with LLenergy > 50;
   2) either of them large (higher spectral threshold), with LLenergy > 50;
   3) smoothed counters high, or fewer than 64 frames seen so far, with LLenergy > 7. */
if (codecType == G729E_CODEC){
if( fSpectralDifference > 0.15f && (fEnergy -msdState->fMeanFullBandEnergy)> 4.0f && (LLenergy> 50.0) )
*VadDecision =VAD_VOICE;
else if( (fSpectralDifference > 0.38f || (fEnergy -msdState->fMeanFullBandEnergy)> 4.0f ) && (LLenergy> 50.0f))
*VadDecision =VAD_VOICE;
else if( (msdState->fMeanPFlagCounter >= 10.0f || msdState->fMusicCounter >= 5.0f || encoderObj->sFrameCounter < 64)&& (LLenergy> 7.0))
*VadDecision =VAD_VOICE;
}
return;
}
/*
 * PitchTracking_G729FPE
 *
 * Tracks the pitch lag from call to call and replaces a lag that looks like a
 * multiple or sub-multiple of the previous one with the last lag remembered
 * as stable.
 *
 * pitchDelay     - in/out integer pitch lag of the current call.
 * fracPitchDelay - in/out fractional part that goes with pitchDelay.
 * prevPitchDelay - in/out lag kept from the previous call; always set to the
 *                  (possibly replaced) *pitchDelay on exit.
 * stat_N         - in/out stationarity counter, kept within 0..7.
 * lStatPitch2PT  - in/out remembered lag.
 * lStatFracPT    - in/out remembered fractional part.
 */
void PitchTracking_G729FPE(int *pitchDelay, int *fracPitchDelay, int *prevPitchDelay, int *stat_N,
                           int *lStatPitch2PT, int *lStatFracPT)
{
    const int curLag = *pitchDelay;
    const int lastLag = *prevPitchDelay;
    int gap = curLag - lastLag;
    const int lagFell = (gap < 0);   /* non-zero when the lag became shorter */

    if (lagFell) {
        gap = -gap;
    }

    if (gap < 5) {
        /* Lag is stationary: bump the saturating counter and remember this lag. */
        if (++(*stat_N) > 7) {
            *stat_N = 7;
        }
        *lStatPitch2PT = curLag;
        *lStatFracPT = *fracPitchDelay;
    } else {
        /* Compare 2x, 3x and 4x the shorter-side lag against the other lag.
           When the lag fell, multiples of the current lag are tested against
           the previous one; otherwise the roles are swapped. */
        const int unit = lagFell ? curLag : lastLag;
        const int goal = lagFell ? lastLag : curLag;
        int nearest = gap;
        int k;

        for (k = 2; k <= 4; k++) {
            const int miss = abs(k * unit - goal);
            if (miss <= nearest) {
                nearest = miss;
            }
        }

        if (nearest >= 5) {
            /* No (sub-)multiple relation: treat as a pitch transition. */
            *stat_N = 0;
            *lStatPitch2PT = curLag;
            *lStatFracPT = *fracPitchDelay;
        } else {
            /* (Sub-)multiple detected: fall back to the remembered lag while
               the counter is still positive, then decrement it down to 0. */
            if (*stat_N > 0) {
                *pitchDelay = *lStatPitch2PT;
                *fracPitchDelay = *lStatFracPT;
            }
            *stat_N = (*stat_N > 1) ? (*stat_N - 1) : 0;
        }
    }

    *prevPitchDelay = *pitchDelay;
}
/*
 * OpenLoopPitchSearch_G729_32f
 *
 * Open-loop pitch estimate. The lag range is split into three sections; in
 * each one ippsAutoCorrLagMax_32f returns a maximum and the lag it was found
 * at. Each maximum is divided by the square root of the energy of the signal
 * delayed by that lag, and the three normalised values are then compared
 * through PITCH_THRESH, starting from the longest-lag section.
 *
 * pSrc     - signal pointer; samples before pSrc[0] are read (negative indices
 *            down to the selected lags), so the caller must provide history.
 * lBestLag - output: the selected lag.
 */
void OpenLoopPitchSearch_G729_32f(const Ipp32f *pSrc, Ipp32s* lBestLag)
{
    float scoreLong, scoreMid, scoreShort;
    int lagLong, lagMid, lagShort;
    Ipp64f delayedEnergy;
    float invNorm;
    float bestScore;
    int bestLag;

    /* Section maxima: [80, PITCH_LAG_MAX], [40, 79] and [PITCH_LAG_MIN, 39]. */
    ippsAutoCorrLagMax_32f(pSrc, FRM_LEN, 80, PITCH_LAG_MAX+1, &scoreLong, &lagLong);
    ippsAutoCorrLagMax_32f(pSrc, FRM_LEN, 40, 80, &scoreMid, &lagMid);
    ippsAutoCorrLagMax_32f(pSrc, FRM_LEN, PITCH_LAG_MIN, 40, &scoreShort, &lagShort);

    /* Normalise each maximum by 1/sqrt(energy of the delayed signal).
       The three expressions below intentionally differ in the type of the
       0.01 bias and in where the float cast sits, so that results stay
       bit-identical with the established behaviour. */
    ippsDotProd_32f64f(&pSrc[-lagLong], &pSrc[-lagLong], FRM_LEN, &delayedEnergy);
    invNorm = (float) (1.0f / sqrt(delayedEnergy+0.01f));
    scoreLong = (Ipp32f)(scoreLong) * invNorm;

    ippsDotProd_32f64f(&pSrc[-lagMid], &pSrc[-lagMid], FRM_LEN, &delayedEnergy);
    invNorm = (float) (1.0f / sqrt(delayedEnergy+0.01));
    scoreMid = (Ipp32f)(scoreMid) * invNorm;

    ippsDotProd_32f64f(&pSrc[-lagShort], &pSrc[-lagShort], FRM_LEN, &delayedEnergy);
    invNorm = 1.0f / (float)sqrt(delayedEnergy+0.01);
    scoreShort = (Ipp32f)(scoreShort) * invNorm;

    /* Start from the long-lag candidate; a shorter-lag section wins when its
       score exceeds the current best scaled by PITCH_THRESH. */
    bestScore = scoreLong;
    bestLag = lagLong;
    if (bestScore * PITCH_THRESH < scoreMid) {
        bestScore = scoreMid;
        bestLag = lagMid;
    }
    if (bestScore * PITCH_THRESH < scoreShort) {
        bestLag = lagShort;
    }

    *lBestLag = bestLag;
}
/*
 * Weights applied to the autocorrelation vector to obtain the low-band energy:
 * VoiceActivityDetect_G729_32f takes the dot product of pAutoCorr with this
 * table over LPC_ORDERP2+1 elements. The initializer supplies 13 values.
 */
static __ALIGN32 CONST float ToplAutoCorrMtr[LPC_ORDERP2+1]={
0.120089698456645f, 0.21398822343783f, 0.14767692339633f,
0.07018811903116f, 0.00980856433051f, -0.02015934721195f,
-0.02388269958005f, -0.01480076155002f, -0.00503292155509f,
0.00012141366508f, 0.00119354245231f, 0.00065908718613f,
0.00015015782285f
};
/*
 * Slopes of the 14 decision boundaries evaluated by MakeDecision(); boundary i
 * is the line a[i]*x + b[i]. Entries 4, 6 and 9 are zero: MakeDecision()
 * compares against b[i] alone for those indices.
 */
static __ALIGN32 CONST float a[14] = {
1.750000e-03f, -4.545455e-03f, -2.500000e+01f, 2.000000e+01f,
0.000000e+00f, 8.800000e+03f, 0.000000e+00f, 2.5e+01f,
-2.909091e+01f, 0.000000e+00f, 1.400000e+04f, 0.928571f,
-1.500000e+00f, 0.714285f
};
/*
 * Intercepts of the 14 decision boundaries evaluated by MakeDecision();
 * boundary i is the line a[i]*x + b[i]. For indices 4, 6 and 9 the value is
 * used on its own as a fixed threshold.
 */
static __ALIGN32 CONST float b[14] = {
0.00085f, 0.001159091f, -5.0f, -6.0f, -4.7f, -12.2f, 0.0009f,
-7.0f, -4.8182f, -5.3f, -15.5f, 1.14285f, -9.0f, -2.1428571f
};
/*
 * MakeDecision
 *
 * Classifies a frame from four difference measures by testing them against the
 * 14 linear boundaries a[i]*x + b[i]. The boundaries are checked in index
 * order and the first one that is crossed yields VAD_VOICE; if none is
 * crossed the result is VAD_NOISE.
 *
 * fLowBandEnergyDiff  - low-band energy difference.
 * fFullBandEnergyDiff - full-band energy difference.
 * fSpectralDistortion - spectral distortion.
 * fZeroCrossingDiff   - zero-crossing difference.
 */
static int MakeDecision(float fLowBandEnergyDiff, float fFullBandEnergyDiff, float fSpectralDistortion, float fZeroCrossingDiff)
{
    /* Boundaries 0-1: spectral distortion vs. zero-crossing difference. */
    if ((fSpectralDistortion > a[0]*fZeroCrossingDiff+b[0]) ||
        (fSpectralDistortion > a[1]*fZeroCrossingDiff+b[1])) {
        return VAD_VOICE;
    }

    /* Boundaries 2-4: full-band energy difference vs. zero-crossing difference. */
    if ((fFullBandEnergyDiff < a[2]*fZeroCrossingDiff+b[2]) ||
        (fFullBandEnergyDiff < a[3]*fZeroCrossingDiff+b[3]) ||
        (fFullBandEnergyDiff < b[4])) {
        return VAD_VOICE;
    }

    /* Boundaries 5-6: full-band energy difference vs. spectral distortion. */
    if ((fFullBandEnergyDiff < a[5]*fSpectralDistortion+b[5]) ||
        (fSpectralDistortion > b[6])) {
        return VAD_VOICE;
    }

    /* Boundaries 7-9: full-band energy difference vs. zero-crossing difference. */
    if ((fFullBandEnergyDiff < a[7]*fZeroCrossingDiff+b[7]) ||
        (fFullBandEnergyDiff < a[8]*fZeroCrossingDiff+b[8]) ||
        (fFullBandEnergyDiff < b[9])) {
        return VAD_VOICE;
    }

    /* Boundary 10: low-band energy difference vs. spectral distortion. */
    if (fLowBandEnergyDiff < a[10]*fSpectralDistortion+b[10]) {
        return VAD_VOICE;
    }

    /* Boundaries 11-13: low-band vs. full-band energy difference. */
    if ((fLowBandEnergyDiff > a[11]*fFullBandEnergyDiff+b[11]) ||
        (fLowBandEnergyDiff < a[12]*fFullBandEnergyDiff+b[12]) ||
        (fLowBandEnergyDiff < a[13]*fFullBandEnergyDiff+b[13])) {
        return VAD_VOICE;
    }

    return VAD_NOISE;
}
/*
 * VADGetSize
 *
 * Stores in *pDstSize the number of bytes of a VADmemory object, i.e. the
 * size of the buffer that VADInit() reinterprets as its state.
 */
void VADGetSize(Ipp32s *pDstSize)
{
    *pDstSize = (Ipp32s)sizeof(VADmemory);
}
/*
 * VADInit
 *
 * Puts the VAD state held in pVADmem into its initial condition: the memory is
 * cleared, every running mean and counter is set to zero, lFVD is set to 1 and
 * fMinEnergy is set to IPP_MAXABS_32F.
 *
 * pVADmem - caller-owned buffer, reinterpreted as VADmemory (see VADGetSize()
 *           for the required size).
 */
void VADInit(char *pVADmem)
{
    VADmemory *st = (VADmemory *)pVADmem;

    /* Clear the state as 16-bit words, then the mean LSF vector explicitly. */
    ippsZero_16s((short*)st, sizeof(*st) / 2);
    ippsZero_32f(st->MeanLSFVec, LPC_ORDER);

    /* Counters. */
    st->lSilenceCounter = 0;
    st->lUpdateCounter = 0;
    st->lSmoothingCounter = 0;
    st->lLessEnergyCounter = 0;

    /* Running means. */
    st->fMeanFullBandEnergy = 0.0f;
    st->fMeanLowBandEnergy = 0.0f;
    st->fMeanEnergy = 0.0f;
    st->fMeanZeroCrossing = 0.0f;

    /* The two non-zero defaults. */
    st->lFVD = 1;
    st->fMinEnergy = IPP_MAXABS_32F;
}
void VoiceActivityDetect_G729_32f(float ReflectCoeff, float *pLSF, float *pAutoCorr, float *pSrc, int FrameCounter,
int prevDecision, int prevPrevDecision, int *pVad, float *pEnergydB,char *pVADmem,float *pExtBuff)
{
float *pTmp;
float fSpectralDistortion, fFullBandEnergyDiff, fLowBandEnergyDiff, lNumZeroCrossing, fZeroCrossingDiff;
float fLowBandEnergy;
float fFullBandEnergy;
float zeroNum;
int i;
static __ALIGN32 CONST float vadTable[7][6]={
/* coeff C_coeff coeffZC C_coeffZC coeffSD C_coeffSD */
{ 0.75f, 0.25f, 0.8f, 0.2f, 0.6f, 0.4f},
{ 0.75f, 0.25f, 0.8f, 0.2f, 0.6f, 0.4f},
{ 0.95f, 0.05f, 0.92f, 0.08f, 0.65f, 0.35f},
{ 0.97f, 0.03f, 0.94f, 0.06f, 0.70f, 0.3f},
{ 0.99f, 0.01f, 0.96f, 0.04f, 0.75f, 0.25f},
{0.995f, 0.005f, 0.99f, 0.01f, 0.75f, 0.25f},
{0.995f, 0.005f, 0.998f, 0.002f, 0.75f, 0.25f},
};
const float *pVadTable;
VADmemory *vadState = (VADmemory *)pVADmem;
pTmp = &pExtBuff[0]; /*10 elements*/
/* compute the frame energy, full-band energy */
fFullBandEnergy = 10.0f * (float) log10( pAutoCorr[0]/240.0f + IPP_MINABS_32F);
*pEnergydB = fFullBandEnergy ;
/* compute the low-band energy (El)*/
ippsDotProd_32f(pAutoCorr, ToplAutoCorrMtr, LPC_ORDERP2+1, &fLowBandEnergy);
if (fLowBandEnergy < 0.0f) fLowBandEnergy = 0.0f;
fLowBandEnergy= 10.0f * (float) log10((float) (fLowBandEnergy/120.0f + IPP_MINABS_32F));
/* Normalize line spectral frequences */
for(i=0; i<LPC_ORDER; i++) pLSF[i] /= (float)IPP_2PI;
/* compute spectral distortion */
ippsSub_32f(pLSF, vadState->MeanLSFVec, pTmp, LPC_ORDER);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -