📄 pesqdsp.c
字号:
if( start == 0L )
{
for( count = 0L; count < Nwindows; count++ )
VAD[count] = (float)fabs(VAD[count]);
VAD[0] = -LevelMin;
VAD[Nwindows-1] = -LevelMin;
}
count = 3;
while( count < (Nwindows-2) )
{
if( (VAD[count] > 0.0f) && (VAD[count-2] <= 0.0f) )
{
VAD[count-2] = VAD[count] * 0.1f;
VAD[count-1] = VAD[count] * 0.3f;
count++;
}
if( (VAD[count] <= 0.0f) && (VAD[count-1] > 0.0f) )
{
VAD[count] = VAD[count-1] * 0.3f;
VAD[count+1] = VAD[count-1] * 0.1f;
count += 3;
}
count++;
}
for( count = 0L; count < Nwindows; count++ )
if( VAD[count] < 0.0f ) VAD[count] = 0.0f;
if( LevelThresh <= 0.0f )
LevelThresh = LevelMin;
for( count = 0L; count < Nwindows; count++ )
{
if( VAD[count] <= LevelThresh )
logVAD[count] = 0.0f;
else
logVAD[count] = (float)log( VAD[count]/LevelThresh );
}
}
/*
 * crude_align
 *
 * Picks a coarse delay by locating the largest positive value in the
 * buffer filled by FFTNXCorr() from the logVAD sequences of the reference
 * and degraded signals.  (FFTNXCorr is defined elsewhere; presumably a
 * cross-correlation -- confirm against its definition.)
 *
 * Utt_id selects the search range and where the result is stored:
 *   WHOLE_SIGNAL   - both logVAD sequences from index 0, with lengths
 *                    Nsamples / Downsample; writes Crude_DelayEst and
 *                    sets Crude_DelayConf to 0.
 *   MAXNUTTERANCES - uses slot MAXNUTTERANCES-1 of UttSearch_Start /
 *                    UttSearch_End / Utt_DelayEst; writes
 *                    Utt_Delay[MAXNUTTERANCES-1].
 *   anything else  - uses UttSearch_Start/End[Utt_id] and Crude_DelayEst;
 *                    writes Utt_DelayEst[Utt_id].
 *
 * ftmp receives the FFTNXCorr output; this function reads its first
 * (nr + nd - 1) entries.  FFTFree() is always called before returning.
 */
void crude_align(
SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, ERROR_INFO * err_info,
long Utt_id, float * ftmp)
{
    float * ref_env = ref_info->logVAD;
    float * deg_env = deg_info->logVAD;
    float * xcorr = ftmp;
    long ref_len;
    long deg_len;
    long ref_from;
    long deg_from;
    long lag;
    long peak_lag;
    long offset;
    float peak;

    /* Work out which stretch of each logVAD sequence is compared. */
    if( Utt_id == WHOLE_SIGNAL )
    {
        ref_from = 0L;
        deg_from = 0L;
        ref_len = ref_info->Nsamples / Downsample;
        deg_len = deg_info->Nsamples / Downsample;
    }
    else if( Utt_id == MAXNUTTERANCES )
    {
        ref_from = err_info->UttSearch_Start[MAXNUTTERANCES-1];
        deg_from = ref_from + err_info->Utt_DelayEst[MAXNUTTERANCES-1] / Downsample;
        if( deg_from < 0L )
        {
            /* Degraded start would be before index 0: move the reference
               start instead and begin the degraded range at 0. */
            ref_from = -err_info->Utt_DelayEst[MAXNUTTERANCES-1] / Downsample;
            deg_from = 0L;
        }
        ref_len = err_info->UttSearch_End[MAXNUTTERANCES-1] - ref_from;
        deg_len = ref_len;
        /* Shorten the degraded range so it ends within Nsamples / Downsample. */
        if( deg_from + deg_len > deg_info->Nsamples / Downsample )
        {
            deg_len = deg_info->Nsamples / Downsample - deg_from;
        }
    }
    else
    {
        ref_from = err_info->UttSearch_Start[Utt_id];
        deg_from = ref_from + err_info->Crude_DelayEst / Downsample;
        if( deg_from < 0L )
        {
            ref_from = -err_info->Crude_DelayEst / Downsample;
            deg_from = 0L;
        }
        ref_len = err_info->UttSearch_End[Utt_id] - ref_from;
        deg_len = ref_len;
        if( deg_from + deg_len > deg_info->Nsamples / Downsample )
        {
            deg_len = deg_info->Nsamples / Downsample - deg_from;
        }
    }

    /* Defaults used when either range is too short or no entry is > 0:
       peak_lag = ref_len - 1 makes the offset below come out as zero. */
    peak = 0.0f;
    peak_lag = ref_len - 1;

    if( (ref_len > 1L) && (deg_len > 1L) )
    {
        FFTNXCorr( ref_env + ref_from, ref_len, deg_env + deg_from, deg_len, xcorr );

        /* Strict '>' keeps the first occurrence of the maximum. */
        for( lag = 0L; lag < (ref_len + deg_len - 1); lag++ )
        {
            if( xcorr[lag] > peak )
            {
                peak = xcorr[lag];
                peak_lag = lag;
            }
        }
    }

    /* Peak index relative to position ref_len - 1 of the output buffer. */
    offset = peak_lag - ref_len + 1;

    if( Utt_id == WHOLE_SIGNAL )
    {
        err_info->Crude_DelayEst = offset * Downsample;
        err_info->Crude_DelayConf = 0.0f;
    }
    else if( Utt_id == MAXNUTTERANCES )
    {
        err_info->Utt_Delay[MAXNUTTERANCES-1] =
            offset * Downsample + err_info->Utt_DelayEst[MAXNUTTERANCES-1];
    }
    else
    {
        err_info->Utt_DelayEst[Utt_id] =
            offset * Downsample + err_info->Crude_DelayEst;
    }

    FFTFree();
}
/*
 * time_align
 *
 * Refines the delay of utterance Utt_id, starting from
 * err_info->Utt_DelayEst[Utt_id].
 *
 * Frames of Align_Nfft samples are taken from ref_info->data and
 * deg_info->data, advancing by Align_Nfft / 4 each time, multiplied by a
 * raised-cosine taper and combined in the transform domain (conjugate of
 * the reference spectrum times the degraded spectrum).  For every frame,
 * the bins of the absolute inverse transform that exceed 0.99 * its
 * maximum add pow(0.99 * max, 0.125) to a histogram.  The histogram is
 * then combined, again via the transform domain, with a symmetric
 * triangular kernel (half-width Align_Nfft / 64), divided by the sum of
 * the unsmoothed histogram, and the largest bin is taken.
 *
 * Outputs:
 *   err_info->Utt_Delay[Utt_id]     = initial estimate + peak bin
 *                                     (bins >= Align_Nfft / 2 count as
 *                                     negative offsets)
 *   err_info->Utt_DelayConf[Utt_id] = height of the normalised peak
 *
 * ftmp is scratch space, laid out as:
 *   [0 ..]                      first spectrum buffer
 *   [Align_Nfft + 2 ..]         second spectrum buffer
 *   [2 * Align_Nfft + 4 ..]     histogram (Align_Nfft entries)
 *   [5 * Align_Nfft ..]         taper (Align_Nfft entries)
 *
 * NOTE(review): RealFFT / RealIFFT are defined elsewhere; the loops below
 * treat their data as Align_Nfft / 2 + 1 interleaved (re, im) pairs --
 * confirm against their definitions.  FFTFree() is called before return.
 */
void time_align(
SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, ERROR_INFO * err_info,
long Utt_id, float * ftmp )
{
    float * spec_a = ftmp;
    float * spec_b = ftmp + Align_Nfft + 2;
    float * hist = (ftmp + 4 + 2 * Align_Nfft);
    float * taper = ftmp + 5 * Align_Nfft;
    long coarse_delay = err_info->Utt_DelayEst[Utt_id];
    long ref_pos;
    long deg_pos;
    long i;
    long k;
    long half_width;
    long best_bin;
    float re, im;
    float b_re, b_im;
    float mag;
    float tap;
    float frame_peak;
    float hist_sum;
    float best;

    /* Clear the histogram and build the taper (the two regions of ftmp
       are disjoint, so one pass fills both). */
    for( i = 0L; i < Align_Nfft; i++ )
    {
        hist[i] = 0.0f;
        taper[i] = (float)(0.5 * (1.0 - cos((TWOPI * i) / Align_Nfft)));
    }

    /* Initial frame positions; if the degraded position would be negative,
       shift the reference position instead. */
    ref_pos = err_info->UttSearch_Start[Utt_id] * Downsample;
    deg_pos = ref_pos + coarse_delay;
    if( deg_pos < 0L )
    {
        ref_pos = -coarse_delay;
        deg_pos = 0L;
    }

    /* One iteration per frame, as long as a full frame fits in both the
       degraded signal and the utterance search range. */
    for( ;
         ((deg_pos + Align_Nfft) <= deg_info->Nsamples) &&
         ((ref_pos + Align_Nfft) <= (err_info->UttSearch_End[Utt_id] * Downsample));
         ref_pos += (Align_Nfft / 4), deg_pos += (Align_Nfft / 4) )
    {
        for( i = 0L; i < Align_Nfft; i++ )
        {
            spec_a[i] = ref_info->data[i + ref_pos] * taper[i];
            spec_b[i] = deg_info->data[i + deg_pos] * taper[i];
        }
        RealFFT( spec_a, Align_Nfft );
        RealFFT( spec_b, Align_Nfft );

        /* conj(reference) * degraded, bin by bin, written back to spec_a. */
        for( k = 0L; k <= Align_Nfft / 2; k++ )
        {
            re = spec_a[k * 2];
            im = -spec_a[1 + (k * 2)];
            b_re = spec_b[k * 2];
            b_im = spec_b[1 + (k * 2)];
            spec_a[k * 2] = (re * b_re - im * b_im);
            spec_a[1 + (k * 2)] = (re * b_im + im * b_re);
        }
        RealIFFT( spec_a, Align_Nfft );

        /* Rectify and find the frame maximum. */
        frame_peak = 0.0f;
        for( i = 0L; i < Align_Nfft; i++ )
        {
            mag = (float) fabs(spec_a[i]);
            spec_a[i] = mag;
            if( mag > frame_peak )
            {
                frame_peak = mag;
            }
        }

        /* Bins above 99% of the maximum vote into the histogram. */
        frame_peak *= 0.99f;
        for( i = 0L; i < Align_Nfft; i++ )
        {
            if( spec_a[i] > frame_peak )
            {
                hist[i] += (float) pow( frame_peak, 0.125 );
            }
        }
    }

    /* Copy the histogram into spec_a, total it, and clear spec_b. */
    hist_sum = 0.0f;
    for( i = 0L; i < Align_Nfft; i++ )
    {
        hist_sum += hist[i];
        spec_a[i] = hist[i];
        spec_b[i] = 0.0f;
    }

    /* Triangular kernel centred on index 0, mirrored at the buffer end. */
    spec_b[0] = 1.0f;
    half_width = Align_Nfft / 64;
    for( i = 1; i < half_width; i++ )
    {
        tap = 1.0f - ((float)i) / ((float)half_width);
        spec_b[i] = tap;
        spec_b[(Align_Nfft - i)] = tap;
    }

    RealFFT( spec_a, Align_Nfft );
    RealFFT( spec_b, Align_Nfft );

    /* Plain (non-conjugated) product this time. */
    for( k = 0L; k <= Align_Nfft / 2; k++ )
    {
        re = spec_a[k * 2];
        im = spec_a[1 + (k * 2)];
        b_re = spec_b[k * 2];
        b_im = spec_b[1 + (k * 2)];
        spec_a[k * 2] = (re * b_re - im * b_im);
        spec_a[1 + (k * 2)] = (re * b_im + im * b_re);
    }
    RealIFFT( spec_a, Align_Nfft );

    /* Normalise by the histogram total and track the first largest bin.
       If no frame voted (total is 0), every bin becomes 0 and the peak
       stays at bin 0 with height 0. */
    best = 0.0f;
    best_bin = 0L;
    for( i = 0L; i < Align_Nfft; i++ )
    {
        if( hist_sum > 0.0 )
        {
            hist[i] = (float) fabs(spec_a[i]) / hist_sum;
        }
        else
        {
            hist[i] = 0.0f;
        }

        if( hist[i] > best )
        {
            best = hist[i];
            best_bin = i;
        }
    }

    /* Upper half of the buffer represents negative offsets. */
    if( best_bin >= (Align_Nfft/2) )
    {
        best_bin -= Align_Nfft;
    }

    err_info->Utt_Delay[Utt_id] = coarse_delay + best_bin;
    err_info->Utt_DelayConf[Utt_id] = best;

    FFTFree();
}
void split_align( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
ERROR_INFO * err_info, float * ftmp,
long Utt_Start, long Utt_SpeechStart, long Utt_SpeechEnd, long Utt_End,
long Utt_DelayEst, float Utt_DelayConf,
long * Best_ED1, long * Best_D1, float * Best_DC1,
long * Best_ED2, long * Best_D2, float * Best_DC2,
long * Best_BP )
{
long count, bp, k;
long Utt_Len = Utt_SpeechEnd - Utt_SpeechStart;
long Utt_Test = MAXNUTTERANCES - 1;
long N_BPs;
long Utt_BPs[41];
long Utt_ED1[41], Utt_ED2[41];
long Utt_D1[41], Utt_D2[41];
float Utt_DC1[41], Utt_DC2[41];
long Delta, Step, Pad;
long estdelay;
long I_max;
float v_max, n_max;
long startr;
long startd;
float * X1;
float * X2;
float * H;
float * Window;
float r1, i1;
long kernel;
float Hsum;
*Best_DC1 = 0.0f;
*Best_DC2 = 0.0f;
X1 = ftmp;
X2 = ftmp + 2 + Align_Nfft;
H = (ftmp + 4 + 2 * Align_Nfft);
Window = ftmp + 6 + 3 * Align_Nfft;
for( count = 0L; count < Align_Nfft; count++ )
Window[count] = (float)(0.5 * (1.0 - cos((TWOPI * count) / Align_Nfft)));
kernel = Align_Nfft / 64;
Delta = Align_Nfft / (4 * Downsample);
Step = (long) ((0.801 * Utt_Len + 40 * Delta - 1)/(40 * Delta));
Step *= Delta;
Pad = Utt_Len / 10;
if( Pad < 75 ) Pad = 75;
Utt_BPs[0] = Utt_SpeechStart + Pad;
N_BPs = 0;
do {
N_BPs++;
Utt_BPs[N_BPs] = Utt_BPs[N_BPs-1] + Step;
} while( (Utt_BPs[N_BPs] <= (Utt_SpeechEnd - Pad)) && (N_BPs < 40) );
if( N_BPs <= 0 ) return;
for( bp = 0; bp < N_BPs; bp++ )
{
(*err_info).Utt_DelayEst[Utt_Test] = Utt_DelayEst;
(*err_info).UttSearch_Start[Utt_Test] = Utt_Start;
(*err_info).UttSearch_End[Utt_Test] = Utt_BPs[bp];
crude_align( ref_info, deg_info, err_info, MAXNUTTERANCES, ftmp);
Utt_ED1[bp] = (*err_info).Utt_Delay[Utt_Test];
(*err_info).Utt_DelayEst[Utt_Test] = Utt_DelayEst;
(*err_info).UttSearch_Start[Utt_Test] = Utt_BPs[bp];
(*err_info).UttSearch_End[Utt_Test] = Utt_End;
crude_align( ref_info, deg_info, err_info, MAXNUTTERANCES, ftmp);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -