📄 speechvoicescore.txt
字号:
if(onset>MIN_CONSEC_FRMS) {
/* speech beginning detected */
*start = frm - BACKOFF_FRMS ;
if(*start<0) *start = 0 ;
break ;
}
}
/* detect speech ending point */
for(frm=n_frames-1,noise_eng=0 ; frm>=n_frames-10; frm--)
noise_eng += FrameEnergy(smp,frm) ;
noise_eng /= 10 ;
#if PRINT && RUN4PC && 0
printf("Noise Energy at speech end: %f per frame\n", (float)noise_eng) ;
#endif
noise_eng *= ABOVE_NOISE_LEVEL ;
onset=0 ; *end = n_frames ;
for (frm=n_frames-1; frm>=0; frm--) {
e = FrameEnergy(smp,frm) ;
if ( e >= noise_eng ) onset ++ ;
else onset=0 ;
if(onset>MIN_CONSEC_FRMS) {
/* speech ending detected */
*end = frm + BACKOFF_FRMS ;
if(*end>n_frames) *end = n_frames;
break ;
}
}
#if PRINT && RUN4PC
printf("Speech ending points detected at (frm): start=%d end=%d (total %d frms)\n",
(int)*start,(int)*end,(int)(*end-*start+1)) ;
#endif
return *end-*start+1 ;
}
#if RUN4PC
#define WAVHEAD 120
/**
 * Read 16-bit PCM samples from a WAV file (PC builds only).
 *
 * The first WAVHEAD bytes of the file are skipped as the header and the
 * remainder is read as 2-byte samples into smp. At most MAX_LEN samples are
 * read; a longer file is truncated (a message is printed when PRINT is set).
 *
 * fn   path of the file to read
 * smp  destination buffer; must have room for up to MAX_LEN samples
 *
 * Returns the number of samples actually stored in smp. Returns 0 when the
 * file size cannot be determined or the file holds no data past the header.
 * Terminates the program with exit(1) when the file cannot be opened.
 */
INT readwav(char *fn, SHORT *smp)
{
    FILE *fp1 ;
    long fsize ;
    long nsmp ;
    INT len ;

    fp1 = fopen(fn,"rb") ;
    if(fp1==NULL) {
#if PRINT
        printf("Error: can't find the file %s\n", fn) ;
#endif
        exit(1) ;
    }

    /* the file size gives the sample count; fseek/ftell may both fail */
    fsize = -1L ;
    if (fseek(fp1, 0L, SEEK_END) == 0)
        fsize = ftell(fp1) ;
    if (fsize <= (long)WAVHEAD) {
        /* error, or nothing after the header: never hand fread a negative count */
        fclose(fp1) ;
        return 0 ;
    }

    nsmp = (fsize-WAVHEAD)/2 ; /** skip Wav header **/
    if (nsmp > MAX_LEN) {
#if PRINT
        printf("file too long, truncating to %d[sec] !!\n", (int)(MAX_LEN/SAMPRATE));
#endif
        nsmp = MAX_LEN;
    }

    if (fseek(fp1, (long)WAVHEAD, SEEK_SET) != 0) { /** skip Wav header **/
        fclose(fp1) ;
        return 0 ;
    }

    /* report what was really read, which can be less than requested */
    len = (INT)fread(smp, 2, (size_t)nsmp, fp1);
    fclose(fp1);
    return len ;
}
#endif
/*****************************************************/
/** Module to code speech into mcep **/
/*****************************************************/
/**
 * In-place transform of the work buffer s.
 *
 * s is accessed 1-based: elements s[1]..s[FRAME_SIZE] are handled as
 * consecutive (s[i], s[i+1]) pairs (real/imaginary, judging by the original
 * xre/xri naming). The first pass reorders the pairs with the usual
 * bit-reversal shuffle; the second pass runs the butterfly stages using the
 * WR[]/WI[] tables, shifting every product right by 10 bits.
 * When CPA_F is set the operation counters in COUNT are updated as well.
 */
void FFT(INT *s)
{
    const INT total = FRAME_SIZE;
    INT pair, blk, span, stride, half, lo, hi, tw;
    FLOAT wr, wi;
    INT re, im;
    FLOAT theta;

    /* ---- pass 1: reorder the pairs ---- */
    hi = 1;
    for (pair = 1; pair <= total / 2; pair++) {
        lo = 2 * pair - 1;
        if (hi > lo) {
            /* exchange pair at hi with pair at lo */
            re = s[hi];
            im = s[hi + 1];
            s[hi] = s[lo];
            s[hi + 1] = s[lo + 1];
            s[lo] = re;
            s[lo + 1] = im;
        }
        for (half = total / 2; half >= 2 && hi > half; half /= 2) {
            hi -= half;
#if CPA_F
            COUNT.addition ++ ;
            COUNT.multy ++ ;
#endif
        }
        hi += half;
    }

    /* ---- pass 2: butterfly stages, span doubles every stage ---- */
    for (span = 2; span < total; span = stride) {
        stride = 2 * span;
        theta = PIx2 / span;    /* computed as in the original; not used afterwards */
        for (pair = 1; pair <= span / 2; pair++) {
            half = 2 * pair - 1;
            /* the table index depends only on the stage and on pair */
            tw = total / (2 * span) * (pair - 1);
            wi = WI[tw];
            wr = WR[tw];
            for (blk = 0; blk <= (total - half) / stride; blk++) {
                lo = half + blk * stride;
                hi = lo + span;
                re = (wr * s[hi] - wi * s[hi + 1]) >> 10;
                im = (wr * s[hi + 1] + wi * s[hi]) >> 10;
                s[hi] = s[lo] - re;
                s[hi + 1] = s[lo + 1] - im;
                s[lo] += re;
                s[lo + 1] += im;
#if CPA_F
                COUNT.addition += 27 ;
                COUNT.multy +=8 ;
#endif
            }
        }
    }
}
/**
 * HCode: main function to extract mcep features.
 *
 * For every grid position frm = 0..grid_size one analysis frame of
 * FRAME_SIZE samples is taken from smp, pre-emphasised, windowed with
 * hamwin[], transformed with FFT(), accumulated into PORDER filter bins
 * (lowt[] weights, binbnd[] boundaries), converted to logs and projected
 * with DCT[][] into OUTNUM coefficients stored in out[frm][].
 *
 * smp        input samples
 * n_frames   number of frames available in smp
 * out        receives one OUTNUM-element row per grid position
 * grid_size  number of grid steps the utterance is divided into
 *
 * Returns n_frames unchanged.
 */
INT HCode(SHORT *smp, INT n_frames, FLOAT out[][OUTNUM], INT grid_size )
{
    INT j, k, bin;
    INT f[2*FRAME_SIZE+2];      /* work buffer, used from index 1 */
    INT bins[PORDER+2] ;
    INT enk;
    INT t1, t2;
    FLOAT min,max;
    INT frm, outfrm, skipfrm;
    FLOAT skip;

    /* presumably n_frames/grid_size in fixed point with FPN fraction bits -- verify divi() */
    skip = divi(n_frames,grid_size) ;

    /* NOTE(review): the bound is inclusive, so grid_size+1 rows of out[][] are
       written and the last position starts at about frame n_frames -- confirm
       that out[] and smp[] are sized for that. */
    for (frm=0,outfrm=0; frm<=grid_size; frm++,outfrm++) { /* main loop */
        skipfrm = (skip*frm)>>FPN ; /*Modified referece code ????? */

        /* copy new frame */
        for (j=0; j<FRAME_SIZE; j++)
            f[j+1] = (INT)smp[skipfrm*FRAME_SHIFT+j] ; /* scale down in fixed-point domain */
        for (j=FRAME_SIZE; j<2*FRAME_SIZE; j++) f[j+1] = 0;

        /** pre-emph frame **/
        for (j=FRAME_SIZE; j>=2; j--) f[j] -= (f[j-1]*FLT_0_97)>>FPN ;
        f[1] = (f[1]*FLT_0_03)>>FPN;

        /** HAMMING WINDOWS **/
        /* hamwin[] holds one half of the window; the second half is mirrored */
        for (j=0; j<FRAME_SIZE/2; j++) f[j+1] = (f[j+1]*hamwin[j])>>FPN ;
        for (j=FRAME_SIZE/2; j<FRAME_SIZE; j++) f[j+1] = (f[j+1]*hamwin[FRAME_SIZE-j-1])>>FPN;
#if CPA_F
        COUNT.addition += 13*FRAME_SIZE ;
        COUNT.multy += 3*FRAME_SIZE ;
#endif

        /* FFT */
        FFT(f) ;

        /** calculate log-spectrum **/
        /* clear every element of bins[]; the bound follows the array size
           instead of a hard-coded 15 so it stays correct if PORDER changes */
        for (j=0; j<PORDER+2; j++) bins[j] = FLT_0_0 ;
        bin = 0 ;
        for (k = 2; k <= NDIV2; k++) { /* fill bins */
            t1 = f[2*k-1];
            t2 = f[2*k];
            /* 46340 * 46340 = MAX_32 */
            if( t1 > 46340 || t1 < -46340 || t2 > 46340 || t2 < -46340 ) {
                t1 >>= 5 ; t2 >>= 5 ; /*if t1 or t2 too big, scale down to calculate square */
                enk = t1*t1 + t2*t2 ;
                /* enk = (int) sqrt((double)(enk)); */
                enk = INVSQRT / xinvsqrt(enk) ;
                /* undo the >>5 scaling, saturating at MAX_32 */
                if( enk < (INT)0x4000000 ) enk <<= 5 ;
                else enk = MAX_32 ;
            }
            else {
                t1 *= t1 ;
                t2 *= t2 ;
                if(t1>MAX_32-t2)
                    enk = 46340 ;   /* the sum would overflow: use the limit value */
                else {
                    enk = t1 + t2 ;
                    /*enk = (int) sqrt((double)(enk));*/
                    enk = INVSQRT / xinvsqrt(enk) ;
                }
            }
            if( k == binbnd[bin]) bin ++ ;
            /* split enk between bin and bin+1 using the weight lowt[k-1];
               large values are shifted before multiplying */
            if (enk<0x200000) t1 = ((INT)lowt[k-1]*enk)>>10 ;
            else t1 = (INT)lowt[k-1]*(enk>>10) ;
            if (bin>0) bins[bin] += t1;
            if (bin<PORDER) bins[bin+1] += (enk - t1);
#if CPA_F
            COUNT.addition += 16 ;
            COUNT.multy += 6 ;
#endif
        }

        for (bin=1; bin<=PORDER; bin++) { /* take logs */
            t1 = bins[bin];
            if (t1<1) t1 = 1 ;
            /*bins[bin] = (INT)(log((double)t1)*1024.0) ;*/
            Log2(t1,&max,&min );
            /** merge max and min into 1.5.10 fixed point format **/
            if(max>31) max= MAX_16;
            else if (max<-32) max = MIN_16;
            else max = (max<<10) | ( (min>>5)& (SHORT)0x7ff ) ;
            bins[bin] = (INT) mult(max, (FLOAT)0x2c5) ; /* /log(2.0) --> covert log2() into log() */
        }

        for (k=1; k<=OUTNUM; k++) {
            t1 = 0 ;
            for (j=1; j<=PORDER; j++) {
                /* clamp to the 16-bit range before the multiply */
                if (bins[j]>32767) bins[j] = 32767 ;
                if (bins[j]<-32768) bins[j] = -32768 ;
                t1 += bins[j]*(INT)DCT[k-1][j-1]; /* DCT[][] is 1.0.15 FP format */
#if CPA_F
                COUNT.addition += 7 ;
                COUNT.multy += 1 ;
#endif
            }
            out[outfrm][k-1] = (FLOAT) (t1>>15) ; /** out[][] becomes 1.5.10 FP format */
        }
    }
    return n_frames ;
}
/**
 * Normalise a pitch track in place to zero mean and unit deviation.
 *
 * The mean and the deviation are measured over the voiced frames only
 * (v[i] != 0); the normalisation is then applied to every frame.
 *
 * pitch  pitch values, 1.13.2 FP format on entry, 1.5.10 FP format on return
 * v      per-frame voiced flags
 * size   number of frames in pitch[] and v[]
 *
 * When fewer than two frames are voiced, or the deviation comes out as zero,
 * no scale can be computed; every pitch[i] is then set to 0 instead of
 * dividing by zero.
 *
 * Returns size.
 */
INT Normalize_Pitch(SHORT *pitch, char *v, INT size)
{
    INT i,c;
    INT t,m,s ;

    m = s = 0 ; c=0 ;
    for(i=0; i<size; i++) {
        if (v[i]) {
            m += (INT) pitch[i]; /** pitch[] here is 1.13.2 FP format **/
            c++ ;
#if CPA_F
            COUNT.addition += 3 ;
#endif
        }
    }

    /* mean and deviation need at least two voiced frames (divisors c and c-1) */
    if (c >= 2) {
        m /= c ;
        for(i=0; i<size; i++)
            if (v[i]) {
                t = (INT)pitch[i] - m ;
                s += t*t ;
#if CPA_F
                COUNT.addition += 3 ;
                COUNT.multy += 1 ;
#endif
            }
        s /= (c-1) ;
        /*s = sqrt((double) s);*/
        if (s > 0)
            s = INVSQRT / xinvsqrt(s) ;
    }

    if (c < 2 || s <= 0) {
        /* degenerate track: no usable scale */
        for(i=0; i<size; i++)
            pitch[i] = 0 ;
        return size ;
    }

#if RUN4PC && PRINT && 0
    printf("Pitch Norm = %f %f\n", m/4.0, s/4.0) ;
#endif
    for(i=0; i<size; i++) {
        t = (INT)pitch[i] - m ;
        /* *1024 replaces <<10: t may be negative and shifting a negative
           value left is undefined behaviour */
        pitch[i] = (FLOAT) ((t*1024)/s);
        /** *1024 --> as result pitch[] becomes 1.5.10 FP format **/
#if CPA_F
        COUNT.addition += 4 ;
        COUNT.multy += 1 ;
#endif
    }
    return size ;
}
/**
 * Pitch distance between frame j1 of freq1[] and frame j2 of freq2[].
 *
 * ft  is the absolute difference of the two pitch values,
 * ftd is the absolute difference of their frame-to-frame changes.
 * The score is ft/PW1 + ftd/PW2 + PW3*mult(ft,ftd), divided by PW4 when
 * either frame is flagged unvoiced in voicedv1[]/voicedv2[].
 *
 * For index 0 there is no previous frame; the frame itself is used as its
 * predecessor (change = 0) so that element -1 is never read.
 *
 * Returns the score.
 */
INT CalcPitchScore(INT j1, INT j2)
{
    FLOAT ft, ftd ;
    INT s ;
    INT p1, p2 ;    /* predecessor indices, clamped at 0 */

    p1 = (j1>0) ? j1-1 : j1 ;
    p2 = (j2>0) ? j2-1 : j2 ;

    ft = (freq1[j1]-freq2[j2]);
    ft = (ft>0) ? ft : -ft ;
    ftd = ((freq1[j1]-freq1[p1]) - (freq2[j2]-freq2[p2])) ;
    ftd = (ftd>0) ? ftd : -ftd ;
    s = ft/PW1 + ftd/PW2 + PW3 * mult(ft,ftd) ;
    if( !voicedv1[j1] || !voicedv2[j2] ) s /= PW4 ;
#if RUN4PC && PRINT && 0
    printf("i=%d j=%d f1=%.2f(%d) f2=%.2f(%d) ft=%.2f ftd=%.2f s=%.2f\n", j1, j2,
    freq1[j1]/1024.0, voicedv1[j1], freq2[j2]/1024.0, voicedv2[j2], ft/1024.0,
    ftd/1024.0, s/1024.0) ;
#endif
#if CPA_F
    COUNT.addition += 13 ;
    COUNT.multy += 3 ;
#endif
    return s ;
}
/********************************************************/
/** Module to align two utterances via DTW **/
/********************************************************/
/** Module that runs DTW to align two utterances **/
/** always use square grid **/
/**
 * Distance between two frames: the sum over the OUTNUM vector elements of
 * the squared element difference, each square shifted right by 10 bits.
 * vec1[] and vec2[] are in 1.5.10 FP format.
 */
INT euclid_dist(FLOAT *vec1, FLOAT *vec2)
{
    FLOAT *a = vec1;
    FLOAT *b = vec2;
    INT total = 0;
    INT diff;
    SHORT n;

    for (n = 0; n < OUTNUM; n++, a++, b++) {
        diff = (INT) (*a - *b);
        total += (diff * diff) >> 10;
#if CPA_F
        COUNT.addition += 5 ;
        COUNT.multy += 1 ;
#endif
    }
    return total;
}
/** for a given column, calc legal low and high bounds
depending on slopes and ends relaxation **/
/*******
. . . . X
. . 1 2 .
. . . 3 .
. . . . .
*******/
/* the index function used to access the squeezed arrays:
   three column slots (column i taken modulo 3) of 2*BOUND entries each,
   with row j stored at offset j-i+BOUND inside its column slot.
   The whole expansion is parenthesised so it is safe inside any expression. */
#define INDEX(i,j) ((((i)+3)%3)*(2*BOUND)+((j)-(i)+BOUND))
/* SetFrom/GetFrom store and fetch the 2-bit step code of grid point (i,j);
   four codes are packed into each element of from[].
   SetFrom declares the temporaries t, k and l, so its arguments must not use
   those names. val is parenthesised so an expression argument is shifted as a whole. */
#define SetFrom(i,j,val) { SHORT t; int k,l; k=(i)*(2*BOUND/4)+((j)-(i)+BOUND)/4; l=(((j)-(i)+BOUND+4)%4)*2; t=from[k]; t &= (~(3<<l)) ; t |= ((val)<<l); from[k]=t ; }
#define GetFrom(i,j) (3 & (from[(i)*(2*BOUND/4)+((j)-(i)+BOUND)/4]>>((((j)-(i)+BOUND+4)%4)*2)))
/**
 * main align procedure
 *
 * Dynamic-programming alignment of TeacherFeature[] (columns) against
 * StudentFeature[] (rows) on a grid_size x grid_size grid, restricted to
 * the band of rows i_col-BOUND .. i_col+BOUND-1 around the diagonal.
 * Only three columns of accumulated scores (score[]) and frame distances
 * (local[]) are kept, addressed through INDEX(); the step chosen for every
 * grid point is recorded with SetFrom() for later backtracking.
 *
 * grid_size  side length of the grid
 * align_x    receives the column of the best end point (grid_size-1 if none)
 * align_y    receives the row of the best end point (grid_size-1 if none)
 *
 * Returns the best end-point score divided by 2*grid_size.
 */
FLOAT align(INT grid_size, INT *align_x, INT *align_y)
{
    INT i_col,i_row, chosen_step;
    INT min, max;
    INT s, align_score;
    INT score[3*(2*BOUND)], t, tmp ;
    INT local[3*(2*BOUND)];

    *align_x = *align_y = grid_size-1;
    align_score = MAX_32;
    chosen_step = 0;
    /* MAX_32 marks a grid point that has not been reached */
    for(i_row=0; i_row<3*(2*BOUND); i_row++)
        local[i_row] = score[i_row] = MAX_32 ;

    /** The first column **/
    for (i_row=0; i_row<=BOUND-1; i_row++) {
        local[INDEX(0,i_row)] = s = euclid_dist(&(TeacherFeature[0][0]), &(StudentFeature[i_row][0])) ;
        score[INDEX(0,i_row)] = s * (i_row+1);
        SetFrom(0,i_row,FROM_SELF) ;
    }

    /* loop on columns */
    for (i_col=1; i_col<grid_size; i_col++) {
        /* get legal grid points, upper & lower bounds for each column */
        min = i_col - BOUND ;
        if ( min<0 ) min=0 ;
        max = i_col + BOUND - 1 ;
        if ( max >= grid_size ) max = grid_size - 1 ;

        /* loop on rows */
        for (i_row=min; i_row<=max; i_row++) {
            t = tmp = MAX_32;
            /* calc local scores for legal grid points. */
            local[INDEX(i_col,i_row)] = euclid_dist(&(TeacherFeature[i_col][0]), &(StudentFeature[i_row][0]));
#if RUN4PC && PRINT && 0
            printf("col=%d row=%d local=%f\n",i_col,i_row,local[INDEX(i_col,i_row)]/1024.0) ;
#endif
            /* now find the best path to this point */
            /* Skip one frame in x-axis */
            if (i_col>=2 && i_row<i_col+BOUND-1 && score[INDEX(i_col-2,i_row-1)]<MAX_32 && score[INDEX(i_col-1, i_row)] <MAX_32) { /* recursion path #1 exists */
                tmp = score[INDEX(i_col-2, i_row-1)] +
                    (local[INDEX(i_col-2,i_row-1)] + local[INDEX(i_col-1,i_row)])*PATH2_COST ;
                chosen_step = FROM_LEFT;
            }
            /* diagonal step from (i_col-1, i_row-1) */
            if ( score[INDEX(i_col-1, i_row-1)] < MAX_32 )
                t = score[INDEX(i_col-1, i_row-1)] + PATH1_COST*local[INDEX(i_col-1,i_row-1)] ;
            if (t<tmp) {
                tmp = t;
                chosen_step = FROM_MID;
            }
            /* step that skips one row: from (i_col-1, i_row-2) via (i_col, i_row-1) */
            if ( i_row>=2 && i_row>i_col-BOUND && score[INDEX(i_col-1,i_row-2)]<MAX_32 && score[INDEX(i_col,i_row-1)] < MAX_32 ) {
                t = score[INDEX(i_col-1,i_row-2)] +
                    (local[INDEX(i_col-1,i_row-2)] + local[INDEX(i_col,i_row-1)])*PATH2_COST ;
            }
            if (t<tmp) {
                tmp = t;
                chosen_step = FROM_RIGHT;
            }
            /* store the result only when some predecessor was reachable */
            if (tmp < MAX_32) {
                score[INDEX(i_col,i_row)] = tmp;
                SetFrom(i_col,i_row,chosen_step) ;
            }
#if RUN4PC && PRINT && 0
            if (PRINT) printf("col=%d row=%d local=%.3f score=%.3f step=%d\n", i_col, i_row, local[INDEX(i_col,i_row)]/1024.0, score[INDEX(i_col,i_row)]/1024.0,chosen_step);
#endif
            /* if in a legal end point */
            if ((i_col==grid_size-1 && (grid_size-i_row)< EDGE_RELAX ) ||
                (i_row==grid_size-1 && (grid_size-i_col)< EDGE_RELAX ) ) {
                /* if not the upper right corner, add to score to unbias short paths */
                if (i_col==grid_size-1) s = (grid_size-i_row) * local[INDEX(i_col,i_row)] ;
                if (i_row==grid_size-1) s = (grid_size-i_col) * local[INDEX(i_col,i_row)] ;
                /* an unreached end point still holds MAX_32: adding s to it
                   would overflow and could make it look like the best score,
                   so such points are skipped */
                if (score[INDEX(i_col,i_row)] < MAX_32 &&
                    align_score > score[INDEX(i_col,i_row)] + s ) {
                    align_score = score[INDEX(i_col,i_row)] + s ;
                    *align_x = i_col;
                    *align_y = i_row;
                }
            }
#if CPA_F
            COUNT.addition += 100 ;
            COUNT.multy += 26 ;
#endif
        }
    }
    return((FLOAT)(align_score/(grid_size*2)));
}
/* go backwards and find path */
FLOAT backtrack(INT last_x, INT last_y, INT grid_size, INT len1, INT len2)
{
INT x, y, path_len = 0 , step;
INT pscore = 0 ;
FLOAT skip1, skip2 ;
skip1 = divi(len1,grid_size) ;
skip2 = divi(len2,grid_size) ;
x=last_x;
y=last_y;
while (x>0) {
path_len++;
step = GetFrom(x,y) ;
pscore += CalcPitchScore( (x*skip1)>>10, (y*skip2)>>10 ) ;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -