⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 speechvoicescore.txt

📁 语音评分算法的实现，主要实现对一段语音信号进行判别并打分的功能。
💻 TXT
📖 第 1 页 / 共 3 页
字号:
    
    if(onset>MIN_CONSEC_FRMS) {
      /* speech beginning detected */
      *start = frm - BACKOFF_FRMS ;
      if(*start<0) *start = 0 ;
      break ;
    }
  }
  
  /* detect speech ending point */
  for(frm=n_frames-1,noise_eng=0 ; frm>=n_frames-10; frm--) 
    noise_eng += FrameEnergy(smp,frm) ;	
  noise_eng /= 10 ; 
  
#if PRINT && RUN4PC && 0
   printf("Noise Energy at speech end: %f per frame\n", (float)noise_eng) ;	
#endif

  noise_eng *= ABOVE_NOISE_LEVEL ;
  
  onset=0 ;  *end = n_frames ;
  for (frm=n_frames-1; frm>=0; frm--) {
    e = FrameEnergy(smp,frm) ;
    if ( e >= noise_eng ) onset ++ ;
    else onset=0 ;
    
    if(onset>MIN_CONSEC_FRMS) {
      /* speech ending detected */
      *end = frm + BACKOFF_FRMS ;
      if(*end>n_frames) *end = n_frames;
      break ;
    }
  }
  
#if PRINT && RUN4PC 
  printf("Speech ending points detected at (frm): start=%d end=%d  (total %d frms)\n",
	 (int)*start,(int)*end,(int)(*end-*start+1)) ;
#endif

  return *end-*start+1 ;
}

#if RUN4PC

#define WAVHEAD 120

/** read speech (PCM) from WAV file if running in PC **/
/*
 * Opens `fn` in binary mode, skips the first WAVHEAD bytes (treated as the
 * WAV header) and reads the rest of the file as 2-byte samples into smp[].
 * At most MAX_LEN samples are read; a longer file is truncated.
 *
 * fn  : path of the file to read
 * smp : destination buffer; up to MAX_LEN samples are written to it
 *
 * Returns the number of samples actually stored in smp[].  A file that is
 * not longer than the header, or whose size cannot be determined, yields 0.
 * If the file cannot be opened the process terminates with exit(1).
 */
INT readwav(char *fn, SHORT *smp)
{
  FILE *fp1 ;
  long nbytes, nsmp ;
  INT len ;

  fp1 = fopen(fn,"rb") ;
  if(fp1==NULL) {
#if PRINT
    printf("Error: can't find the file %s\n", fn) ;
#endif
    exit(1) ;
  }

  /* file size in bytes; stays -1 when it cannot be determined */
  nbytes = -1L ;
  if (fseek(fp1, 0L, SEEK_END) == 0) nbytes = ftell(fp1) ;

  /* Nothing behind the header (or unknown size): report zero samples
     instead of handing a negative count to fread(). */
  if (nbytes <= (long)WAVHEAD) {
    fclose(fp1) ;
    return 0 ;
  }
  nsmp = (nbytes-(long)WAVHEAD)/2 ;  /** skip Wav header **/

  if (nsmp > (long)MAX_LEN) {
#if PRINT
    printf("file too long, truncating to %d[sec] !!\n", (int)(MAX_LEN/SAMPRATE));
#endif
    nsmp = (long)MAX_LEN;
  }

  /* return what fread() really delivered, not what the size promised */
  len = 0 ;
  if (fseek(fp1, (long)WAVHEAD, SEEK_SET) == 0)   /** skip Wav header **/
    len = (INT) fread(smp, 2, (size_t)nsmp, fp1) ;
  fclose(fp1);

  return len ;
}

#endif

/*****************************************************/
/** Module to code speech into mcep                 **/
/*****************************************************/

/*
 * In-place integer transform of s[1..FRAME_SIZE]; element 0 is never touched.
 * The array is handled as consecutive pairs (s[2p-1], s[2p]).
 *
 * Pass 1 permutes the pairs.  Pass 2 runs butterfly stages whose span doubles
 * from 2 up to FRAME_SIZE; each butterfly multiplies one pair by the table
 * entries WR[]/WI[] and shifts the product right by 10 bits.
 * (WR/WI are defined elsewhere — presumably cosine/sine factors scaled by
 * 2^10; confirm against the tables.)
 */
void FFT(INT *s)
{
    INT pair, blk, npts, tw, span, half, hi, stride, lo;
    FLOAT wr, wi;
    INT tre, tim;

    npts = FRAME_SIZE;

    /* ---- pass 1: pair permutation ---- */
    hi = 1;
    for (pair = 1; pair <= npts / 2; pair++) {
        lo = 2 * pair - 1;
        if (hi > lo) {
            /* exchange pair at 'hi' with pair at 'lo' */
            tre = s[hi];
            tim = s[hi + 1];
            s[hi] = s[lo];
            s[hi + 1] = s[lo + 1];
            s[lo] = tre;
            s[lo + 1] = tim;
        }
        /* advance 'hi' to the next permuted position */
        half = npts / 2;
        while (half >= 2 && hi > half) {
            hi -= half;
            half /= 2;
#if CPA_F
COUNT.addition ++ ;
COUNT.multy ++ ;
#endif 
        }
        hi += half;
    }

    /* ---- pass 2: butterfly stages ---- */
    for (span = 2; span < npts; span = stride) {
        stride = 2 * span;

        for (pair = 1; pair <= span / 2; pair++) {
            half = 2 * pair - 1;              /* first position of this group */

            /* table entry is the same for every block of the group */
            tw = npts / (2 * span) * (pair - 1);
            wi = WI[tw];
            wr = WR[tw];

            for (blk = 0; blk <= (npts - half) / stride; blk++) {
                lo = half + blk * stride;
                hi = lo + span;

                tre = (wr*s[hi] - wi*s[hi+1])>>10 ;
                tim = (wr*s[hi+1] + wi*s[hi])>>10 ;

                s[hi] = s[lo] - tre;
                s[hi + 1] = s[lo + 1] - tim;
                s[lo] = s[lo] + tre;
                s[lo + 1] = s[lo + 1] + tim;
#if CPA_F
COUNT.addition += 27 ;
COUNT.multy +=8 ;
#endif 
            }
        }
    }
}

/* HCode: main function to extract mcep features */
/*
 * For frm = 0..grid_size (inclusive) one analysis frame is taken from smp[],
 * starting at sample ((skip*frm)>>FPN)*FRAME_SHIFT, and converted into OUTNUM
 * coefficients stored in out[frm][]:
 *   1. copy FRAME_SIZE samples into f[1..FRAME_SIZE], zero f[] above that
 *   2. pre-emphasis, then windowing with the hamwin[] table
 *   3. FFT(f)
 *   4. magnitude of every spectral pair, split between channels bins[bin] and
 *      bins[bin+1] using the lowt[] weights and the binbnd[] boundaries
 *   5. log of each channel (Log2 + rescale), then projection through DCT[][]
 *
 * smp       : input samples
 * n_frames  : number of frames in smp; returned unchanged
 * out       : receives rows 0..grid_size, OUTNUM values each
 * grid_size : number of steps the utterance is resampled to
 *
 * NOTE(review): for frm == grid_size the frame index is presumably n_frames
 * (skip looks like n_frames/grid_size in fixed point), so samples behind the
 * last frame would be read — confirm that smp[] is padded accordingly.
 */
INT HCode(SHORT *smp, INT n_frames, FLOAT out[][OUTNUM], INT grid_size )
{
    INT j, k, bin;
    INT f[2*FRAME_SIZE+2];
    INT bins[PORDER+2] ;
    INT enk;
    INT t1, t2;
    FLOAT min,max;
    INT frm, outfrm, skipfrm;
    FLOAT skip;

    skip = divi(n_frames,grid_size) ;

    for (frm=0,outfrm=0; frm<=grid_size; frm++,outfrm++) { /* main loop */
        skipfrm = (skip*frm)>>FPN ;   /* Modified reference code ????? */

        /* copy new frame (f[] is used 1-based), zero the upper half */
        for (j=0; j<FRAME_SIZE; j++)
            f[j+1] = (INT)smp[skipfrm*FRAME_SHIFT+j] ;
        for (j=FRAME_SIZE; j<2*FRAME_SIZE; j++) f[j+1] = 0;

        /** pre-emph frame **/
        /* runs from the end so that f[j-1] is still the unfiltered sample */
        for (j=FRAME_SIZE; j>=2; j--)  f[j] -= (f[j-1]*FLT_0_97)>>FPN ;
        f[1] = (f[1]*FLT_0_03)>>FPN;

        /** HAMMING WINDOWS **/
        /* hamwin[] stores one half only; the second half is mirrored */
        for (j=0; j<FRAME_SIZE/2; j++)  f[j+1] = (f[j+1]*hamwin[j])>>FPN ;
        for (j=FRAME_SIZE/2; j<FRAME_SIZE; j++)  f[j+1] = (f[j+1]*hamwin[FRAME_SIZE-j-1])>>FPN;

#if CPA_F
COUNT.addition += 13*FRAME_SIZE ;
COUNT.multy += 3*FRAME_SIZE ;
#endif 

        /* FFT */
        FFT(f) ;

        /** calculate log-spectrum **/
        /* clear every channel accumulator: bins[] has PORDER+2 entries */
        for (j=0; j<PORDER+2; j++) bins[j] = FLT_0_0 ;
        bin = 0 ;
        for (k = 2; k <= NDIV2; k++) {  /* fill bins */
            t1 = f[2*k-1];
            t2 = f[2*k];

            /* 46340 * 46340 = MAX_32 */
            if( t1 > 46340 || t1 < -46340 || t2 > 46340 || t2 < -46340 ) {
                t1 >>= 5 ;  t2 >>= 5 ;    /* if t1 or t2 too big, scale down to calculate square */
                enk = t1*t1 + t2*t2 ;

                /*  enk = (int) sqrt((double)(enk)); */
                enk = INVSQRT / xinvsqrt(enk) ;

                /* undo the >>5 scaling, saturating at MAX_32 */
                if( enk < (INT)0x4000000 ) enk <<= 5 ;
                else enk = MAX_32  ;
            }
            else {
                t1 *= t1 ;
                t2 *= t2 ;
                if(t1>MAX_32-t2)
                    enk = 46340 ;       /* sum of squares would overflow */
                else {
                    enk = t1 + t2 ;
                    /*enk = (int) sqrt((double)(enk));*/
                    enk = INVSQRT / xinvsqrt(enk) ;
                }
            }

            if( k == binbnd[bin]) bin ++ ;

            /* t1 = weighted share of enk; shift order is chosen by magnitude */
            if (enk<0x200000) t1 = ((INT)lowt[k-1]*enk)>>10 ;
            else t1 = (INT)lowt[k-1]*(enk>>10) ;

            if (bin>0) bins[bin] += t1;
            if (bin<PORDER) bins[bin+1] += (enk - t1);
#if CPA_F
COUNT.addition += 16 ;
COUNT.multy += 6 ;
#endif 		
        }

        for (bin=1; bin<=PORDER; bin++) {         /* take logs */
            t1 = bins[bin];
            if (t1<1) t1 = 1 ;

            /*bins[bin] = (INT)(log((double)t1)*1024.0) ;*/
            Log2(t1,&max,&min );

            /** merge max and min into 1.5.10 fixed point format **/
            if(max>31) max= MAX_16;
            else if (max<-32) max = MIN_16;
            else max = (max<<10) | ( (min>>5)& (SHORT)0x7ff ) ;

            /* convert log2() into log(): 0x2c5 = 709, about ln(2)*1024 */
            bins[bin] = (INT) mult(max, (FLOAT)0x2c5) ;
        }

        for (k=1; k<=OUTNUM; k++)  {
            t1 = 0 ;
            for (j=1; j<=PORDER; j++) {
                /* clamp the channel value to the 16-bit range */
                if (bins[j]>32767) bins[j] = 32767 ;
                if (bins[j]<-32768)  bins[j] = -32768 ;
                t1 += bins[j]*(INT)DCT[k-1][j-1];  /* DCT[][] is 1.0.15 FP format */
#if CPA_F
COUNT.addition += 7 ;
COUNT.multy += 1 ;
#endif   			
            }
            out[outfrm][k-1] = (FLOAT) (t1>>15) ;   /** out[][] becomes 1.5.10 FP format */
        }
    }
    return n_frames ;
}

/*
 * Normalise a pitch track in place: every pitch[i] is replaced by
 * ((pitch[i] - mean) * 1024) / deviation, where mean and deviation are
 * computed over the frames with v[i] != 0 only (deviation uses c-1 in the
 * denominator, c being the number of such frames).
 *
 * pitch : in  - pitch values (1.13.2 FP format per the original comments)
 *         out - normalised values (1.5.10 FP format)
 * v     : per-frame flag; non-zero frames contribute to mean/deviation
 * size  : number of frames in pitch[] and v[]
 *
 * Returns size.
 *
 * When fewer than two frames are flagged, or the deviation comes out as 0,
 * no statistics exist to normalise with; all outputs are then set to 0
 * instead of dividing by zero.
 */
INT Normalize_Pitch(SHORT *pitch, char *v, INT size)
{
  INT i,c;
  INT t,m,s ;
  
  m = s = 0 ; c=0 ;

  /* sum and count of the flagged frames */
  for(i=0; i<size; i++) {
     if (v[i]) { 
	     m += (INT) pitch[i];   /** pitch[] here is 1.13.2 FP format **/
	     c++ ;
#if CPA_F
COUNT.addition += 3 ;
#endif 
     } 
  }

  if (c >= 2) {
    m /= c ;

    /* sum of squared deviations from the mean */
    for(i=0; i<size; i++) 
      if (v[i]) {
        t = (INT)pitch[i] - m  ;
        s += t*t ; 
#if CPA_F
COUNT.addition += 3 ;
COUNT.multy += 1 ;
#endif  
      }
    s /= (c-1) ;

    /*s = sqrt((double) s);*/
    s = INVSQRT / xinvsqrt(s) ;
  }
  /* else: s stays 0 and selects the all-zero output below */

#if RUN4PC && PRINT && 0
  printf("Pitch Norm = %f %f\n", m/4.0, s/4.0) ;
#endif

  for(i=0; i<size; i++)   {
	  if (s != 0) {
	    t = (INT)pitch[i] - m  ;
	    /* *1024 rather than <<10: t may be negative */
	    pitch[i] = (FLOAT) ((t*1024)/s);
                 /** *1024 --> as result pitch[] becomes 1.5.10 FP format **/
	  }
	  else pitch[i] = 0 ;
#if CPA_F
COUNT.addition += 4 ;
COUNT.multy += 1 ;
#endif 
    }                           
  return size ;
}

/*
 * Pitch mismatch between frame j1 of the first track (freq1[]) and frame j2
 * of the second track (freq2[]):
 *
 *   ft  = |freq1[j1] - freq2[j2]|
 *   ftd = |(freq1[j1]-freq1[j1-1]) - (freq2[j2]-freq2[j2-1])|
 *   s   = ft/PW1 + ftd/PW2 + PW3*mult(ft,ftd)
 *
 * and s is divided by PW4 when either frame has a zero voicedv1[]/voicedv2[]
 * flag.  Returns s.
 *
 * For j1 == 0 or j2 == 0 there is no previous frame; that track's frame
 * difference is taken as 0 instead of reading element -1.
 * NOTE(review): freq1/freq2 are defined elsewhere; this assumes index -1 is
 * outside their storage — confirm.
 */
INT CalcPitchScore(INT j1, INT j2)
{
	FLOAT ft, ftd ;
	INT d1, d2 ;
	INT s ; 
	
    ft =  (freq1[j1]-freq2[j2]);
    ft = (ft>0) ? ft : -ft ;
    
    /* frame-to-frame change of each track (0 for the very first frame) */
    d1 = (j1>0) ? (freq1[j1]-freq1[j1-1]) : 0 ;
    d2 = (j2>0) ? (freq2[j2]-freq2[j2-1]) : 0 ;
	ftd = (d1 - d2) ;
    ftd = (ftd>0) ? ftd : -ftd ;

    s = ft/PW1 + ftd/PW2 + PW3 * mult(ft,ftd) ;

	if( !voicedv1[j1] || !voicedv2[j2] ) s /= PW4 ;

#if RUN4PC && PRINT && 0
	printf("i=%d j=%d f1=%.2f(%d) f2=%.2f(%d) ft=%.2f ftd=%.2f s=%.2f\n", j1, j2, 
	freq1[j1]/1024.0, voicedv1[j1], freq2[j2]/1024.0, voicedv2[j2], ft/1024.0, 
	ftd/1024.0, s/1024.0) ;
#endif
#if CPA_F
COUNT.addition += 13 ;
COUNT.multy += 3 ;
#endif 
	return s ;
}

/********************************************************/
/**  Module to align two utterances via DTW            **/
/********************************************************/

/** Module to do DTW to align two utterances **/
/** always uses a square grid  **/

/* Distance between two frames: for each of the OUTNUM components the
   difference is squared and shifted right by 10 bits, and the results are
   summed.  No square root is taken. */
INT euclid_dist(FLOAT *vec1, FLOAT *vec2)
{
  INT acc = 0 ;
  INT diff ;
  SHORT k = 0 ;

  while (k < OUTNUM) {
    /** vec1[] vec2[] is 1.5.10 FP format */
    diff = (INT) (vec1[k] - vec2[k]) ;
    acc += (diff*diff)>>10 ;
#if CPA_F
COUNT.addition += 5 ;
COUNT.multy += 1 ;
#endif 
    k++ ;
  }
  return acc ;
}

/** for a given column, calc legal low and high bounds
   depending on slopes and ends relaxation **/
/*******
  . . . . X   
  . . 1 2 .  
  . . . 3 .
  . . . . .
  
 *******/

/* the index function used to access the squeezed arrays */
/*
 * INDEX(i,j)       : slot of grid point (column i, row j) in an array that
 *                    keeps 3 columns of 2*BOUND entries (column i modulo 3,
 *                    row stored relative to the diagonal as j-i+BOUND).
 * SetFrom(i,j,val) : store the 2-bit value val for grid point (i,j) in
 *                    from[]; four points are packed into each element.
 *                    Expands to a { } block, use it as a statement.
 * GetFrom(i,j)     : read that 2-bit value back.
 *
 * Every expansion and argument is parenthesised so the macros can be used
 * inside larger expressions; SetFrom's temporaries carry an sf_ prefix so
 * they cannot capture a caller variable named t, k or l.
 */
#define INDEX(i,j)  ((((i)+3)%3)*(2*BOUND)+((j)-(i)+BOUND))
#define SetFrom(i,j,val)  { SHORT sf_t; int sf_k,sf_l; sf_k=(i)*(2*BOUND/4)+((j)-(i)+BOUND)/4; sf_l=(((j)-(i)+BOUND+4)%4)*2;  sf_t=from[sf_k]; sf_t &= (~(3<<sf_l)) ; sf_t |= ((val)<<sf_l); from[sf_k]=sf_t ; }
#define GetFrom(i,j)   (3 & (from[(i)*(2*BOUND/4)+((j)-(i)+BOUND)/4]>>((((j)-(i)+BOUND+4)%4)*2)))

/* main align procedure */
/*
 * Dynamic-programming alignment over a grid_size x grid_size grid; columns
 * index TeacherFeature[], rows index StudentFeature[].  Only rows within
 * BOUND of the diagonal are visited.
 *
 * local[] holds euclid_dist() of a grid point, score[] the accumulated path
 * cost; both keep just three columns and are addressed through INDEX().
 * MAX_32 in score[] marks a point no path has reached.  The step chosen for
 * each reached point is recorded with SetFrom() for later backtracking.
 *
 * A point can be entered by three moves:
 *   FROM_LEFT  : (col-2,row-1) -> (col-1,row) -> here, cost weight PATH2_COST
 *   FROM_MID   : (col-1,row-1) -> here,                cost weight PATH1_COST
 *   FROM_RIGHT : (col-1,row-2) -> (col,row-1) -> here, cost weight PATH2_COST
 *
 * grid_size : side length of the grid
 * align_x   : out - column of the best end point
 * align_y   : out - row of the best end point
 *
 * End points are the points on the last column or last row lying less than
 * EDGE_RELAX away from the corner.  Returns best end score / (2*grid_size);
 * *align_x / *align_y stay at grid_size-1 if no end point was reached.
 */
FLOAT align(INT grid_size, INT *align_x, INT *align_y)
{
  INT i_col,i_row, chosen_step;
  INT min, max;
  INT  s, align_score;
  INT  score[3*(2*BOUND)], t, tmp ;
  INT   local[3*(2*BOUND)];
  
  *align_x = *align_y = grid_size-1;
  align_score = MAX_32;
  chosen_step = 0;  

  for(i_row=0; i_row<3*(2*BOUND); i_row++) 
  	  local[i_row] = score[i_row] = MAX_32 ;
  	  
  /** The first column **/
   for (i_row=0; i_row<=BOUND-1; i_row++) {
    local[INDEX(0,i_row)] = s = euclid_dist(&(TeacherFeature[0][0]), &(StudentFeature[i_row][0])) ;
    score[INDEX(0,i_row)] = s * (i_row+1);
    SetFrom(0,i_row,FROM_SELF) ;
  }
  
  /* loop on columns */
  for (i_col=1; i_col<grid_size; i_col++) { 	
	  
    /* get legal grid points, upper & lower bounds for each column */
   min = i_col - BOUND  ;
   if ( min<0 ) min=0 ;
   max = i_col + BOUND - 1 ;
   if ( max >= grid_size )  max = grid_size - 1 ;
    
    /* loop on rows */
    for (i_row=min; i_row<=max; i_row++) { 
      t = tmp = MAX_32;   
      /* calc local scores for legal grid points. */
      local[INDEX(i_col,i_row)] = euclid_dist(&(TeacherFeature[i_col][0]), &(StudentFeature[i_row][0]));
      
#if RUN4PC && PRINT && 0
   printf("col=%d row=%d local=%f\n",i_col,i_row,local[INDEX(i_col,i_row)]/1024.0) ;
#endif      
      /* now find the best path to this point */
      /* Skip one frame in x-axis */
      if (i_col>=2 && i_row<i_col+BOUND-1 && score[INDEX(i_col-2,i_row-1)]<MAX_32 && score[INDEX(i_col-1, i_row)] <MAX_32) { /* recursion path #1 exists */
			tmp = score[INDEX(i_col-2, i_row-1)] + 
			      (local[INDEX(i_col-2,i_row-1)] + local[INDEX(i_col-1,i_row)])*PATH2_COST ;
			chosen_step = FROM_LEFT;
      	}
      /* diagonal step */
      if ( score[INDEX(i_col-1, i_row-1)] < MAX_32 ) 
		t = score[INDEX(i_col-1, i_row-1)] + PATH1_COST*local[INDEX(i_col-1,i_row-1)] ; 
      if (t<tmp) {
		tmp = t;
		chosen_step   = FROM_MID;
      }
      /* Skip one frame in y-axis */
      if ( i_row>=2 && i_row>i_col-BOUND && score[INDEX(i_col-1,i_row-2)]<MAX_32 &&  score[INDEX(i_col,i_row-1)] < MAX_32 ) { 
		  t = score[INDEX(i_col-1,i_row-2)] + 
		      (local[INDEX(i_col-1,i_row-2)] + local[INDEX(i_col,i_row-1)])*PATH2_COST ;
	      }
      if (t<tmp) {
		tmp = t;
		chosen_step = FROM_RIGHT;
      }
      
      /* Always store the result: the slot is shared with column i_col-3,
         so an unreached point must be reset to MAX_32 explicitly. */
      score[INDEX(i_col,i_row)]  = tmp;
      if (tmp < MAX_32) {
		SetFrom(i_col,i_row,chosen_step) ;
	  }
	  
#if RUN4PC && PRINT && 0
     if (PRINT) printf("col=%d row=%d local=%.3f score=%.3f step=%d\n", i_col, i_row, local[INDEX(i_col,i_row)]/1024.0,  score[INDEX(i_col,i_row)]/1024.0,chosen_step); 
#endif

      /* if in a legal end point that some path actually reached;
         an unreached point holds MAX_32 and adding s would overflow */
      if ( tmp < MAX_32 &&
      	  ((i_col==grid_size-1 && (grid_size-i_row)< EDGE_RELAX ) || 
      	   (i_row==grid_size-1 && (grid_size-i_col)< EDGE_RELAX )) ) {     
		/* add to score to unbias short paths: local score weighted by the
		   distance to the far edge (weight 1 at the upper right corner) */
		if (i_col==grid_size-1) s = (grid_size-i_row) * local[INDEX(i_col,i_row)] ;
		if (i_row==grid_size-1) s = (grid_size-i_col) * local[INDEX(i_col,i_row)] ; 
		if (align_score > score[INDEX(i_col,i_row)] + s ) {
	  		align_score = score[INDEX(i_col,i_row)] + s ;
	  		*align_x = i_col;
	  		*align_y = i_row;
		}
      }
#if CPA_F
COUNT.addition += 100 ;
COUNT.multy += 26 ;
#endif      
       
    }   
  }  
  return((FLOAT)(align_score/(grid_size*2)));
}

/* go backwards and find path */
FLOAT backtrack(INT last_x, INT last_y, INT grid_size, INT len1, INT len2)
{
  INT  x, y,  path_len = 0 , step;
  INT pscore = 0 ;
  FLOAT skip1, skip2 ;
  
  skip1 = divi(len1,grid_size) ;
  skip2 = divi(len2,grid_size) ;
  
  x=last_x;
  y=last_y;

  while (x>0) {
    path_len++;
    step = GetFrom(x,y) ;
    
    pscore += CalcPitchScore( (x*skip1)>>10, (y*skip2)>>10 ) ;
    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -