📄 voice_nn.c

📁 语音LPC压缩算法源代码（C语言）
💻 C
字号:
/* SpeedEdit 184,196,24,0,0,10,16,10 Updated 08/10/92 09:10:50 */#include <stdio.h>#include <math.h>#include "filter.h"#define NFEATURE	3		/* number of features per voicing frame */#define NICLASS		3*NFEATURE	/* number of inputs to the neural net classifier */#define LPF_LENG	3		/* length of low pass filter used to estimate average v & uv energy */#define MAXDB		72.25		/* scale factor to normalize speech level */					/* 20.0*log10(4096), where 4096 = max signal amplitude */#define V_NOMINAL	0.6		/* nominal value of Elow_v_hat */#define UV_NOMINAL	0.3		/* nominal value of Elow_uv_hat */#define NFRAMES_ELOW	440		/* number of (half) frames of Elow history to save */#define BUF_FRACT	.1		/* fraction of Elow_buf used to determine El and Eh */#define THRESH		.2		/* threshhold for Eh - El to declare speech present in Elow_buf */#define NHIDDEN		2		/* number of hidden units used in the neural net classifier *//*     ----------------------------------------  CLASSIFY  ----------------------------------------*/static float classify(x)float x[];{   int i;   float out[NHIDDEN+1];   static float bias[] =    {	 4.7605e-01, -4.4143e-01, -2.5532e+01	};   static float weight0[] = {	 0.0000e+00,  9.9600e+00,  1.3399e+00	};	/* weight0[0] is not used */   static float weight1[] = {	 2.5327e-01,  5.1214e-02,  5.1845e-02,				 8.7389e-01,  6.6049e-02,  5.6143e-02,				 2.1496e-02,  6.4269e-02,  7.5030e-03	};   static float weight2[] = {	 9.1907e-01,  1.4349e+00, -5.3130e-03,				-2.1947e+00,  2.5551e+01,  4.0398e-01,				-1.9285e-01, -9.4621e-01,  7.7245e-01	};   /*   hidden unit 1   */   out[1] = bias[1];   for (i=0; i<NICLASS; i++) out[1] += weight1[i] * x[i];   out[1] = tanh((double) out[1]);   /*   hidden unit 2   */   out[2] = bias[2];   for (i=0; i<NICLASS; i++) out[2] += weight2[i] * x[i];   out[2] = tanh((double) out[2]);   /*   output unit   */   out[0] = bias[0];   for (i=1; i<=NHIDDEN; i++) out[0] += weight0[i] * out[i];   return tanh((double) out[0]);}/*     
----------------------------------------  AUTOCOR  ----------------------------------------*/static float autocor(signal, n, lag)float signal[];int n, lag;{   int i, p1, p2;   float cor1, cor2, cor3;   /*p1 = lag / 2;				/* to be centered in signal window, go back half of lag */   p1 = lag >> 1;				/* to be centered in signal window, go back half of lag */   p2 = lag - p1;				/* and forward the difference */   cor1 = cor2 = cor3 = 0.0;   for (i=0; i<n;  i++)   {      cor1 += signal[i-p1] * signal[i+p2];      cor2 += signal[i-p1] * signal[i-p1];      cor3 += signal[i+p2] * signal[i+p2];   }   if (cor2 == 0.0 || cor3 == 0.0) return 0.0;   else                            return (cor1 * cor1) / (cor2 * cor3);	/* range:  0 to +1 */}/*     ----------------------------------------  SINSERT  ----------------------------------------     This routine re-sorts a buffer which has a single (new) entry (perhaps) out of place.'sbuf' is the array sorted in ascending order; 'obuf' is another array to be re-ordered in the same manner;'k' is the index of the newly inserted entry.*/static void sinsert(sbuf, obuf, n, k)float sbuf[];							/*  sbuf[0:n-1]  */int obuf[], n, k;						/*  obuf[0:n-1]  */{   int i, j, tempi;   float temp;   if (k < n-1 && sbuf[k] > sbuf[k+1])				/* is new datum inserted too low in buffer? */   {      for (i=k+2; i<n; i++) if (sbuf[k] <= sbuf[i]) break;	/* i points to value to follow sbuf[k] */      temp  = sbuf[k];      tempi = obuf[k];      for (j=k; j<i-1; j++)      {         sbuf[j] = sbuf[j+1];         obuf[j] = obuf[j+1];      }      sbuf[i-1] = temp;      obuf[i-1] = tempi;   }   else if (k > 0 && sbuf[k] < sbuf[k-1])			/* is new datum inserted too high in buffer? 
*/   {      for (i=k-2; i>=0; i--) if (sbuf[k] >= sbuf[i]) break;	/* i points to value to precede sbuf[k] */      temp  = sbuf[k];      tempi = obuf[k];      for (j=k; j>i+1; j--)      {         sbuf[j] = sbuf[j-1];         obuf[j] = obuf[j-1];      }      sbuf[i+1] = temp;      obuf[i+1] = tempi;   }}/*     ----------------------------------------  VFEATURES  ----------------------------------------*/static void vfeatures(speech, lpspeech, length, pitch, features)float speech[], lpspeech[], features[];				/* all indices start at 0 */int length, pitch;{   int i, nframes;   float Elow, Elown, Flow, Cpch;   float ss_low, ss_full, temp, El, Eh, Ethresh;   static int first = 1;				/* flag to indicate first execution of this routine */   static int indx = 0;   static float Elow_uv_hat;				/* estimate of average Elow when speech is absent */   static float Elow_v_hat;				/* estimate of average Elow when speech is present  */   static float avg_num[] = { 1.60283e-4, 3.20566e-4, 1.60283e-4 };   static float avg_den[] = { 1.0,       -1.949359,   0.95 };   static float Elow_buf[NFRAMES_ELOW];			/* the past NFRAMES_ELOW of Elow values */   static int Elow_order[NFRAMES_ELOW];   static FILTER *uvnf, *vnf;   static float uvnf_state[] = { 468.053, 468.053 };   static float  vnf_state[] = { 936.107, 936.107 };   if (first)							/* initialize on first pass */   {      first = 0;      /*   create the two filters used to estimate Elow when speech absent and present   */      uvnf = filter_create(avg_num, LPF_LENG, avg_den, LPF_LENG);      vnf  = filter_create(avg_num, LPF_LENG, avg_den, LPF_LENG);      /*   get the averaging filter outputs to nominal values   */      filter_state_set(uvnf, 2, uvnf_state);      filter_state_set( vnf, 2,  vnf_state);      Elow_v_hat  = filter(vnf,   V_NOMINAL);      Elow_uv_hat = filter(uvnf, UV_NOMINAL);      /*   initialize Elow_buf to nominal values   */      for (i=0; i<NFRAMES_ELOW; i++)      {         if (i < .5 * NFRAMES_ELOW) Elow_buf[i] = 
UV_NOMINAL;         else			    Elow_buf[i] = V_NOMINAL;         Elow_order[i] = i;      }   }   /*   Elow:  low band energy   */   ss_low = 0.0;   for (i=0; i<length;  i++) ss_low += lpspeech[i] * lpspeech[i];   Elow = ss_low / length;   if (Elow > 0.0) Elow = 10.0 * log10((double) Elow) - MAXDB;	/* convert to dB; fully loaded = 0 dB */   else            Elow = -100.0;				/* -100 dB if zero input */   if (Elow < -100.0) Elow = -100.0;				/* clamp negative end of range */   /*Elow = (Elow / 50.0) + 1.0;					/* -100 dB to 0 dB is scaled -1.0 to +1.0 */   Elow = (Elow * 20e-3) + 1.0;					/* -100 dB to 0 dB is scaled -1.0 to +1.0 */   /*   Determine the average speaking level   */   for (i=0; i<NFRAMES_ELOW; i++) if (Elow_order[i] == indx) break;	/* i = index of oldest Elow_buf sample */   Elow_buf[i] = Elow;						/* replace oldest sample with newest */   sinsert(Elow_buf, Elow_order, NFRAMES_ELOW, i);		/* re-sort buffers after insertion at index i *//**   indx = ++indx % NFRAMES_ELOW;				/* increment indx modulo NFRAMES_ELOW */   indx++;   if(indx == NFRAMES_ELOW) indx = 0;   El = Eh = 0.0;   nframes = BUF_FRACT * NFRAMES_ELOW;   for (i=0; i<nframes; i++)   {      El += Elow_buf[i];					/* sum the lowest  BUF_FRACT of Elow_buf */      Eh += Elow_buf[NFRAMES_ELOW-1 - i];			/* sum the highest BUF_FRACT of Elow_buf */   }   /*El /= nframes;*/   El *= 0.02272727272;   /*Eh /= nframes;*/   Eh *= 0.02272727272;   if (Eh - El > THRESH) Ethresh = .5 * (El + Eh);		/* if it's likely that Elow_buf has speech */   else			 Ethresh = .5 * (Elow_v_hat + Elow_uv_hat);   if (Elow > Ethresh) Elow_v_hat  = filter( vnf, Elow);	/* speech present this half-frame */   else		       Elow_uv_hat = filter(uvnf, Elow);	/* speech absent  this half-frame */   /*   Elown:  Elow normalized   */   Elown = Elow - Elow_v_hat + V_NOMINAL;   /*   Flow:  fraction of full band energy in low band   */   ss_full = 0.0;   for (i=0; i<length;  i++) ss_full += speech[i] * speech[i];   if (ss_full > 0.0) Flow = 
ss_low / ss_full;   else               Flow = 0.0;   if (Flow > 1.0) Flow = 1.0;					/* this happens quite often! */   Flow = 2.0 * Flow - 1.0;					/* range: -1 to +1 */   /*   Cpch:  autocorrelation of LPF(speech) @ one pitch period   */   Cpch = -2.0;   for (i=-1; i<=1; i++)   {      temp = 2.0 * autocor(lpspeech, length, pitch+i) - 1.0;	/* range: -1 to +1 */      if (temp > Cpch) Cpch = temp;   }   features[0] = Elown;   features[1] = Flow;   features[2] = Cpch;}/*     ----------------------------------------  VOICING  ----------------------------------------Determines two half-frame voicing decisions for the frame speech[start:end].*/void voicing(speech, lpspeech, start, end, pitch, v1, v2)float speech[], lpspeech[];int start, end, pitch, *v1, *v2;{   int i, vfleng;   float vclass;   static float fbuf[4*NFEATURE] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };   /* shift the feature buffer by two voicing frames */   for (i=0; i<2*NFEATURE; i++) fbuf[i] = fbuf[i + 2*NFEATURE];   /*vfleng = (end - start + 1) / 2;				/* voicing frame length */   vfleng = (end - start + 1) >> 1;				/* voicing frame length */   vfeatures(speech+start+vfleng,   lpspeech+start+vfleng,   vfleng, pitch, fbuf+6);   vfeatures(speech+start+2*vfleng, lpspeech+start+2*vfleng, vfleng, pitch, fbuf+9);   /* make the voicing classification for the first half frame */   *v1 = *v2 = 0;   vclass = classify(fbuf);   if (vclass >= 0.0) *v1 = 1;   /* make the voicing classification for the second half frame */   vclass = classify(fbuf+NFEATURE);   if (vclass >= 0.0) *v2 = 1;}
💿 文件大小 51 K
👤 上传用户 lingyun579
📂 所属分类通讯/手机编程
🏷️ 相关标签

#LPC #语音 #压缩算法 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -