📄 mfcc-core.c

📁 julius version 4.12.about sound recognition.
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/**
 * @file   mfcc-core.c
 *
 * <EN>
 * @brief  Compute MFCC parameter vectors
 *
 * These are core functions to compute MFCC vectors from windowed speech data.
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Mon Aug  7 11:55:45 2006
 *
 * $Revision: 1.4 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/mfcc.h>

#ifdef MFCC_SINCOS_TABLE

/**
 * Generate table for hamming window.
 *
 * Entry i (0-based) holds 0.54 - 0.46 * cos(2 * PI * i / (framesize - 1)).
 * The table and its length are stored in the work area.
 *
 * @param w [i/o] MFCC calculation work area
 * @param framesize [in] window size
 */
static void
make_costbl_hamming(MFCCWork *w, int framesize)
{
  int i;
  float a;

  w->costbl_hamming = (double *)mymalloc(sizeof(double) * framesize);

  /* angular step between successive window samples */
  a = 2.0 * PI / (framesize - 1);
  for (i = 0; i < framesize; i++) {
    w->costbl_hamming[i] = 0.54 - 0.46 * cos(a * i);
  }
  w->costbl_hamming_len = framesize;
#ifdef MFCC_TABLE_DEBUG
  /* sizeof yields size_t; cast so that the argument matches "%d" */
  jlog("Stat: mfcc-core: generated Hamming cos table (%d bytes)\n",
       (int)(w->costbl_hamming_len * sizeof(double)));
#endif
}

/**
 * Build tables for FFT.
 *
 * For each stage m = 1..n, entry m-1 of the cosine table holds
 * cos(PI / 2^(m-1)) and entry m-1 of the sine table holds
 * -sin(PI / 2^(m-1)).
 *
 * @param w [i/o] MFCC calculation work area
 * @param n [in] 2^n = FFT point
 */
static void
make_fft_table(MFCCWork *w, int n)
{
  int m;
  int half;

  w->costbl_fft = (double *)mymalloc(sizeof(double) * n);
  w->sintbl_fft = (double *)mymalloc(sizeof(double) * n);
  for (m = 1; m <= n; m++) {
    /* half of the 2^m span handled at stage m */
    half = 1 << (m - 1);
    w->costbl_fft[m-1] =  cos(PI / half);
    w->sintbl_fft[m-1] = -sin(PI / half);
  }
  w->tbllen = n;
#ifdef MFCC_TABLE_DEBUG
  /* sizeof yields size_t; cast so that the argument matches "%d" */
  jlog("Stat: mfcc-core: generated FFT sin/cos table (%d bytes)\n",
       (int)(w->tbllen * sizeof(double)));
#endif
}

/**
 * Generate table for DCT operation to make mfcc from fbank.
*  * @param w [i/o] MFCC calculation work area * @param fbank_num [in] number of filer banks * @param mfcc_dim [in] number of dimensions in MFCC */static voidmake_costbl_makemfcc(MFCCWork *w, int fbank_num, int mfcc_dim){  int size;  int i, j, k;  float B, C;  size = fbank_num * mfcc_dim;  w->costbl_makemfcc = (double *)mymalloc(sizeof(double) * size);  B = PI / fbank_num;  k = 0;  for(i=1;i<=mfcc_dim;i++) {    C = i * B;    for(j=1;j<=fbank_num;j++) {      w->costbl_makemfcc[k] = cos(C * (j - 0.5));      k++;    }  }  w->costbl_makemfcc_len = size;#ifdef MFCC_TABLE_DEBUG  jlog("Stat: mfcc-core: generated MakeMFCC cos table (%d bytes)\n",       w->costbl_makemfcc_len * sizeof(double));#endif}/**  * Generate table for weighing cepstrum. *  * @param w [i/o] MFCC calculation work area * @param lifter [in] cepstral liftering coefficient * @param mfcc_dim [in] number of dimensions in MFCC */static voidmake_sintbl_wcep(MFCCWork *w, int lifter, int mfcc_dim){  int i;  float a, b;  w->sintbl_wcep = (double *)mymalloc(sizeof(double) * mfcc_dim);  a = PI / lifter;  b = lifter / 2.0;  for(i=0;i<mfcc_dim;i++) {    w->sintbl_wcep[i] = 1.0 + b * sin((i+1) * a);  }  w->sintbl_wcep_len = mfcc_dim;#ifdef MFCC_TABLE_DEBUG  jlog("Stat: mfcc-core: generated WeightCepstrum sin table (%d bytes)\n",       w->sintbl_wcep_len * sizeof(double));#endif}#endif /* MFCC_SINCOS_TABLE *//**  * Return mel-frequency. *  * @param k [in] channel number of filter bank * @param fres [in] constant value computed by "1.0E7 / (para.smp_period * fb.fftN * 700.0)" *  * @return the mel frequency. */float Mel(int k, float fres){  return(1127 * log(1 + (k-1) * fres));}/** * Create fbank center frequency for VTLN. 
* * @param cf [i/o] center frequency of channels in Mel, will be changed considering VTLN * @param para [in] analysis parameter * @param mlo [in] fbank lower bound in Mel * @param mhi [in] fbank upper bound in Mel * @param maxChan [in] maximum number of channels *  */static booleanVTLN_recreate_fbank_cf(float *cf, Value *para, float mlo, float mhi, int maxChan){  int chan;  float minf, maxf, cf_orig, cf_new;  float scale, cu, cl, au, al;  /* restore frequency range to non-Mel */  minf = 700.0 * (exp(mlo / 1127.0) - 1.0);  maxf = 700.0 * (exp(mhi / 1127.0) - 1.0);  if (para->vtln_upper > maxf) {    jlog("Error: VTLN upper cut-off greater than upper frequency bound: %.1f > %.1f\n", para->vtln_upper, maxf);    return FALSE;  }  if (para->vtln_lower < minf) {    jlog("Error: VTLN lower cut-off smaller than lower frequency bound: %.1f < %.1f\n", para->vtln_lower, minf);    return FALSE;  }    /* prepare variables for warping */  scale = 1.0 / para->vtln_alpha;  cu = para->vtln_upper * 2 / ( 1 + scale);  cl = para->vtln_lower * 2 / ( 1 + scale);  au = (maxf - cu * scale) / (maxf - cu);  al = (cl * scale - minf) / (cl - minf);    for (chan = 1; chan <= maxChan; chan++) {    /* get center frequency, restore to non-Mel */    cf_orig = 700.0 * (exp(cf[chan] / 1127.0) - 1.0);    /* do warping */    if( cf_orig > cu ){      cf_new = au * (cf_orig - cu) + scale * cu;    } else if ( cf_orig < cl){      cf_new = al * (cf_orig - minf) + minf;    } else {      cf_new = scale * cf_orig;    }    /* convert the new center frequency to Mel and store */    cf[chan] = 1127.0 * log (1.0 + cf_new / 700.0);  }  return TRUE;}/**  * Build filterbank information and generate tables for MFCC comptutation. *  * @param w [i/o] MFCC calculation work area * @param para [in] configuration parameters *  * @return the generated filterbank information.  
*/
boolean
InitFBank(MFCCWork *w, Value *para)
{
  float mlo, mhi, ms, melk;
  int k, chan, maxChan, nv2;

  /* Calculate FFT size: smallest power of two (at least 2) that is not
     smaller than the frame size; fb.n is its base-2 exponent */
  w->fb.fftN = 2;
  w->fb.n = 1;
  while (para->framesize > w->fb.fftN) {
    w->fb.fftN *= 2;
    w->fb.n++;
  }

  nv2 = w->fb.fftN / 2;
  w->fb.fres = 1.0E7 / (para->smp_period * w->fb.fftN * 700.0);
  maxChan = para->fbank_num + 1;

  /* default band: FFT indices 2..nv2, Mel range 0..Mel(nv2 + 1) */
  w->fb.klo = 2;
  w->fb.khi = nv2;
  mlo = 0;
  mhi = Mel(nv2 + 1, w->fb.fres);

  /* lower band limit, used only when lopass is non-negative */
  if (para->lopass >= 0) {
    mlo = 1127*log(1+(float)para->lopass/700.0);
    w->fb.klo = ((float)para->lopass * para->smp_period * 1.0e-7 * w->fb.fftN) + 2.5;
    if (w->fb.klo<2) w->fb.klo = 2;
  }
  /* upper band limit, used only when hipass is non-negative */
  if (para->hipass >= 0) {
    mhi = 1127*log(1+(float)para->hipass/700.0);
    w->fb.khi = ((float)para->hipass * para->smp_period * 1.0e-7 * w->fb.fftN) + 0.5;
    if (w->fb.khi>nv2) w->fb.khi = nv2;
  }

  /* Create vector of fbank centre frequencies, evenly spaced in Mel.
     Indices 1..maxChan are filled; index 0 is left unset. */
  w->fb.cf = (float *)mymalloc((maxChan + 1) * sizeof(float));
  ms = mhi - mlo;
  for (chan = 1; chan <= maxChan; chan++)
    w->fb.cf[chan] = ((float)chan / maxChan)*ms + mlo;

  if (para->vtln_alpha != 1.0) {
    /* Modify fbank center frequencies for VTLN */
    if (VTLN_recreate_fbank_cf(w->fb.cf, para, mlo, mhi, maxChan) == FALSE) {
      /* do not leak the center frequency vector on failure */
      free(w->fb.cf);
      w->fb.cf = NULL;
      return FALSE;
    }
  }

  /* Create loChan map, loChan[fftindex] -> lower channel index
     (-1 for FFT indices outside klo..khi) */
  w->fb.loChan = (short *)mymalloc((nv2 + 1) * sizeof(short));
  for (k = 1, chan = 1; k <= nv2; k++) {
    if (k < w->fb.klo || k > w->fb.khi) {
      w->fb.loChan[k] = -1;
    } else {
      melk = Mel(k, w->fb.fres);
      /* test the bound first: cf[] has only maxChan + 1 elements, so
	 cf[chan] must not be read once chan has passed maxChan */
      while (chan <= maxChan && w->fb.cf[chan] < melk) ++chan;
      w->fb.loChan[k] = chan - 1;
    }
  }

  /* Create vector of lower channel weights */
  w->fb.loWt = (float *)mymalloc((nv2 + 1) * sizeof(float));
  for (k = 1; k <= nv2; k++) {
    chan = w->fb.loChan[k];
    if (k < w->fb.klo || k > w->fb.khi) {
      w->fb.loWt[k] = 0.0;
    } else if (chan > 0) {
      w->fb.loWt[k] = (w->fb.cf[chan + 1] - Mel(k, w->fb.fres)) / (w->fb.cf[chan + 1] - w->fb.cf[chan]);
    } else {
      /* below the first center frequency: mlo acts as the lower neighbour */
      w->fb.loWt[k] = (w->fb.cf[1] - Mel(k, w->fb.fres)) / (w->fb.cf[1] - mlo);
    }
  }

  /* Create workspace for fft */
  w->fb.Re = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));
  w->fb.Im = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));

  w->sqrt2var = sqrt(2.0 / para->fbank_num);

  return TRUE;
}

/**
 * Free FBankInfo.
 *
 * Releases the five buffers referenced by the structure and clears the
 * pointers, so that releasing the same structure again is harmless.
 * The structure itself is not freed.
 *
 * @param fb [in] filterbank information
 */
void
FreeFBank(FBankInfo *fb)
{
  free(fb->cf);
  fb->cf = NULL;
  free(fb->loChan);
  fb->loChan = NULL;
  free(fb->loWt);
  fb->loWt = NULL;
  free(fb->Re);
  fb->Re = NULL;
  free(fb->Im);
  fb->Im = NULL;
}

/**
 * Remove DC offset per frame
 *
 * The samples are read from and written to wave[1..framesize]
 * (1-based indexing).
 *
 * @param wave [i/o] waveform data in the current frame
 * @param framesize [in] frame size
 *
 */
void
ZMeanFrame(float *wave, int framesize)
{
  int i;
  float mean;

  mean = 0.0;
  for (i = 1; i <= framesize; i++) mean += wave[i];
  mean /= framesize;
  for (i = 1; i <= framesize; i++) wave[i] -= mean;
}

/**
 * Calculate Log Raw Energy.
 *
 * Returns the natural logarithm of the sum of squares of
 * wave[1..framesize] (1-based indexing).
 *
 * @param wave [in] waveform data in the current frame
 * @param framesize [in] frame size
 *
 * @return the calculated log raw energy.
 */
float
CalcLogRawE(float *wave, int framesize)
{
  int i;
  double raw_E = 0.0;
  float energy;

  for (i = 1; i <= framesize; i++)
    raw_E += wave[i] * wave[i];
  energy = (float)log(raw_E);

  return(energy);
}

/**
 * Apply pre-emphasis filter.
 *
 * @param wave [i/o] waveform data in the current frame
 * @param framesize [i/o] frame size in samples
 * @param preEmph [in] pre-emphasis coef.
 */
void PreEmphasise (float *wave, int framesize, float preEmph)
{
  int i;
12 下一页
💿 文件大小 1116 K
👤 上传用户 a415834839
📂 所属分类网络
🏷️ 相关标签

#recognition #version #julius #about
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -