📄 mfcc-core.c
字号:
/**
 * @file   mfcc-core.c
 *
 * <EN>
 * @brief  Compute MFCC parameter vectors
 *
 * These are core functions to compute MFCC vectors from windowed speech data.
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Mon Aug  7 11:55:45 2006
 *
 * $Revision: 1.4 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/mfcc.h>

#ifdef MFCC_SINCOS_TABLE

/**
 * Generate table for hamming window.
 *
 * Allocates w->costbl_hamming with @a framesize entries and stores
 * 0.54 - 0.46 * cos(2 * PI * i / (framesize - 1)) at index i
 * (i = 0 .. framesize - 1).  w->costbl_hamming_len is set to @a framesize.
 *
 * @param w [i/o] MFCC calculation work area
 * @param framesize [in] window size
 */
static void
make_costbl_hamming(MFCCWork *w, int framesize)
{
  int i;
  float a;

  w->costbl_hamming = (double *)mymalloc(sizeof(double) * framesize);
  /* constant angular step, hoisted out of the loop */
  a = 2.0 * PI / (framesize - 1);
  for (i = 0; i < framesize; i++) {
    w->costbl_hamming[i] = 0.54 - 0.46 * cos(a * i);
  }
  w->costbl_hamming_len = framesize;
#ifdef MFCC_TABLE_DEBUG
  /* the product is a size_t; cast so that it matches the "%d" conversion */
  jlog("Stat: mfcc-core: generated Hamming cos table (%d bytes)\n", (int)(w->costbl_hamming_len * sizeof(double)));
#endif
}

/**
 * Build tables for FFT.
 *
 * Allocates w->costbl_fft and w->sintbl_fft with @a n entries each.
 * Entry m-1 (m = 1 .. n) holds cos(PI / 2^(m-1)) and -sin(PI / 2^(m-1))
 * respectively.  w->tbllen is set to @a n.
 *
 * @param w [i/o] MFCC calculation work area
 * @param n [in] 2^n = FFT point
 */
static void
make_fft_table(MFCCWork *w, int n)
{
  int m;
  int half;

  w->costbl_fft = (double *)mymalloc(sizeof(double) * n);
  w->sintbl_fft = (double *)mymalloc(sizeof(double) * n);
  for (m = 1; m <= n; m++) {
    /* half = 2^m / 2 */
    half = 1 << (m - 1);
    w->costbl_fft[m-1] = cos(PI / half);
    w->sintbl_fft[m-1] = -sin(PI / half);
  }
  w->tbllen = n;
#ifdef MFCC_TABLE_DEBUG
  /* the product is a size_t; cast so that it matches the "%d" conversion */
  jlog("Stat: mfcc-core: generated FFT sin/cos table (%d bytes)\n", (int)(w->tbllen * sizeof(double)));
#endif
}

/**
 * Generate table for DCT operation to make mfcc from fbank.
* * @param w [i/o] MFCC calculation work area * @param fbank_num [in] number of filer banks * @param mfcc_dim [in] number of dimensions in MFCC */static voidmake_costbl_makemfcc(MFCCWork *w, int fbank_num, int mfcc_dim){ int size; int i, j, k; float B, C; size = fbank_num * mfcc_dim; w->costbl_makemfcc = (double *)mymalloc(sizeof(double) * size); B = PI / fbank_num; k = 0; for(i=1;i<=mfcc_dim;i++) { C = i * B; for(j=1;j<=fbank_num;j++) { w->costbl_makemfcc[k] = cos(C * (j - 0.5)); k++; } } w->costbl_makemfcc_len = size;#ifdef MFCC_TABLE_DEBUG jlog("Stat: mfcc-core: generated MakeMFCC cos table (%d bytes)\n", w->costbl_makemfcc_len * sizeof(double));#endif}/** * Generate table for weighing cepstrum. * * @param w [i/o] MFCC calculation work area * @param lifter [in] cepstral liftering coefficient * @param mfcc_dim [in] number of dimensions in MFCC */static voidmake_sintbl_wcep(MFCCWork *w, int lifter, int mfcc_dim){ int i; float a, b; w->sintbl_wcep = (double *)mymalloc(sizeof(double) * mfcc_dim); a = PI / lifter; b = lifter / 2.0; for(i=0;i<mfcc_dim;i++) { w->sintbl_wcep[i] = 1.0 + b * sin((i+1) * a); } w->sintbl_wcep_len = mfcc_dim;#ifdef MFCC_TABLE_DEBUG jlog("Stat: mfcc-core: generated WeightCepstrum sin table (%d bytes)\n", w->sintbl_wcep_len * sizeof(double));#endif}#endif /* MFCC_SINCOS_TABLE *//** * Return mel-frequency. * * @param k [in] channel number of filter bank * @param fres [in] constant value computed by "1.0E7 / (para.smp_period * fb.fftN * 700.0)" * * @return the mel frequency. */float Mel(int k, float fres){ return(1127 * log(1 + (k-1) * fres));}/** * Create fbank center frequency for VTLN. 
* * @param cf [i/o] center frequency of channels in Mel, will be changed considering VTLN * @param para [in] analysis parameter * @param mlo [in] fbank lower bound in Mel * @param mhi [in] fbank upper bound in Mel * @param maxChan [in] maximum number of channels * */static booleanVTLN_recreate_fbank_cf(float *cf, Value *para, float mlo, float mhi, int maxChan){ int chan; float minf, maxf, cf_orig, cf_new; float scale, cu, cl, au, al; /* restore frequency range to non-Mel */ minf = 700.0 * (exp(mlo / 1127.0) - 1.0); maxf = 700.0 * (exp(mhi / 1127.0) - 1.0); if (para->vtln_upper > maxf) { jlog("Error: VTLN upper cut-off greater than upper frequency bound: %.1f > %.1f\n", para->vtln_upper, maxf); return FALSE; } if (para->vtln_lower < minf) { jlog("Error: VTLN lower cut-off smaller than lower frequency bound: %.1f < %.1f\n", para->vtln_lower, minf); return FALSE; } /* prepare variables for warping */ scale = 1.0 / para->vtln_alpha; cu = para->vtln_upper * 2 / ( 1 + scale); cl = para->vtln_lower * 2 / ( 1 + scale); au = (maxf - cu * scale) / (maxf - cu); al = (cl * scale - minf) / (cl - minf); for (chan = 1; chan <= maxChan; chan++) { /* get center frequency, restore to non-Mel */ cf_orig = 700.0 * (exp(cf[chan] / 1127.0) - 1.0); /* do warping */ if( cf_orig > cu ){ cf_new = au * (cf_orig - cu) + scale * cu; } else if ( cf_orig < cl){ cf_new = al * (cf_orig - minf) + minf; } else { cf_new = scale * cf_orig; } /* convert the new center frequency to Mel and store */ cf[chan] = 1127.0 * log (1.0 + cf_new / 700.0); } return TRUE;}/** * Build filterbank information and generate tables for MFCC comptutation. * * @param w [i/o] MFCC calculation work area * @param para [in] configuration parameters * * @return the generated filterbank information. 
*/
boolean
InitFBank(MFCCWork *w, Value *para)
{
  float mlo, mhi, ms, melk;
  int k, chan, maxChan, nv2;

  /* Calculate FFT size: smallest power of two >= framesize (at least 2),
     with fb.n holding the exponent */
  w->fb.fftN = 2;
  w->fb.n = 1;
  while (para->framesize > w->fb.fftN) {
    w->fb.fftN *= 2;
    w->fb.n++;
  }

  nv2 = w->fb.fftN / 2;
  w->fb.fres = 1.0E7 / (para->smp_period * w->fb.fftN * 700.0);
  maxChan = para->fbank_num + 1;
  w->fb.klo = 2;
  w->fb.khi = nv2;
  mlo = 0;
  mhi = Mel(nv2 + 1, w->fb.fres);

  /* a non-negative lopass raises the lower Mel bound and the first FFT bin used */
  if (para->lopass >= 0) {
    mlo = 1127*log(1+(float)para->lopass/700.0);
    w->fb.klo = ((float)para->lopass * para->smp_period * 1.0e-7 * w->fb.fftN) + 2.5;
    if (w->fb.klo<2) w->fb.klo = 2;
  }
  /* a non-negative hipass lowers the upper Mel bound and the last FFT bin used */
  if (para->hipass >= 0) {
    mhi = 1127*log(1+(float)para->hipass/700.0);
    w->fb.khi = ((float)para->hipass * para->smp_period * 1.0e-7 * w->fb.fftN) + 0.5;
    if (w->fb.khi>nv2) w->fb.khi = nv2;
  }

  /* Create vector of fbank centre frequencies: cf[1..maxChan], evenly
     spaced in Mel between mlo and mhi (cf[0] is unused) */
  w->fb.cf = (float *)mymalloc((maxChan + 1) * sizeof(float));
  ms = mhi - mlo;
  for (chan = 1; chan <= maxChan; chan++)
    w->fb.cf[chan] = ((float)chan / maxChan)*ms + mlo;

  if (para->vtln_alpha != 1.0) {
    /* Modify fbank center frequencies for VTLN */
    if (VTLN_recreate_fbank_cf(w->fb.cf, para, mlo, mhi, maxChan) == FALSE) {
      /* do not leak the centre-frequency vector on failure */
      free(w->fb.cf);
      w->fb.cf = NULL;
      return FALSE;
    }
  }

  /* Create loChan map, loChan[fftindex] -> lower channel index */
  w->fb.loChan = (short *)mymalloc((nv2 + 1) * sizeof(short));
  for (k = 1, chan = 1; k <= nv2; k++) {
    if (k < w->fb.klo || k > w->fb.khi) w->fb.loChan[k] = -1;
    else {
      melk = Mel(k, w->fb.fres);
      /* test the bound first so that cf[maxChan + 1] is never read */
      while (chan <= maxChan && w->fb.cf[chan] < melk) ++chan;
      w->fb.loChan[k] = chan - 1;
    }
  }

  /* Create vector of lower channel weights */
  /* NOTE(review): if loChan[k] ever equals maxChan, cf[chan + 1] below is
     read past the end of cf; the klo/khi limits appear to prevent this,
     but confirm. */
  w->fb.loWt = (float *)mymalloc((nv2 + 1) * sizeof(float));
  for (k = 1; k <= nv2; k++) {
    chan = w->fb.loChan[k];
    if (k < w->fb.klo || k > w->fb.khi) w->fb.loWt[k] = 0.0;
    else {
      if (chan > 0)
        w->fb.loWt[k] = (w->fb.cf[chan + 1] - Mel(k, w->fb.fres)) / (w->fb.cf[chan + 1] - w->fb.cf[chan]);
      else
        w->fb.loWt[k] = (w->fb.cf[1] - Mel(k, w->fb.fres)) / (w->fb.cf[1] - mlo);
    }
  }

  /* Create workspace for fft */
  w->fb.Re = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));
  w->fb.Im = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));

  w->sqrt2var = sqrt(2.0 / para->fbank_num);

  return TRUE;
}

/**
 * Free FBankInfo.
 *
 * Releases the cf, loChan, loWt, Re and Im buffers and resets the pointers
 * to NULL; the FBankInfo structure itself is not freed.
 *
 * @param fb [in] filterbank information
 */
void
FreeFBank(FBankInfo *fb)
{
  free(fb->cf);
  fb->cf = NULL;
  free(fb->loChan);
  fb->loChan = NULL;
  free(fb->loWt);
  fb->loWt = NULL;
  free(fb->Re);
  fb->Re = NULL;
  free(fb->Im);
  fb->Im = NULL;
}

/**
 * Remove DC offset per frame
 *
 * Samples are accessed as wave[1] .. wave[framesize]; wave[0] is not touched.
 *
 * @param wave [i/o] waveform data in the current frame
 * @param framesize [in] frame size
 *
 */
void
ZMeanFrame(float *wave, int framesize)
{
  int i;
  float mean;

  mean = 0.0;
  for (i = 1; i <= framesize; i++) mean += wave[i];
  mean /= framesize;
  for (i = 1; i <= framesize; i++) wave[i] -= mean;
}

/**
 * Calculate Log Raw Energy.
 *
 * Returns the natural log of the sum of squares of wave[1] .. wave[framesize].
 *
 * @param wave [in] waveform data in the current frame
 * @param framesize [in] frame size
 *
 * @return the calculated log raw energy.
 */
float CalcLogRawE(float *wave, int framesize)
{
  int i;
  double raw_E = 0.0;
  float energy;

  for (i = 1; i <= framesize; i++)
    raw_E += wave[i] * wave[i];
  energy = (float)log(raw_E);

  return(energy);
}

/**
 * Apply pre-emphasis filter.
 *
 * @param wave [i/o] waveform data in the current frame
 * @param framesize [i/o] frame size in samples
 * @param preEmph [in] pre-emphasis coef.
 */
void PreEmphasise (float *wave, int framesize, float preEmph)
{
  int i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -