📄 wav2mfcc-buffer.c
字号:
/**
 * @file   wav2mfcc-buffer.c
 * @author Akinobu LEE
 * @date   Thu Feb 17 17:43:35 2005
 *
 * <EN>
 * @brief  Convert speech inputs into MFCC parameter vectors (per utterance)
 *
 * This file contains functions to convert the whole speech input
 * to MFCC vector array.  The frame-wise MFCC computation needed for
 * real-time recognition is defined in wav2mfcc-pipe.c.
 *
 * The supported format is MFCC[_0][_E][_D][_A][_Z].
 * </EN>
 *
 * $Revision: 1.6 $
 *
 */

/************************************************************************/
/*    wav2mfcc.c   Convert Speech file to MFCC_E_D_(Z) file             */
/*----------------------------------------------------------------------*/
/*    Author    : Yuichiro Nakano                                       */
/*                                                                      */
/*    Copyright(C) Yuichiro Nakano 1996-1998                            */
/*----------------------------------------------------------------------*/
/************************************************************************/

#include <sent/stddefs.h>
#include <sent/mfcc.h>

/**
 * Convert wave data to MFCC.  Also does spectral subtraction
 * if @a ssbuf specified.
 *
 * Frame @c t starts at sample @c t * para.frameshift.  After the base
 * coefficients of every frame have been computed, the optional
 * post-processing steps are applied in this order: log energy
 * normalisation, delta, acceleration, cepstral mean normalisation.
 *
 * @param wave [in] waveform data
 * @param mfcc [out] buffer to store the resulting MFCC parameter vector
 *                   [t][0..veclen-1], should be already allocated
 * @param para [in] configuration parameters
 * @param nSamples [in] length of waveform data
 * @param ssbuf [in] buffer that holds noise spectrum to be subtracted
 *                   from input, or NULL if not use spectral subtraction
 * @param ssbuflen [in] length of above, ignored when @a ssbuf is NULL
 *
 * @return the number of processed frames (0 when the input is shorter
 *         than one frame).
 */
int
Wav2MFCC(SP16 *wave, float **mfcc, Value para, int nSamples, float *ssbuf, int ssbuflen)
{
  float *bf;			/* Work space for FFT */
  int frame_num;		/* Number of frames to be computed */
  int offset;			/* Index of the first sample of the frame */
  int k, t;

  /* initialize module */
  WMP_init(para, &bf, ssbuf, ssbuflen);

  /* An input shorter than one frame yields no frame at all.  Without
     this check the truncating integer division below would report one
     frame for (framesize - frameshift) < nSamples < framesize, and a
     negative count for even shorter inputs. */
  if (nSamples < para.framesize) {
    frame_num = 0;
  } else {
    frame_num = (int)((nSamples - para.framesize) / para.frameshift) + 1;
  }

  for (t = 0; t < frame_num; t++) {
    offset = t * para.frameshift;

    /* The work buffer is filled from index 1.  framesize + 1 slots are
       written per frame, exactly as before; the last slot of the final
       frame may fall one sample past the end of the input, so the read
       is bounded by nSamples and zero is stored instead.
       NOTE(review): whether WMP_calc() looks at slot framesize + 1 at
       all is not visible from this file -- confirm against mfcc-core. */
    for (k = 0; k <= para.framesize; k++) {
      if (offset + k < nSamples) {
	bf[k + 1] = (float)wave[offset + k];
      } else {
	bf[k + 1] = 0.0f;
      }
    }

    /* Calculate base MFCC coefficients */
    WMP_calc(mfcc[t], bf, para, ssbuf);
  }

  /* The post-processing steps need at least one frame */
  if (frame_num > 0) {
    /* Normalise Log Energy */
    if (para.energy && para.enormal) {
      NormaliseLogE(mfcc, frame_num, para);
    }

    /* Delta (consider energy suppress) */
    if (para.delta) {
      Delta(mfcc, frame_num, para);
    }

    /* Acceleration */
    if (para.acc) {
      Accel(mfcc, frame_num, para);
    }

    /* Cepstrum Mean Normalization */
    if (para.cmn) {
      CMN(mfcc, frame_num, para.mfcc_dim);
    }
  }

  /* free module */
  WMP_calc_fin(bf);

  return frame_num;
}

/**
 * Normalise log energy.
 *
 * The energy term is taken from index @c para.mfcc_dim of each vector,
 * or from the following index when @c para.c0 is set.  Values are
 * floored at (max - para.silFloor * LOG_TEN / 10) and then mapped to
 * 1.0 - (max - value) * para.escale, where max is the largest energy
 * value found in the given frames.
 *
 * @param mfcc [i/o] array of MFCC vectors
 * @param frame_num [in] number of frames
 * @param para [in] configuration parameters
 */
void
NormaliseLogE(float **mfcc, int frame_num, Value para)
{
  float max, min, f;
  int t;
  int l;			/* Index of the energy term */

  /* Nothing to normalise, and mfcc[0] must not be touched */
  if (frame_num <= 0) {
    return;
  }

  l = para.mfcc_dim;
  if (para.c0) {
    l++;
  }

  /* find max log energy */
  max = mfcc[0][l];
  for (t = 1; t < frame_num; t++) {
    if (mfcc[t][l] > max) {
      max = mfcc[t][l];
    }
  }

  /* set the silence floor */
  min = max - (para.silFloor * LOG_TEN) / 10.0;

  /* normalise */
  for (t = 0; t < frame_num; t++) {
    f = mfcc[t][l];
    if (f < min) {
      f = min;
    }
    mfcc[t][l] = 1.0 - (max - f) * para.escale;
  }
}

/**
 * Calculate delta coefficients.
 *
 * For each of the first @c para.baselen elements of a vector, a
 * regression over a window of +/- @c para.delWin frames is computed
 * and stored at index @c para.baselen + n.  Frames outside the
 * utterance are replaced by the first or last frame.
 *
 * When @c para.absesup is set, the last base element is removed from
 * every vector after all deltas have been computed: the delta block is
 * moved down by one slot over it, and the delta of the removed element
 * becomes the last delta value.
 *
 * @param c [i/o] MFCC vectors, in which the delta coeff. will be appended.
 * @param frame [in] number of frames
 * @param para [in] configuration parameters
 */
void
Delta(float **c, int frame, Value para)
{
  int theta, t, n, B = 0;
  float A1, A2, sum;
  float *ed = NULL;		/* Delta of the last base element, per frame */

  if (frame <= 0) {
    return;
  }

  /* Denominator term of the regression formula */
  for (theta = 1; theta <= para.delWin; theta++) {
    B += theta * theta;
  }

  if (para.absesup) {
    ed = (float *)mymalloc(sizeof(float) * frame);
  }

  for (t = 0; t < frame; t++) {
    for (n = 0; n < para.baselen; n++) {
      sum = 0;
      for (theta = 1; theta <= para.delWin; theta++) {
	/* Replicate the first or last vector */
	/* at the beginning and end of speech */
	if (t - theta < 0) {
	  A1 = c[0][n];
	} else {
	  A1 = c[t - theta][n];
	}
	if (t + theta >= frame) {
	  A2 = c[frame - 1][n];
	} else {
	  A2 = c[t + theta][n];
	}
	sum += theta * (A2 - A1);
      }
      if (para.absesup && n == para.baselen - 1) {
	/* Keep it aside: the base element is still needed as input
	   for the deltas of the following frames */
	ed[t] = sum / (2 * B);
      } else {
	c[t][para.baselen + n] = sum / (2 * B);
      }
    }
  }

  if (para.absesup) {
    /* Drop the last base element: shift the baselen - 1 deltas that
       were stored in place down by one slot, then append the delta
       that was kept aside */
    for (t = 0; t < frame; t++) {
      memmove(&(c[t][para.baselen - 1]), &(c[t][para.baselen]),
	      sizeof(float) * (para.baselen - 1));
      c[t][para.baselen * 2 - 2] = ed[t];
    }
    free(ed);
  }
}

/**
 * Calculate acceleration coefficients.
 *
 * The regression over a window of +/- @c para.accWin frames is applied
 * to the @c para.baselen delta values of each vector, walking from the
 * last delta element down to the first.  Each result is written
 * @c para.baselen slots after its source element.  The last delta
 * element is at index 2 * baselen - 1, or one lower when
 * @c para.absesup is set.
 *
 * @param c [i/o] MFCC vectors, in which the acceleration coeff. will be
 *                appended.
 * @param frame [in] number of frames
 * @param para [in] configuration parameters
 */
void
Accel(float **c, int frame, Value para)
{
  int theta, t, n, B = 0;
  int src, dst;
  float A1, A2, sum;

  if (frame <= 0) {
    return;
  }

  /* Denominator term of the regression formula */
  for (theta = 1; theta <= para.accWin; theta++) {
    B += theta * theta;
  }

  for (t = 0; t < frame; t++) {
    /* Start from the last delta element of the vector */
    src = para.baselen * 2 - 1;
    if (para.absesup) {
      src--;
    }
    dst = src + para.baselen;

    for (n = 0; n < para.baselen; n++) {
      sum = 0;
      for (theta = 1; theta <= para.accWin; theta++) {
	/* Replicate the first or last vector */
	/* at the beginning and end of speech */
	if (t - theta < 0) {
	  A1 = c[0][src];
	} else {
	  A1 = c[t - theta][src];
	}
	if (t + theta >= frame) {
	  A2 = c[frame - 1][src];
	} else {
	  A2 = c[t + theta][src];
	}
	sum += theta * (A2 - A1);
      }
      c[t][dst] = sum / (2 * B);
      src--;
      dst--;
    }
  }
}

/**
 * Cepstrum Mean Normalization (buffered).
 * The mean of each of the first @a dim elements is computed over the
 * given MFCC vectors and subtracted from every vector.
 *
 * @param mfcc [i/o] array of MFCC vectors
 * @param frame_num [in] number of frames
 * @param dim [in] number of leading elements of each vector to normalise
 */
void
CMN(float **mfcc, int frame_num, int dim)
{
  int i, t;
  float *mfcc_ave;		/* Mean of each element over all frames */
  float sum;

  /* No frame: nothing to subtract from, and the mean is undefined */
  if (frame_num <= 0) {
    return;
  }

  mfcc_ave = (float *)mycalloc(dim, sizeof(float));

  for (i = 0; i < dim; i++) {
    sum = 0.0;
    for (t = 0; t < frame_num; t++) {
      sum += mfcc[t][i];
    }
    mfcc_ave[i] = sum / frame_num;
  }

  for (t = 0; t < frame_num; t++) {
    for (i = 0; i < dim; i++) {
      mfcc[t][i] = mfcc[t][i] - mfcc_ave[i];
    }
  }

  free(mfcc_ave);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -