📄 fe_plcc.cpp
字号:
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
#include "StdAfx.h"#include "FE_feature.h"
/* Table of constant values */static const double c_b17 = 0.33;static const double c_b28 = 10.0;
int Fe::_plp_cepstrum_basic(float *sample, int frameSize, float *plp_cep, int ceporder, int norder)
{
_plp_basic(sample, frameSize, plp_cep, ceporder, norder);
return 1;
}
int Fe::plp_cepstrum_basic(short *sample, int frameSize, float *plp_cep, int ceporder, int norder)
{
vector<float> frameA(frameSize);
preprocessing(sample, frameSize, &frameA[0]);
m_window.Windowing(&frameA[0], frameSize, WIN_HAMMING);
_plp_cepstrum_basic(&frameA[0], frameSize, plp_cep, ceporder, norder);
return 1;
}
/*
* int _plp_basic(sample, sampleN, frameSize, plp_cep, ceporder, norder)
* Calculates the PLP coefficients of an input speech signal using the
* algorithm of Hynek Hermanski.
* sample (in): pointer to input speech data of type float
* sampleN (in): number of data points available in array
* frameSize (in): analysis window in samples
* Expon (in): peak enhancement factor
* norder (in): order of PLP model
* ceporder (in): order of cepstrum order
* Gain (in): gain flag (1 = gain; 0 no)
* plp_cep (out): pointer to output plp data of type float
* On exit returns the number of frames.
*/
int Fe::_plp_basic(float *sample, int frameSize, float *plp_cep, int ceporder, int norder)
{
float gain; /* gain parameter of model */
vector<float> a(norder+1); /* auto regressive coefficients */
vector<float> rc(norder+1); /* reflection coefficients */
plp_analysis(sample, frameSize, norder, &a[0], &rc[0], &gain, m_sampleRate);
if(m_plpGain) for (int k = 0; k < norder + 1; k++) a[k] = a[k]/gain;
a2gexp_(&a[0], plp_cep, norder, ceporder+1, m_plpExpon);
plp_cep[0] = -plp_cep[0];
return 1;
}
/*
* plp_cep.c
*
* written by Hynek Hermanski
*
* translated by f2c (version of 30 January 1990 16:02:04).
* Modified by Chuck Wooters, ICSI, 7/6/90
*
* further modified at OGI -- array indices in some inner loops were
* replaced by pointers. the fft routine was replaces by TrigRecombFFT.
*
* Cleaned Up by Johan Schalkwyk (March 1993)
*
* The actual processing is split up into a few pieces:
* 1) power spectral analysis
* 2) auditory spectrum computation
* 3) compression
* 4) inverse FFT
* 5) autoregressive all-pole modeling (cepstral coefficients)
*/
int Fe::init_plp()
{
m_plpOrder = PLP_ORDER; /* model order */
m_plccOrder = PLP_CEP_ORDER; /* number of cepstral parameters excluding c0 */
m_plpGain = TRUE; /* gain flag */
m_plpExpon = (float)PLP_P_EXP; /* peak enhancemnt factor */
m_plpW.clear(); /* used by FFT() to hold W twiddle */
m_plpMofW = 0;
m_plpCF.clear(); /* Trigonometrix Recombination Coeff */
m_plpMofCF = 0;
m_plpIcall = 0; /* Initialized data? */
m_plpNfilt = 0;
return 1;
}
/** int plp_analysis(speech, frameSize, m, a, rc, gain, sf)* subroutine which computes m-th order (max 15) PLP model (described by * m+1 autoregressive coefficients a and gain or by m reflection * coefficients rc) from frameSize-points (max 512) of speech signal with* sampling frequency sf (max 20 000 Hz)* speech (in): pointer to speech signal (of type float)* frameSize (in): window (512 points typically) extractracted from speech* m (in): order of model. (m+1) autoregressive coefficients* a (in): pointer to auto regressive coefficients* rc (in): pointer to reflection coefficients* gain (in): pointer to model gain parameters* sf (in): sampling frequency*/int Fe::plp_analysis(float *speech, int frameSize, int m, float *a, float *rc, float *gain, int sf){
assert(frameSize<=512);
float spectr[512]; /* FFT spectrum */
float r[16], s; /* reflection coefficients */
float rcmct;
int jfilt; /* loop counter variable */
int nspt;
float alpmin; float audspe[23]; /* auditory spectrum coeff */ int npoint; /* number of spectral points */ int nsize; int nfft; /* number of points in fft */
int ib, ii, kk, mh, ip;
float aib; float aip, alp[17]; int mct, mct2; FeComplex cx[512], cy[512]; double d_1; /* temp double calculation variable */
FeComplex *cxp, *cend; float *sp; --speech; /* Parameter adjustments */ --a; --rc;
nfft = (int) (log(frameSize) / 0.693148) + 1; /* get fft size from the window length */
nsize = (int)(pow(2.0,(nfft))+0.5);
npoint = nsize / 2 + 1; /* get number of spectral points */ if (m_plpIcall == 0) { /* if called for the first time, */ audw_(npoint, &m_plpNfilt, m_plpCb, m_plpIbegen, sf); /* compute spectral weights coefficients */ cosf_(m, m_plpNfilt, m_plpWcos); /* compute IDFT coefficient table */ m_plpIcall = 1; }
/**********************************/ /* compute speech power spectrum */ /**********************************/ /* pack real signal into complex array for fast TrigRecombFFT */ cend = cx + frameSize/2; sp = speech + 1; for(cxp = cx;cxp < cend;cxp++){ cxp->m_re = *sp++; cxp->m_im = *sp++; } /* for odd length input */ if( ((int)(frameSize/2)) * 2 != frameSize){ cxp->m_re = *sp; cxp->m_im = 0.0; cxp++; } cend = cx + (1 << (nfft -1)); for(;cxp < cend;cxp++){ cxp->m_re = 0.0; cxp->m_im = 0.0; } /* whoops; need to check for odd.. */ TrigRecombFFT(cx,cy,nfft-1); int fft_size_2=(1 << (nfft -1));
for(ii=0;ii<fft_size_2;ii++){
spectr[ii]=cy[ii].m_im*cy[ii].m_im + cy[ii].m_re*cy[ii].m_re;
} spectr[fft_size_2]=0; /* owkwon: m_fftSpectrum[N/2]=0 */ /*****************************/ /* compute auditory spectrum */ /*****************************/ { for (jfilt = 2; jfilt <= m_plpNfilt - 1; ++jfilt) {
audspe[jfilt-1] = 0.0; for (ii=0;ii<m_plpIbegen[jfilt + 22]-m_plpIbegen[jfilt - 1]+1;ii++){ audspe[jfilt-1] += spectr[m_plpIbegen[jfilt - 1] - 1 + ii] * m_plpCb[m_plpIbegen[jfilt + 45] - 1 + ii]; } } } /*********************************************/ /* cubic root intensity-loudness compression */ /*********************************************/ for (ii = 2; ii <= m_plpNfilt - 1; ++ii) { d_1 = audspe[ii - 1]; audspe[ii - 1] = (float) pow(d_1, c_b17); } /* the first and last point of auditory spectrum */ audspe[0] = audspe[1]; audspe[m_plpNfilt - 1] = audspe[m_plpNfilt - 2]; /**************************************/ /* inverse discrete fourier transform */ /**************************************/ { float rtmp, *audp, *audend, *wcp; nspt = (m_plpNfilt - 1) << 1; // owkwon : nspt = nfilt - 1 << 1; for (kk = 1; kk <= m + 1; ++kk){ rtmp = audspe[0]; audend = audspe + m_plpNfilt; /* translation for indices and start point */ wcp = m_plpWcos + (2 + kk * 23 - 24); for (audp = audspe + 1; audp < audend; audp++){ rtmp += *audp * *wcp++; } r[kk - 1] = rtmp/nspt; } } if(r[0]==0){ /* to handle all-zero data */
r[0]=1;
} /*************************************/ /* solution for autoregressive model */ /*************************************/ a[1] = 1.0; alp[0] = r[0]; rc[1] = -r[1] / r[0]; a[2] = rc[1]; alp[1] = r[0] + r[1] * rc[1]; for (mct = 2; mct <= m; ++mct){ s = 0.0; mct2 = mct + 2; alpmin = alp[mct - 1]; for (ip = 1; ip <= mct; ++ip){ s += r[(mct2 - ip) - 1] * a[ip]; } rcmct = -s / alpmin; mh = mct / 2 + 1; for (ip = 2; ip <= mh; ++ip){ ib = mct2 - ip; aip = a[ip]; aib = a[ib]; a[ip] = aip + rcmct * aib; a[ib] = aib + rcmct * aip; } a[mct + 1] = rcmct; alp[mct] = alpmin - alpmin * rcmct * rcmct; rc[mct] = rcmct; } *gain = alp[m]; return (1);} /** int audw_ (npoint, nflit, cb, ibegen, sf)* Computes auditory weighting functions * npoint (in): number of points in the fft spectrum * nfilt (in): number of samples of auditory spectrum * equally spaced on the bark scale; * 1st filter at dc and last at the Nyquist frequency * cb (out): array of weighting coefficients to simulate * critical band spectral resolution * and equal loudness sensitivity of hearing * on npoint speech power spectrum * ibegen(out): three-dimensional array which indicates where * to begin and where to end integration * of speech spectrum and where the given * weighting function starts in array cb * get Nyquist frequency in Bark*/int Fe::audw_ ( int npoint, int *nfilt, float *cb, int *ibegen, int sf){ float r_1, r_2; /* temp calculation variables */ float d_1; float freq, zdel, rsss; /* Local variables */ int i, j; float x, z, f0, z0, f2samp, fh, fl, fnqbar; int icount; float fsq; --cb; /* Parameter adjustments */ ibegen -= 24; /* [j=1 + 23 - 24] --> [0] */ d_1 = sf / 1200.0; fnqbar = (float) (6.0 * log(d_1 + sqrt(d_1 * d_1 + 1.0))); *nfilt = (int) fnqbar + 2; /* compute number of filters for less than 1 Bark spacing */ f2samp = (float) ((npoint - 1.0) / (sf / 2.0)); /* frequency -> fft spectral sample conversion */ zdel = fnqbar / (float) (*nfilt - 1); /* compute filter step in Bark */ icount = 1; /* loop over all weighting functions */ for (j = 2; j <= (*nfilt - 1); ++j){ ibegen[j + 69] = icount; /* ibgen[23][3] --> [j][3] -->[j+69] */ z0 = zdel * (float) (j - 1); /* get center frequency of the j-th filter in Bark */ f0 = (float) (600.0* (exp(z0/6.0) - exp(-z0/6.0))/2.0); /* get center frequency of the j-th filter in Hz */ fl = (float) (600.0* (exp((z0 - 2.5)/6.0) - exp(-(z0 - 2.5)/6.0))/2.0); /* get low-cut frequency of j-th filter in Hz */ r_1 = fl * f2samp; ibegen[j + 23] = ((int) (r_1+0.5)) + 1; /* ibgen[j.1] -> [j+23] */ if (ibegen[j + 23] < 1) ibegen[j + 23] = 1; fh = (float) (600.0*(exp((z0 + 1.3)/6.0) - exp(-(z0 + 1.3)/6.0)) /2.0); /* get high-cut frequency of j-th filter in Hz */ r_1 = fh * f2samp; ibegen[j + 46] = ((int) (r_1 +0.5)) + 1; /* ibgen[j.2] -> [j+46] */ if (ibegen[j + 46] > npoint) ibegen[j + 46] = npoint; for (i = ibegen[j + 23]; i <= ibegen[j + 46]; ++i){ freq = ((float) (i - 1)) / f2samp; /* get frequency of j-th spectral point in Hz */ x = (float)(freq / 600.0); /* get frequency of j-th spectral point in Bark */ d_1 = x; z = (float) (6.0 * log(d_1 + sqrt(d_1 * d_1 + 1.0))); z -= z0; /* normalize by center frequency in barks: */ if (z <= -0.5) { /* compute weighting */ d_1 = (float)(z + 0.5); cb[icount] = (float) pow(c_b28, d_1); } else if (z >= 0.5){ d_1 = (float)((-2.5) * (z - 0.5)); cb[icount] = (float) pow(c_b28, d_1); } else{ cb[icount] = 1.0; } /* calculate the 40 db equal-loudness curve */ /* at center frequency and add it to the weighting */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -