📄 fe_plcc.cpp

📁 这是一个语音特征提取的程序源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////

#include "StdAfx.h"#include "FE_feature.h"

/* Table of constant values       */static const double c_b17 = 0.33;static const double c_b28 = 10.0;
int Fe::_plp_cepstrum_basic(float *sample, int frameSize, float *plp_cep, int ceporder, int norder)
{
	_plp_basic(sample, frameSize, plp_cep, ceporder, norder);
	return 1;
}


int Fe::plp_cepstrum_basic(short *sample, int frameSize, float *plp_cep, int ceporder, int norder)
{
	vector<float> frameA(frameSize);
	preprocessing(sample, frameSize, &frameA[0]);
	m_window.Windowing(&frameA[0], frameSize, WIN_HAMMING);
	_plp_cepstrum_basic(&frameA[0], frameSize, plp_cep, ceporder, norder);
	return 1;
}


/*
* int _plp_basic(sample, sampleN, frameSize, plp_cep, ceporder, norder)
* Calculates the PLP coefficients of an input speech signal using the
* algorithm of Hynek Hermanski.
* sample       (in): pointer to input speech data of type float
* sampleN      (in): number of data points available in array 
* frameSize    (in): analysis window in samples
* Expon        (in): peak enhancement factor
* norder       (in): order of PLP model
* ceporder     (in): order of cepstrum order
* Gain         (in): gain flag (1 = gain; 0 no)
* plp_cep     (out): pointer to output plp data of type float
* On exit returns the number of frames.
*/

int Fe::_plp_basic(float *sample, int frameSize, float *plp_cep, int ceporder, int norder)
{
	float gain;                  /* gain parameter of model          */
	vector<float> a(norder+1);        /* auto regressive coefficients     */
	vector<float> rc(norder+1);       /* reflection coefficients          */
	plp_analysis(sample, frameSize, norder, &a[0], &rc[0], &gain, m_sampleRate);
	if(m_plpGain) for (int k = 0; k < norder + 1; k++) a[k] = a[k]/gain;
	a2gexp_(&a[0], plp_cep, norder, ceporder+1, m_plpExpon);
	plp_cep[0] = -plp_cep[0];
	return 1;
}


/*
* plp_cep.c
*
* written by Hynek Hermanski
*
* translated by f2c (version of 30 January 1990  16:02:04).
* Modified by Chuck Wooters, ICSI, 7/6/90
*
* further modified at OGI -- array indices in some inner loops were
* replaced by pointers.  the fft routine was replaces by TrigRecombFFT.
*
* Cleaned Up by Johan Schalkwyk (March 1993)
*
* The actual processing is split up into a few pieces:
* 	1) power spectral analysis
*	2) auditory spectrum computation
*	3) compression
*	4) inverse FFT
*	5) autoregressive all-pole modeling (cepstral coefficients)
*/


int Fe::init_plp()
{
	m_plpOrder = PLP_ORDER;	/* model order                  */
	m_plccOrder = PLP_CEP_ORDER;	/* number of cepstral parameters excluding c0 */
	m_plpGain = TRUE;		/* gain flag                    */
	m_plpExpon = (float)PLP_P_EXP;		/* peak enhancemnt factor       */
	m_plpW.clear();               /* used by FFT() to hold W twiddle   */
	m_plpMofW = 0;
	m_plpCF.clear();             /* Trigonometrix Recombination Coeff  */
	m_plpMofCF = 0;

	m_plpIcall = 0;       /* Initialized data?               */		
	m_plpNfilt = 0;

	return 1;
}

/** int plp_analysis(speech, frameSize, m, a, rc, gain, sf)* subroutine which computes m-th order (max 15) PLP model (described by * m+1 autoregressive coefficients a and gain or by m reflection * coefficients rc) from frameSize-points (max 512) of speech signal with* sampling frequency sf (max 20 000 Hz)* speech (in): pointer to speech signal (of type float)* frameSize  (in): window (512 points typically) extractracted from speech* m      (in): order of model. (m+1) autoregressive coefficients* a      (in): pointer to auto regressive coefficients* rc     (in): pointer to reflection coefficients* gain   (in): pointer to model gain parameters* sf     (in): sampling frequency*/int Fe::plp_analysis(float *speech, int frameSize, int m, float *a, float *rc, float *gain, int sf){
	assert(frameSize<=512);
	float spectr[512];         /* FFT spectrum                     */
    float r[16], s;              /* reflection coefficients          */
    float rcmct;
    int   jfilt;                 /* loop counter variable            */
    int   nspt;
    float   alpmin;    float   audspe[23];          /* auditory spectrum coeff          */    int     npoint;              /* number of spectral points        */    int     nsize;                            int   nfft;                  /* number of points in fft          */
    int   ib, ii, kk, mh, ip;
    float   aib;    float   aip, alp[17];    int     mct, mct2;    FeComplex cx[512], cy[512];    double d_1;                         /* temp double calculation variable */
	
    FeComplex *cxp, *cend;    float   *sp;	    --speech;                            /* Parameter adjustments           */    --a;    --rc;	
	nfft = (int) (log(frameSize) / 0.693148) + 1; /* get fft size from the window length   */
	nsize = (int)(pow(2.0,(nfft))+0.5);
	npoint = nsize / 2 + 1; /* get number of spectral points         */    if (m_plpIcall == 0) {                   /* if called for the first time,    */		audw_(npoint, &m_plpNfilt, m_plpCb, m_plpIbegen, sf);  /* compute spectral weights coefficients */			cosf_(m, m_plpNfilt, m_plpWcos);	   /* compute IDFT coefficient table        */			m_plpIcall = 1;	}	
	
    /**********************************/    /*  compute speech power spectrum */    /**********************************/	    /* pack real signal into complex array for fast TrigRecombFFT */    cend = cx + frameSize/2;    sp = speech + 1;    for(cxp = cx;cxp < cend;cxp++){		cxp->m_re = *sp++;		cxp->m_im = *sp++;	}	    /* for odd length input */    if( ((int)(frameSize/2)) * 2 != frameSize){		cxp->m_re = *sp;		cxp->m_im = 0.0;		cxp++;	}	    cend = cx + (1 << (nfft -1));    for(;cxp < cend;cxp++){		cxp->m_re = 0.0;		cxp->m_im = 0.0;	}	    /* whoops; need to check for odd.. */	    TrigRecombFFT(cx,cy,nfft-1);		int fft_size_2=(1 << (nfft -1));
	for(ii=0;ii<fft_size_2;ii++){
		spectr[ii]=cy[ii].m_im*cy[ii].m_im + cy[ii].m_re*cy[ii].m_re;
	}	spectr[fft_size_2]=0; /* owkwon: m_fftSpectrum[N/2]=0 */		    /*****************************/    /* compute auditory spectrum */    /*****************************/    {				for (jfilt = 2; jfilt <= m_plpNfilt - 1; ++jfilt) {
			audspe[jfilt-1] = 0.0;			for (ii=0;ii<m_plpIbegen[jfilt + 22]-m_plpIbegen[jfilt - 1]+1;ii++){				audspe[jfilt-1] += spectr[m_plpIbegen[jfilt - 1] - 1 + ii] * m_plpCb[m_plpIbegen[jfilt + 45] - 1 + ii];			}		}    }		    /*********************************************/    /* cubic root intensity-loudness compression */    /*********************************************/	for (ii = 2; ii <= m_plpNfilt - 1; ++ii) {		d_1 = audspe[ii - 1];		audspe[ii - 1] = (float) pow(d_1, c_b17);	}		/* the first and last point of auditory spectrum */	audspe[0] = audspe[1];	audspe[m_plpNfilt - 1] = audspe[m_plpNfilt - 2];		    /**************************************/    /* inverse discrete fourier transform */    /**************************************/    {		float rtmp, *audp, *audend, *wcp;				nspt = (m_plpNfilt - 1) << 1; // owkwon : nspt = nfilt - 1 << 1;		for (kk = 1; kk <= m + 1; ++kk){			rtmp = audspe[0];			audend = audspe + m_plpNfilt;						/* translation for indices and start point */			wcp = m_plpWcos + (2 + kk * 23 - 24);			for (audp = audspe + 1; audp < audend; audp++){				rtmp += *audp * *wcp++;			}			r[kk - 1] = rtmp/nspt;		}    }	if(r[0]==0){ /* to handle all-zero data */
		r[0]=1;
	}	    /*************************************/    /* solution for autoregressive model */    /*************************************/    a[1]   = 1.0;    alp[0] = r[0];    rc[1]  = -r[1] / r[0];    a[2]   = rc[1];    alp[1] = r[0] + r[1] * rc[1];	    for (mct = 2; mct <= m; ++mct){		s    = 0.0;		mct2 = mct + 2;		alpmin = alp[mct - 1];				for (ip = 1; ip <= mct; ++ip){			s += r[(mct2 - ip) - 1] * a[ip];		}				rcmct = -s / alpmin;		mh = mct / 2 + 1;				for (ip = 2; ip <= mh; ++ip){			ib = mct2 - ip;			aip = a[ip];			aib = a[ib];			a[ip] = aip + rcmct * aib;			a[ib] = aib + rcmct * aip;		}		a[mct + 1] = rcmct;		alp[mct] = alpmin - alpmin * rcmct * rcmct;		rc[mct] = rcmct;	}    *gain = alp[m];	    return (1);} /** int audw_ (npoint, nflit, cb, ibegen, sf)* Computes auditory weighting functions           * npoint (in): number of points in the fft spectrum                 * nfilt  (in): number of samples of auditory spectrum               *              equally spaced on the bark scale;                     *              1st filter at dc and last at the Nyquist frequency                                                                      * cb    (out): array of weighting coefficients to simulate               *              critical band spectral resolution                        *              and equal loudness sensitivity of hearing                *              on npoint speech power spectrum                          * ibegen(out): three-dimensional array which indicates where         *              to begin and where to end integration                *              of speech spectrum and where the given            *              weighting function starts in array cb                *              get Nyquist frequency in Bark*/int Fe::audw_ ( int npoint, int *nfilt, float *cb, int *ibegen, int sf){    float  r_1, r_2;                          /* temp calculation variables */    float  d_1;	    float freq, zdel, rsss;            /* Local variables            */    int i, j;    float x, z, f0, z0, f2samp, fh, fl, fnqbar;    int icount;    float fsq;	    --cb;                                     /* Parameter adjustments      */    ibegen -= 24;                             /* [j=1 + 23 - 24] --> [0]    */	    d_1    = sf / 1200.0;                         fnqbar = (float) (6.0 * log(d_1 + sqrt(d_1 * d_1 + 1.0)));          *nfilt = (int) fnqbar + 2;	/* compute number of filters for less than 1 Bark spacing */        f2samp = (float) ((npoint - 1.0) / (sf / 2.0));	/* frequency -> fft spectral sample conversion            */        zdel = fnqbar / (float) (*nfilt - 1);    /* compute filter step in Bark */        icount = 1;                       /* loop over all weighting functions  */    for (j = 2; j <= (*nfilt - 1); ++j){		ibegen[j + 69] = icount;      /* ibgen[23][3] --> [j][3] -->[j+69]  */ 				z0 = zdel * (float) (j - 1);		/* get center frequency of the j-th filter in Bark        */				f0 = (float) (600.0* (exp(z0/6.0) - exp(-z0/6.0))/2.0);		/* get center frequency of the j-th filter in Hz          */				fl = (float) (600.0* (exp((z0 - 2.5)/6.0) - exp(-(z0 - 2.5)/6.0))/2.0);		/* get low-cut frequency of j-th filter in Hz             */				r_1 = fl * f2samp;		ibegen[j + 23] =  ((int) (r_1+0.5)) + 1;    /* ibgen[j.1] -> [j+23] */		if (ibegen[j + 23] < 1)			ibegen[j + 23] = 1;				fh = (float) (600.0*(exp((z0 + 1.3)/6.0) - exp(-(z0 + 1.3)/6.0)) /2.0);		/* get high-cut frequency of j-th filter in Hz           */				r_1 = fh * f2samp;		ibegen[j + 46] = ((int) (r_1 +0.5)) + 1;   /* ibgen[j.2] -> [j+46]  */		if (ibegen[j + 46] > npoint)			ibegen[j + 46] = npoint;				for (i = ibegen[j + 23]; i <= ibegen[j + 46]; ++i){			freq = ((float) (i - 1)) / f2samp;			/* get frequency of j-th spectral point in Hz   */						x = (float)(freq / 600.0);			/* get frequency of j-th spectral point in Bark */						d_1 = x;			z = (float) (6.0 * log(d_1 + sqrt(d_1 * d_1 + 1.0)));						z -= z0;	    /* normalize by center frequency in barks:      */						if (z <= -0.5) {                          /* compute weighting */				d_1 = (float)(z + 0.5);				cb[icount] = (float) pow(c_b28, d_1);			}			else if (z >= 0.5){				d_1 = (float)((-2.5) * (z - 0.5));				cb[icount] = (float) pow(c_b28, d_1);			}			else{				cb[icount] = 1.0;			}						/*     calculate the 40 db equal-loudness curve        */			/*     at center frequency and add it to the weighting */
12 下一页
💿 文件大小 478 K
👤 上传用户 fengkuangyidao
📂 所属分类压缩解压
🏷️ 相关标签

#语音 #特征提取 #程序源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -