// File: fe_feature.h
// Font size: (web-viewer control label; not part of the header)
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
#ifndef _FE_FEATURE_H_
#define _FE_FEATURE_H_
#include "FE_common.h"
#include "FE_endpoint.h"
#include "FE_pitch.h"
#include "FE_enhance.h"
#include "FE_window.h"
#include "FE_polynomial.h"
#include "FE_plcc.h"
/* Shorthand names for the single-precision instantiations of the polynomial
   and complex templates. CPolynomial and Complex are not declared in this
   file (presumably they come from FE_polynomial.h -- confirm). */
typedef CPolynomial<float> Polynomial;
/* CComplex is the element type of the root vectors passed to
   Fe::lpc_to_formant() and Fe::lpc_to_lsf(). */
typedef Complex<float> CComplex;
// Include definition for interface
#ifdef MAIN_PROGRAM
#include "FE_wave_feature.h"
#else
#include "../kWaves-0.95/Sources/WaveFeature.h"
#endif
/*
 * Complex number.
 * Plain data holder: two public single-precision fields, no constructors,
 * so the fields are uninitialized until the caller assigns them.
 */
class FeComplex {
public:
    float m_re; /* real part */
    float m_im; /* imaginary part */
};
/*
 * Spectrum for PLP.
 * Plain data holder with public fields and no constructors; nothing in this
 * class allocates or frees m_specData.
 */
class FeSpectrum {
public:
    int m_ntime;        /* presumably the number of time frames -- confirm */
    int m_nfreq;        /* presumably the number of frequency bins -- confirm */
    float m_rate;       /* rate value; unit is not stated in this header */
    float **m_specData; /* pointer-to-pointer spectrum storage; presumably
                           m_ntime rows of m_nfreq values -- TODO confirm */
};
/*
 * Feature kinds.
 * Enumerators are numbered consecutively starting at FE_LPC = 0, so the final
 * enumerator FE_NUM_FEAT equals the number of kinds listed before it.
 * The feature name tables must be kept consistent with this declaration.
 */
enum FeatKind {
    /* base coefficient features */
    FE_LPC = 0,
    FE_LPCC,
    FE_PLCC,
    FE_MFCC,
    FE_FTCC,
    FE_FBANK,

    /* "_D" counterparts of the six kinds above (presumably delta features;
       see HasDeltaFeat / GetBaseFeatKind -- confirm) */
    FE_LPC_D,
    FE_LPCC_D,
    FE_PLCC_D,
    FE_MFCC_D,
    FE_FTCC_D,
    FE_FBANK_D,

    /* spectrum-type kinds */
    FE_FFT_SPEC,
    FE_LPC_SPEC,
    FE_LPCC_SPEC,
    FE_MFCC_SPEC,
    FE_FTCC_SPEC,

    /* other LPC-derived kinds */
    FE_LPCCOV,
    FE_LAR,
    FE_LSF,
    FE_PARCOR,
    FE_FORMANT,

    /* frame-level measures and segmentation */
    FE_ZCR,
    FE_ENERGY,
    FE_PITCH,
    FE_VUS,
    FE_ENDPOINT,

    /* epoch / glottal / residual / FFT-cepstrum kinds */
    FE_EPOCH,
    FE_GLOFLOW,
    FE_GLOPULSE,
    FE_LPCRES,
    FE_FFTCEP,

    FE_FILE,
    FE_NUM_FEAT /* count of the kinds above; keep last */
};
/* Feature name must be consistent with the declaration of FeatKind */
/* Per-feature string tables; only declared here, defined in another
   translation unit. NOTE(review): presumably both are indexed by FeatKind
   and hold FE_NUM_FEAT entries (names and file extensions respectively) --
   confirm against the definitions. */
extern const char *FE_featNameA[];
extern const char *FE_featExtA[];
/*
 * Lifter type.
 * Stored in Fe::m_lifter and passed to Fe::cepstral_window().
 * Values are implicit and consecutive, starting at LIFT_NO = 0.
 */
enum FeLifter {
    LIFT_NO,
    LIFT_SIN,
    LIFT_LINEAR,
    LIFT_SQRT,
    LIFT_CUBE_ROOT
};
/*
 * Byte order.
 * Stored in Fe::m_byteOrder. MY_BIG_ENDIAN is 0 and MY_LITTLE_ENDIAN is 1.
 */
enum FeByteOrder {
    MY_BIG_ENDIAN,
    MY_LITTLE_ENDIAN
};
/*
 * Silence/Voiced/Unvoiced frame classification.
 * Element type of the vusA vectors used by the pitch and vus routines of Fe.
 * Values are implicit and consecutive, starting at FRM_SILENCE = 0.
 */
enum EVusType {
    FRM_SILENCE,
    FRM_UNVOICED,
    FRM_VOICED
};
/*
 * Mel filter banks: one record per filter, stored in Fe::m_MelFB.
 * NOTE(review): the three fields look like the low-edge, center and
 * high-edge indices of one filter (presumably FFT bin indices) -- confirm
 * in mel_cep.cpp.
 */
typedef struct {
    int m_lowX;    /* low index */
    int m_centerX; /* center index */
    int m_highX;   /* high index */
} MfccMelFB;
/*
 * Fe holds the feature-extraction configuration, cached tables and every
 * analysis routine of the library in a single class with all-public members.
 * Member functions are only declared here; each "// xxx.cpp" marker below
 * names the source file that is expected to contain that group's definitions.
 *
 * NOTE(review): string, vector, FeMatrix, CSegment, CFeature and FeWindow are
 * not declared in this header; they presumably come from the FE_*.h and
 * WaveFeature headers included above (with std names made visible there) --
 * confirm.
 */
class Fe { /* Feature extraction parameters */
public:
/* Raw int pointers for progress reporting and cancellation; presumably set
   through InitProgress() and not owned by this object -- confirm in
   feature.cpp. */
int *m_pProgress;
int *m_pCancel;
string m_tag;
// common parameters
int m_dither;
int m_lpcOrder;
int m_cepOrder;
int m_fbOrder;
int m_cepSmooth;
int m_fftSize;
int m_sampleRate;
/* The "Ms" suffix suggests these sizes are in milliseconds -- TODO confirm. */
int m_shiftSizeMs;
int m_winSizeMs;
int m_deltaSize;
float m_emphFac;
FeWindow m_window;
FeLifter m_lifter;
FeByteOrder m_byteOrder;
int m_swapByte; /* default byte order is little endian */
int m_covShiftSizeMs, m_covWinSizeMs;
// temporary storage
vector<EVusType> m_vusA;
vector<float> m_pitchA;
vector<FeComplex> m_fftW; /* complex work array m_fftW (presumably FFT twiddle factors, like m_plpW -- confirm) */
// for MFCC
int m_MelFBfftSize;
vector<MfccMelFB> m_MelFB;
vector<float> m_MelWeight;
vector<float> m_MelCenterFreq;
vector<int> m_MelCenterIdx;
vector<float> m_dctMatrix;
vector<float> m_idctMatrix;
float m_logEnergyFloor;
float m_energyFloor;
// for pitch
float m_meanPitch;
int m_pitchFrameN;
// for PLP
int m_plpOrder; /* PLP model order */
int m_plccOrder; /* number of parameters */
int m_plpGain; /* gain flag ON/OFF (1/0) */
float m_plpExpon; /* peak enhancement factor */
vector<FeComplex> m_plpW; /* used by FFT() to hold W twiddle */
int m_plpMofW;
vector<FeComplex> m_plpCF; /* Trigonometric Recombination Coeff */
int m_plpMofCF;
int m_plpIcall; /* Initialized data */
int m_plpNfilt;
/* Fixed-size PLP tables. The "was [r][c]" notes record the former 2-D shape
   of arrays that are now stored flat (23*16 = 368, 23*3 = 69). */
float m_plpHwei[512]; /* hamming window -- weigh speech */
float m_plpWcos[368]; /* was [23][16] */
float m_plpCb[900];
int m_plpIbegen[69]; /* was [23][3] */
// basic.cpp
/* Per-frame entry points. The short* overloads take 16-bit samples; the
   underscore-prefixed variants take float samples (presumably already
   preprocessed -- confirm in basic.cpp). */
int fftcep_spectrum_basic(short *sample, int frameSize, float *spectrum, int fftSize, int cepFilterLen);
int lpc_spectrum_basic(short *sample, int frameSize, int norder, float *spectrum, int fftSize);
int lpccep_spectrum_basic(short *sample, int frameSize, int ceporder, int norder, float *spectrum, int fftSize);
int melcep_spectrum_basic(short *sample, int frameSize, int ceporder, float *spectrum, int fftSize);
int lpc_basic(short *sample, int frameSize, float *acf, int norder, float *G);
int lpc_cov_basic(short *sample, int frameSize, float *acf, int norder, float *G);
int lpc_error_basic(short *sample, int frameSize, float *acf, int norder, float* residual);
int parcor_basic(short *sample, int frameSize, float *kcf, int norder);
int lar_basic(short *sample, int frameSize, float *lar, int norder);
int formant_basic(short *sample, int frameSize, float *formant, int formantN, int norder);
float calc_lpc_gain_basic(float *r, float *acf, int norder);
int _lpc_parcor_basic(float *sample, int frameSize, float *acf, float *kcf, int norder, float *G);
int _lpc_basic(float *sample, int frameSize, float *acf, int norder, float *G);
int _lpc_error_basic(float *sample, int frameSize, float *acf, int norder, float* residual);
int preprocessing(short *sample, int sampleN, float *out);
float compute_energy(float *sample, int sampleN, float mean);
int compute_zero_cross_rate(float *sample, int sampleN, int level, int dc_bias);
int cepstral_window(float *cep,int ceporder, FeLifter lifter);
int preemphasize(float *sample, int sampleN, float emphFac);
// delta.cpp
int delta_compute(FeMatrix<float>& input, int filterLen, FeMatrix<float>& output);
int delta_basic(FeMatrix<float>& input, FeMatrix<float>& output, float *filterCoeff, int filterLen);
int delta_init_filter(float *filterCoeff, int filterLen);
// endpoint.cpp
bool EpdMain(const char *inputfile, int sampleRate, const char *outputfile);
int epd_basic(short *sampleA, int sampleN, int sampleRate, vector<CSegment>& endPointA);
bool epd_insert_endpoint(vector<CSegment>& endPointA, float startPt, float endPt);
// enhance.cpp
int EnhanceMain(const char *infile, const char *outfile, int sampleRate, int isWiener);
void enhance_basic(short *sample, int sampleN, int samplingRate, int isWiener);
// epoch.cpp
/* PitchPeriod is optional and defaults to NULL. */
int calc_epoch(float *sample, int sampleN, vector<short>& epoch, short* PitchPeriod = NULL);
float calc_DWT(float *sample, float *a, int len);
// error.cpp
/* printf-style variadic reporting helpers.
   NOTE(review): fmt is a non-const char*, so passing a string literal relies
   on a conversion that was deprecated in C++03 and removed in C++11. Switching
   to const char* must be done together with the definitions in error.cpp. */
int FE_INFO(char *fmt, ... );
int FE_WARN(char *fmt, ... );
int FE_ERROR(char *fmt, ... );
int FE_FATAL(char *fmt, ... );
int err_quit(char *fmt, ... );
int err_ret(char *fmt, ... );
int err_dump(char *fmt, ... );
int err_fopen (const char *s);
// feature.cpp
Fe();
/* Virtual destructor: Fe may be deleted through a base-class pointer. */
virtual ~Fe();
void Init(FeatKind fk, CFeature& adcData);
/* FeatureMain / FeatureExtract each come in two overloads: a file-to-file
   form (input and output paths) and an in-memory form working on CFeature
   objects with begin/end indices. */
int FeatureMain(FeatKind fk, const char *infile, const char *outfile, const char *parafile = NULL, const char* tag = NULL);
int FeatureMain(FeatKind fk, CFeature& adcData, int beginX, int endX, CFeature& feature, int outBeginX, int outEndX);
void FeatureExtract(FeatKind fk, const char *inputFile, const char *outputFile);
int FeatureExtract(FeatKind fk, CFeature& adcData, int beginX, int endX, CFeature& feature, int outBeginX, int outEndX);
/* 1-D results go to a vector<short>, 2-D results to a FeMatrix<float>. */
int compute_feature_1d(FeatKind fk, float *sample, int sampleN, vector<short>& featA);
int compute_feature_2d(FeatKind fk, float *sample, int sampleN, FeMatrix<float>& featA);
int ReadParaFile(const char *parafile);
int ReadTag(const char *tag, int* pTag, int ndim);
int GetShiftSize();
int GetFrameSize();
int GetDim(FeatKind fk);
float LogE(float x);
int Integrate(float *a, int n, short *b);
int Integrate(short *a, int n, short *b);
float GetMedian(float* a, int n);
int InitProgress(int* pProgress, int* pCancel);
int CheckWinMessage();
int ShowProgress(int progress);
// formant.cpp
int lpc_to_formant(float* acf, int norder, float* formant, int formantN, vector<CComplex>& rootsA);
int formant_check_range(FeMatrix<float>& formant, int formantN, int frameN);
int formant_median_filter(FeMatrix<float>& formant, int formantN, int frameN);
int formant_linear_filter(FeMatrix<float>& formant, int formantN, int frameN);
int formant_remove_nonvoice(FeMatrix<float>& formant, int formantN, int frameN, vector<float>& pitchA);
// io.cpp
int ad_read(FILE *fp, short *buf, int n);
int ad_write(FILE *fp, short *buf, int n);
int read_feature_basic(FeMatrix<float>& feat, int dim, FILE *fi);
int write_feature_basic(FeMatrix<float>& feat, int num_frames, int dim, FILE *fo);
int write_feature_vectors(FILE *fp, FeMatrix<float>& feat, int* pTag = NULL, const char* featname = NULL);
/* Same parameter order as the C library fread/fwrite, with int sizes;
   presumably wrappers that apply m_swapByte -- confirm in io.cpp. */
int FREAD(void *ptr, int size, int nitems, FILE *stream);
int FWRITE(void *ptr, int size, int nitems, FILE *stream);
// lpc_cep.cpp
int lpc_cepstrum_basic(short *sample, int frameSize, float *lpc_cep, int ceporder, int norder);
int auto_correlation(float *wsamp, float *ac, int nsamp, int order);
int levins (float *r, float *kcf, float *acf, int norder);
int durbin(float *r, float *kcf, float *acf, int order);
int stable_k(float *kcf, int norder);
int normalize_corr(float *r, int norder);
int covariance(float *sample, int frameSize, FeMatrix<float>& cov, int order);
int choldc(FeMatrix<float>& a, int n, float *p);
int cholsl(FeMatrix<float>& a, int n, float *p, float *b, float *x);
int CholeskySol(FeMatrix<float>& a, float *b, float *x, int n);
int lpc_cov(float *sample, int frameSize, FeMatrix<float>& cov, float *acf, int order, float *G);
int lpc_cov_error(float *sample, int frameSize, float *acf, int norder, float* residual);
int lpc_to_cepstrum(float *acf, int norder, float *cep, int ncf, float G);
int bilinear_transform(float *org_seq, int num_org_seq, float *trans_seq, int no_update, float warp_coeff);
// lsf.cpp
int lsf_basic(short *sample, int frameSize, float *lsp, int norder);
int lpc_to_lsf(float* acf, int norder, float* lsf, vector<CComplex>& oldRootsP, vector<CComplex>& oldRootsQ);
// mel_cep.cpp
int mel_cepstrum_basic(short *sample, int frameSize, float *mel_cep, int ceporder, int fftSize);
int _mel_cepstrum_basic(float *sample, int frameSize, float *mel_cep, int fborder, int ceporder, int fftSize);
void MfccInitMelFilterBanks (float startingFrequency, float samplingRate, int fftLength, int numChannels);
void MfccMelFilterBank (float *sigFFT, int numChannels, float* output, int normalize);
int MfccInitDCTMatrix (float *dctMatrix, int ceporder, int numChannels);
int MfccInitIDCTMatrix (float *idctMatrix, int ceporder, int numChannels);
void MfccDCT (float *x, float *dctMatrix, int ceporder, int numChannels, float *mel_cep);
void MfccIDCT (float *mel_cep, float *idctMatrix, int ceporder, int numChannels, float *x);
// misc_lib.cpp
/* NOTE(review): lrand48/srand48 are declared as members here, so inside Fe
   they hide any global functions of the same name. */
long lrand48();
void srand48(long seedvar);
/* Allocation helpers returning raw pointers; presumably thin wrappers over
   malloc/calloc/realloc, with free2d/free3d releasing what alloc2d/alloc3d
   return -- confirm in misc_lib.cpp. */
void *m_alloc(int size);
void *c_alloc(int num_of_elts, int size);
void *re_alloc(void *block, int size);
char **alloc2d(int dim1, int dim2, int size);
char ***alloc3d(int dim1, int dim2, int dim3, int size);
void free2d(void **p);
void free3d(void ***p);
int little_endian();
float sum(int nstart, int nfinal, float *seq);
float product (int nstart, int nfinal, float *seq );
float power(float x, int n);
int ipower(int x, int n);
// noise.cpp
double Random();
double GaussianNoise(double x, double s);
int AddNoise(float *sample, int num_of_samples, float *waveform, int insert_noise, float SNR);
int Dither(float *buf, int n);
// pitch.cpp
int pitch_basic(short *sample, int sampleN, int sampleRate, int shiftSize, vector<EVusType>& vusA, vector<float>& pitchA);
bool pitch_amdf(float *speech, float *amdf, int blockSize, int pmin, int pmax);
float pitch_find(float *speech, int blockSize, float *pitchA, int t, float *amdf, int shiftSize, int sampleRate);
bool pitch_low_pass_filter(short *input, int sampleN, int lpfLen);
bool pitch_find_minmax(float *amdf, int pmin, int pmax, int* smin, int* smax);
int pitch_remove_spike(vector<float>& pitchA);
int pitch_linear_filter(vector<float>& pitchA);
float pitch_fft_one_freq(float *sample, int blockSize, int p);
// plp_cep.cpp
int plp_cepstrum_basic(short *sample, int frameSize, float *plp_cep, int ceporder, int norder);
int _plp_cepstrum_basic(float *sample, int frameSize, float *plp_cep, int ceporder, int norder);
int init_plp();
int _plp_basic(float *sample, int frameSize, float *plp_cep, int ceporder, int norder);
int plp_analysis(float *speech, int frameSize, int m, float *a, float *rc, float *gain, int sf );
/* The trailing-underscore names (audw_, cosf_, a2gexp_) and the "was [r][c]"
   table notes above suggest code translated from Fortran -- unverified. */
int audw_ ( int npoint, int *nfilt, float *cb, int *ibegen, int sf );
int cosf_ ( int m, int nfilt, float *wcos );
int a2gexp_ ( float *a, float *gexp, int i, int nc, float expon );
int TrigRecombFFT ( FeComplex *cx, FeComplex *y, int m );
int PlpFFT( FeComplex *x, int m );
int CalculateW ( int m );
int CalculateCF( int m );
int RastaFilter(FeSpectrum *mfsc, int r_filter, float r_f_param);
// spectrum.cpp
int fft_spectrum_basic(short *sample, int frameSize, float *spectrum, int fftSize);
int _fft_spectrum_basic(short *sample, int frameSize, float *spectrum, int fftSize, int cep_smooth, int cepFilterLen);
int fft_cepstrum_basic(short *sample, int frameSize, float *fft_cep, int ceporder, int fftSize);
int _fft_cepstrum_basic(float *sample, int frameSize, float *fft_cep, int ceporder, int fftSize);
int filterbank_basic(short *sample, int frameSize, float *filter_bank, int fborder, int fftSize);
int _filterbank_basic(float *sample, int frameSize, float *filter_bank, int fborder, int fftSize, int cep_smooth, int cepFilterLen);
int compute_spectrum(float *input, float *spectrum, int winlength, int log2length);
int smooth_spectrum(float *spectrum, int pointsN);
void FAST_new(FeComplex *x, int m);
// vus.cpp
/* Voiced/unvoiced/silence labelling; results are written into vusA. */
int vus_basic(short *sample, int sampleN, int frameSize, vector<EVusType>& vusA);
int vus_median_filter(vector<EVusType>& vusA);
int vus_remove_short_segments(vector<EVusType>& vusA);
int vus_remove_short_segments_sub(vector<EVusType>& vusA, EVusType type, int minDur);
};
/* Free helper functions; only declared here, defined in other source files. */
/* Per-kind defaults (judging by the names: default feature dimension and
   default analysis order -- confirm against the definitions). */
int GetDefaultDim(enum FeatKind fk);
int GetDefaultOrder(enum FeatKind fk);
/* Conversions between a FeatKind and its textual name / file extension;
   presumably backed by FE_featNameA and FE_featExtA -- confirm. */
const char *GetFeatName(FeatKind fk);
string GetFeatExtension(FeatKind fk);
FeatKind GetFeatName2Kind(const char* name);
/* Kind classification helpers; the int results are presumably 0/1 flags. */
FeatKind GetBaseFeatKind(FeatKind fk);
int HasDeltaFeat(FeatKind fk);
int HasStftFeat(FeatKind fk);
/* Takes two const void* and returns int, i.e. the comparator shape expected
   by qsort/bsearch (presumably compares two floats -- confirm). */
int FE_CompareFloat(const void *a, const void *b);
/* NOTE(review): parameter meanings are not documented in this header;
   presumably an FFT over arrays a and b with direction flag iff. */
void PRFFT_NEW(float *a, float *b, int m, int n_pts, int iff);
#endif
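// Web-viewer keyboard shortcut help (page residue, not part of fe_feature.h):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -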