📄 hparm.c
字号:
/* ----------------------------------------------------------- */
/*                                                             */
/*                          ___                                */
/*                       |_| | |_/   SPEECH                    */
/*                       | | | | \   RECOGNITION               */
/*                       =========   SOFTWARE                  */
/*                                                             */
/*                                                             */
/* ----------------------------------------------------------- */
/* developed at:                                               */
/*                                                             */
/*      Speech Vision and Robotics group                       */
/*      Cambridge University Engineering Department            */
/*      http://svr-www.eng.cam.ac.uk/                          */
/*                                                             */
/*      Entropic Cambridge Research Laboratory                 */
/*      (now part of Microsoft)                                */
/*                                                             */
/* ----------------------------------------------------------- */
/*         Copyright: Microsoft Corporation                    */
/*          1995-2000 Redmond, Washington USA                  */
/*                    http://www.microsoft.com                 */
/*                                                             */
/*          2001-2002 Cambridge University                     */
/*                    Engineering Department                   */
/*                                                             */
/*   Use of this software is governed by a License Agreement   */
/*    ** See the file License for the Conditions of Use  **    */
/*    **     This banner notice must not be removed      **    */
/*                                                             */
/* ----------------------------------------------------------- */
/*      File: HParm.c:  Speech Parameter File Input/Output     */
/* ----------------------------------------------------------- */

/* Version / revision-control identification strings */
char *hparm_version = "!HVER!HParm: 3.3 [CUED 28/04/05]";
char *hparm_vc_id = "$Id: HParm.c,v 1.2 2005/07/22 10:17:02 mjfg Exp $";

#include "HShell.h"
#include "HMem.h"
#include "HMath.h"
#include "HSigP.h"
#include "HAudio.h"
#include "HWave.h"
#include "HVQ.h"
#include "HParm.h"
#include "HLabel.h"
#include "HModel.h"
#include "esignal.h"
#ifdef UNIX
#include <sys/ioctl.h>
#endif

/* ----------------------------- Trace Flags ------------------------- */

/* Current trace setting.  The T_xxx values below are written as octal
   literals (leading 0), each one a distinct single bit. */
static int trace = 0;
#define T_TOP  0001     /* Top Level tracing */
#define T_BUF  0002     /* Buffer operations */
#define T_CPX  0004     /* Compression/Decompression */
#define T_PBS  0010     /* Buffer status */
#define T_QUA  0020     /* Qualifier operations */
#define T_OBS  0040     /* Observation extraction */
#define T_DET  0100     /* Silence detector operation */
#define T_MAT  0200     /* Matrix operations */

/* --------------------- Global Variables ------------------- */

static Boolean natWriteOrder = FALSE;  /* Preserve natural write byte order */
extern Boolean vaxOrder;               /* true if byteswapping needed to preserve SUNSO */

/* varScale stuff: acts as a cache to stop the scaling file being
   re-read on each file opening */
static float varScale[100];            /* cached scale values (fixed capacity of 100) */
static int varScaleDim=0;              /* initialised to 0 */
static char varScaleFN[MAXFNAMELEN] = "\0";  /* initialised to the empty string */

static Boolean highDiff = FALSE;       /* compute higher order differentials, only up to fourth */
static Boolean UseOldXFormCVN = TRUE;  /* this allows us to go back to the old version
                                          with broken CVN */
static ParmKind ForcePKind = ANON;     /* force to output a customized parm kind to make
                                          older versions happy for all the parm kind types
                                          supported here */
static HMMSet *hset = NULL;            /* hmmset to be used for frontend */

/* ------------------------------------------------------------------- */
/*
   Parameter layout in tables/buffers is

      Static [C0] [E] Deltas Accs

   _N option is ignored everywhere except when copying from buffer
   or table into an observation (ie in ExtractObservation) and in
   GetBufferInfo() which returns the observation vector size in
   tgtvecSize taking into account _N.
   When _0 is used alone it behaves exactly like _E.  When _0_E,
   C0 is placed immediately before energy and in this case deltas
   are not allowed.
*/

/* ----------------- Configuration Information ----------------- */

/*
   An IOConfig record specifies the mapping from the source to the
   target parameterisation.  Its built in defaults can be overridden
   using configuration parameters.
*/

/* Coding style; stored in the 'style' field of IOConfigRec */
typedef enum { FFTbased, LPCbased, VQbased} CodeStyle;

/* IOConfigRec: the user-overrideable settings come first, followed by
   values that are derived internally and working storage. */
typedef struct {
   /* ------- Overrideable parameters ------- */
   ParmKind srcPK;            /* Source ParmKind */
   FileFormat srcFF;          /* Source File format */
   HTime srcSampRate;         /* Source Sample Rate */
   Boolean zMeanSrc;          /* Zero Mean the Source */
   ParmKind tgtPK;            /* Target ParmKind */
   FileFormat tgtFF;          /* Target File format */
   HTime tgtSampRate;         /* Target Sample Rate */
   Boolean saveCompressed;    /* If LPREFC save as IREFC else _C */
   Boolean saveWithCRC;       /* Append check sum on save */
   HTime winDur;              /* Source window duration */
   Boolean useHam;            /* Use Hamming Window */
   float preEmph;             /* PreEmphasis Coef */
   Boolean usePower;          /* Use power instead of Magnitude */
   int numChans;              /* Number of filter bank channels */
   float loFBankFreq;         /* Fbank lo frequency cut-off */
   float hiFBankFreq;         /* Fbank hi frequency cut-off */
   float warpFreq;            /* Warp freq axis for vocal tract normalisation */
   float warpLowerCutOff;     /* lower and upper threshold frequencies */
   float warpUpperCutOff;     /* for linear frequency warping */
   int lpcOrder;              /* Order of lpc analysis */
   float compressFact;        /* Compression factor for PLP */
   int cepLifter;             /* Cepstral liftering coef */
   int numCepCoef;            /* Number of cepstral coef */
   float cepScale;            /* Scaling factor to avoid arithmetic problems */
   Boolean rawEnergy;         /* Use raw energy before preEmp and ham */
   Boolean eNormalise;        /* Normalise log energy */
   float eScale;              /* Energy scale factor */
   float silFloor;            /* Silence floor in dBs */
   int delWin;                /* Delta window halfsize */
   int accWin;                /* Accel window halfsize */
   Boolean simpleDiffs;       /* Use simple differences for delta calcs */
   /* Silence detector parameters */
   Boolean useSilDet;         /* Use Silence Detector */
   int selfCalSilDet;         /* Self calibrating silence detection */
   float spThresh;            /* Speech Threshold (in dB above sil level) */
   float silDiscard;          /* Calibrate discard level */
   float silMean;             /* Mean silence energy (in dB) */
   int spcSeqCount;           /* Number of frames for speech window */
   int spcGlchCount;          /* Number of spc in sil acceptable as glitches */
   int silGlchCount;          /* Number of sil in spc acceptable as glitches */
   int silSeqCount;           /* Number of silence before stopping */
   int marginCount;           /* Number of sil around speech to process */
   Boolean measureSil;        /* Measure Silence */
   Boolean outSilWarn;        /* Give Warning when SilMeas */
   /* Misc */
   int audSignal;             /* Signal Number for Audio Control */
   Boolean v1Compat;          /* V1 compatibility mode */
   char *vqTabFN;             /* Name of VQ Table Defn File */
   float addDither;           /* Additional dither added to file */
   Boolean doubleFFT;         /* use twice the required FFT size */
   /* side based normalisation */
   char *varScaleFN;          /* var scale file name */
   char* cMeanDN;             /* dir to find cepstral mean files */
   char* cMeanMask;           /* cepstral mean selection mask */
   char* cMeanPathMask;       /* cepstral mean path selection mask */
   char* varScaleDN ;         /* dir to find variance estimate files */
   char* varScaleMask;        /* variance estimate file selection mask */
   char* varScalePathMask;    /* variance estimate file path selection mask */
   char* sideXFormMask;       /* side XForm mask */
   char* sideXFormExt;        /* side XForm extension */
   VQTable vqTab;             /* VQ table */
   Matrix MatTran;            /* Stores transformation matrix */
   char *MatTranFN;           /* points to the file name string */
   int thirdWin;              /* Third order differential halfsize
                                 NOTE(review): the original comment duplicated
                                 accWin's ("Accel window halfsize"); confirm */
   int fourthWin;             /* Fourth order differential halfsize */

   /* ------- Internally derived parameters ------- */
   /* These values are allocated in the IOConfigRec but are really */
   /* specific to each pbuf and do not rely on any kind of initialisation */

   /* Following 3 variables always reflect the actual state of */
   /* the associated data which may be intermediate between src and tgt */
   ParmKind curPK;            /* Used to track conversion from srcPK to tgtPK */
   ParmKind unqPK;            /* Used to track conversion from srcPK to tgtPK */
   int nUsed;                 /* num columns used in each row of the parm block */
   /* The next two are static buffer sizes */
   int nCols;                 /* num columns in each row of the parameter block */
   int nCvrt;                 /* num columns produced from coding */
   /* sizes of source and target */
   long nSamples;             /* num samples in original (WAVEFORM only) */
   int srcUsed;               /* num columns which was used in source */
   int tgtUsed;               /* num columns which will be used once converted */
   /* Working storage needed for conversions, etc */
   CodeStyle style;           /* style encoding */
   int frSize;                /* Total number of waveform samples in frame */
   int frRate;                /* Number of waveform samples advanced each frame */
   Vector s;                  /* speech vector */
   ShortVec r;                /* raw speech vector */
   char *rawBuffer;           /* buffer for external data */
   float curVol;              /* current volume dB (0.0-100.0) */
   Vector a,k;                /* lpc and refc vectors */
   Vector fbank;              /* filterbank vector */
   Vector c;                  /* cepstral vector */
   Vector as, ac, lp;         /* Auditory, autocorrelation and lp vectors for PLP */
   Vector eql;                /* Equal loudness curve */
   DMatrix cm;                /* Cosine matrix for IDFT */
   FBankInfo fbInfo;          /* FBank info used for filterbank analysis */
   Vector mean;               /* Running mean shared by this config */
   /* Running stuff */
   Source src;                /* Source to read HParm file from */
   Boolean bSwap;             /* TRUE if source needs byte swapping */
   unsigned short crcc;       /* Running CRCC */
   Vector A;                  /* Parameters for decompressing */
   Vector B;                  /* HTK parameterised files */
   Vector varScale;           /* var scaling vector */
   Vector cMeanVector;        /* vector loaded from cmean dir */
   Vector varScaleVector;     /* vector loaded from varscale dir */
   ParmKind matPK;
   int preFrames;
   int postFrames;
   Boolean preQual;
   InputXForm *xform;
   AdaptXForm *sideXForm;
}IOConfigRec;

typedef IOConfigRec *IOConfig;

/* Indices of the configuration parameters.  The ioConfName table that
   follows is dimensioned by CFGSIZE, and its visible entries are listed
   in this same order, so the two must be kept in step. */
typedef enum {
   /* Source characteristics */
   SOURCEKIND,    /* ParmKind */
   SOURCEFORMAT,  /* FileFormat */
   SOURCERATE,    /* Source sample rate in 100ns */
   ZMEANSOURCE,   /* Zero Mean (Wave only) */
   /* Target characteristics */
   TARGETKIND,    /* ParmKind */
   TARGETFORMAT,  /* FileFormat */
   TARGETRATE,    /* Target sample rate in 100ns */
   SAVECOMPRESSED,/* Save output files in compressed form */
   SAVEWITHCRC,   /* Add crc check to output files */
   /* Waveform Analysis */
   WINDOWSIZE,    /* Window size in 100ns */
   USEHAMMING,    /* Apply Hamming Window */
   PREEMCOEF,     /* Preemphasis Coefficient */
   /* Filterbank Analysis */
   USEPOWER,      /* Use power instead of magnitude */
   NUMCHANS,      /* Num filterbank channels */
   LOFREQ,        /* Lo Fbank frequency */
   HIFREQ,        /* Hi Fbank frequency */
   WARPFREQ,      /* Vocal tract length compensation by frequency warping */
   WARPLCUTOFF,   /* VTL warping cutoff frequencies for smoothing */
   WARPUCUTOFF,
   /* LPC Analysis and Conversion */
   LPCORDER,      /* LPC order */
   COMPRESSFACT,  /* Compression Factor for PLP */
   /* Cepstral Conversion */
   CEPLIFTER,     /* Cepstral liftering coefficient */
   NUMCEPS,       /* Num cepstral coefficients */
   CEPSCALE,      /* Scale factor to prevent arithmetic errors */
   /* Energy Computation */
   RAWENERGY,     /* Use raw energy */
   ENORMALISE,    /* Normalise log energy */
   ESCALE,        /* Log energy scale factor */
   SILFLOOR,      /* Silence floor in dBs */
   /* Regression Coefficients */
   DELTAWINDOW,   /* Window size for 1st diffs */
   ACCWINDOW,     /* Window size for 2nd diffs */
   SIMPLEDIFFS,   /* Use simple differences */
   /* Silence Detector */
   USESILDET,     /* Enable speech/silence detection */
   SELFCALSILDET, /* Self calibrating silence detection on each utterance */
   SPEECHTHRESH,  /* Speech detector threshold */
   SILDISCARD,    /* Energy below which frames discarded when calibrating */
   SILENERGY,     /* Silence detector threshold */
   SPCSEQCOUNT,   /* Speech sequence count */
   SPCGLCHCOUNT,  /* Speech glitch count */
   SILGLCHCOUNT,  /* Silence glitch count */
   SILSEQCOUNT,   /* Silence sequence count */
   SILMARGIN,     /* Margin of silence around speech */
   MEASURESIL,    /* Measure Background Silence */
   OUTSILWARN,    /* Output Warning before Measure Sil */
   /* Audio Input */
   AUDIOSIG,      /* Signal for audio control */
   V1COMPAT,      /* Set Version 1 compatibility mode */
   /* Vector Quantisation */
   VQTABLE,       /* Name of file holding VQ table */
   ADDDITHER,     /* Amount of additional dither added to file */
   DOUBLEFFT,     /* Use twice the required FFT size */
   /* side based normalisation */
   /* variance scaling */
   VARSCALEFN,
   /* cepstral mean subtraction */
   CMEANDIR,      /* dir to find the means */
   CMEANMASK,     /* label mask to identify mean file */
   CMEANPATHMASK, /* label mask to identify the path of mean file */
   VARSCALEDIR,   /* dir to find the variance estimate files */
   VARSCALEMASK,  /* label mask to identify the variance estimate files */
   VARSCALEPATHMASK, /* label mask to identify the path of the variance estimate files */
   SIDEXFORMMASK, /* mask for use with side-based xforms */
   SIDEXFORMEXT,  /* extension for use with side-based xforms */
   /* MatTran file */
   MATTRANFN,     /* File name for MatTran file */
   MATTRAN,
   /* Extended Deltas */
   THIRDWINDOW,
   FOURTHWINDOW,
   CFGSIZE
}IOConfParm;

static char * ioConfName[CFGSIZE] = { "SOURCEKIND", "SOURCEFORMAT", "SOURCERATE", "ZMEANSOURCE", "TARGETKIND", "TARGETFORMAT", "TARGETRATE", "SAVECOMPRESSED", "SAVEWITHCRC", "WINDOWSIZE", "USEHAMMING", "PREEMCOEF", "USEPOWER", "NUMCHANS", "LOFREQ", "HIFREQ", "WARPFREQ", "WARPLCUTOFF", "WARPUCUTOFF", "LPCORDER", "COMPRESSFACT", "CEPLIFTER", "NUMCEPS", "CEPSCALE", "RAWENERGY","ENORMALISE", "ESCALE", "SILFLOOR", "DELTAWINDOW", "ACCWINDOW", "SIMPLEDIFFS", "USESILDET", "SELFCALSILDET", "SPEECHTHRESH", "SILDISCARD", "SILENERGY", "SPCSEQCOUNT", "SPCGLCHCOUNT", "SILGLCHCOUNT", "SILSEQCOUNT", "SILMARGIN", "MEASURESIL", "OUTSILWARN" ,"AUDIOSIG", "V1COMPAT", "VQTABLE" ,"ADDDITHER", "DOUBLEFFT", "VARSCALEFN", "CMEANDIR" , "CMEANMASK", "CMEANPATHMASK",
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -