📄 hparm.c
字号:
"VARSCALEDIR", "VARSCALEMASK" , "VARSCALEPATHMASK" , "SIDEXFORMMASK", "SIDEXFORMEXT", "MATTRANFN", "MATTRAN", "THIRDWINDOW", "FOURTHWINDOW"};/* ------------------- Default Configuration Values ---------------------- */static const IOConfigRec defConf = { ANON, HTK, 0.0, FALSE, /* SOURCEKIND SOURCEFORMAT SOURCERATE ZMEANSOURCE */ ANON, HTK, 0.0, /* TARGETKIND TARGETFORMAT TARGETRATE */ FALSE, TRUE, /* SAVECOMPRESSED SAVEWITHCRC */ 256000.0, TRUE, 0.97, /* WINDOWSIZE USEHAMMING PREEMCOEF */ FALSE, 20, -1.0, -1.0, /* USEPOWER NUMCHANS LOFREQ HIFREQ */ 1.0, /* WARPFREQ */ 0.0, 0.0, /* WARPLCUTOFF WARPUCUTOFF */ 12, 0.33, /* LPCORDER COMPRESSFACT */ 22, 12, 1.0, /* CEPLIFTER NUMCEPS CEPSCALE */ TRUE, TRUE, 0.1, 50.0, /* RAWENERGY ENORMALISE ESCALE SILFLOOR */ 2, 2, FALSE, /* DELTAWINDOW ACCWINDOW SIMPLEDIFFS */ FALSE,0, /* USESILDET SELFCALSILDET */ 9.0,0.0,0.0, /* SPEECHTHRESH SILDISCARD SILENERGY */ 10,0,2, /* SPCSEQCOUNT SPCGLCHCOUNT SILGLCHCOUNT */ 100,40, /* SILSEQCOUNT SILMARGIN */ TRUE,TRUE, /* MEASURESIL OUTSILWARN */ NULLSIG, /* AUDIOSIG */ FALSE,NULL, /* V1COMPAT VQTABLE */ 0.0, /* ADDDITHER */ FALSE, /* DOUBLEFFT */ /* side based normalisation */ NULL, /* VARSCALEFN */ NULL,NULL,NULL, /* CMEANDIR CMEANMASK CMEANPATHMASK */ NULL,NULL,NULL, /* VARSCALEDIR VARSCALEMASK VARSCALEPATHMASK */ NULL,NULL, /* SIDEXFORMMASK SIDEXFORMEXT*/ NULL, /* vqTab */ NULL, NULL, 2, 2 /* MATTRANFN, MATTRAN THIRDWIN FOURTHWIN */};/* ------------------------- Buffer Definition ------------------------*//* Cepstral Mean Record for running average */typedef struct meanrec { int frames; /* Number of frames processed in session */ Vector defMeanVec; /* Default mean vector for reset */ Vector curMeanVec; /* Current mean */}MeanRec;/* HParm can deal with multiple channels (eg Audio*N/Files/RFE) *//* Each channel can have its own setup and preserved information */typedef struct channelinfo { char *confName; /* Configuration name associated with mean */ int fCnt; /* Number of files processed for this channel */ int sCnt; /* Number of files processed in current session */ int oCnt; /* Number of observations processed in session */ Boolean spDetParmsSet; /* Speech detector parameters set */ float frMin; /* Measured minimum frame energy for channel (dB) */ float spDetSil; /* Measured/set silence level for channel (dB) */ float spDetThresh; /* Measured/set speech/silence threshold (dB) */ float spDetSp; /* Measured/set speech level for channel (dB) */ float frMax; /* Measured maximum frame energy for channel (dB) */ float chPeak; /* Scaled peak-to-peak range 0.0-1.0 */ float chOffset; /* Average sample offset (-32768..32767) */ float spDetSNR; /* Measured/set silence/speech ratio (dB) */ IOConfigRec cf; /* Channel configuration */ struct channelinfo *next; /* Next channel record */}ChannelInfo;typedef struct hparmsrcdef { Ptr xInfo; /* Application data */ ParmKind pk; /* Type of source - split into parmKind and */ int size; /* Sample size fields */ HTime sampPeriod; /* Either 0.0 or the fixed sample rate of source */ Ptr (*fOpen)(Ptr xInfo,char *fn,BufferInfo *info); /* Open new buffer */ /* Return: Pointer to buffer specific data Connect to source and allocate necessary structures. Each buffer is associated with a specific pointer that is assigned to the return value of this function. All other buffer operations are passed this pointer. Typically it will be used to access a source specific data structure containing the necessary information for operating the source. */ void (*fClose)(Ptr xInfo,Ptr bInfo); /* Close buffer and free resources */ /* Ptr bInfo: Pointer returned by fOpen for this buffer Free all the resources associated with the buffer (including if necessary the info block itself). */ void (*fStart)(Ptr xInfo,Ptr bInfo); /* Start data capture for real-time sources */ /* Ptr bInfo: Pointer returned by fOpen for this buffer Start data capture. Offline sources can ignore this call. */ void (*fStop)(Ptr xInfo,Ptr bInfo); /* Stop data capture for real-time sources */ /* Ptr bInfo: Pointer returned by fOpen for this buffer Stop data capture. Offline sources can ignore this call. */ int (*fNumSamp)(Ptr xInfo,Ptr bInfo); /* Query samples readable without blocking */ /* Ptr bInfo: Pointer returned by fOpen for this buffer Return: Samples readable without blocking Used to determine size of next read. Offline sources can specify the whole utterance whereas real-time sources should return the number of buffered data samples once data capture has finished or -1 minus the number of samples that can be read without blocking. */ int (*fGetData)(Ptr xInfo,Ptr bInfo,int n,Ptr data); /* Read samples */ /* Ptr bInfo: Pointer returned by fOpen for this buffer int n: Number of samples required Ptr data: Buffer for returned samples Return: Samples read correctly Read samples from the source. In general will only read one frame at a time (either frSize samples for the first frame or frRate samples for the rest). Will only request a frame that fNumSamp indicates will block when the next thing to do is process the frame. Normally only non-blocking data will be requested (unless the decoder is keeping up with the source). */} HParmSrcDefRec;typedef enum channeltype { /* table=1, buffer=2, */ ch_haudio=4, /* The HAudio interface */ ch_hwave, /* A waveform file */ ch_hparm, /* A parmeterised file */ ch_hrfe, /* The RFE is not yet reimplemented */ ch_ext_wave, /* Externally defined waveform source */ ch_ext_parm /* Externally defined parameterised source */}ChannelType;#define MIN_PB_SIZE 64#define MAX_PB_SIZE 2048#define MAX_INT 536870911 /* Don't use INT_MAX cos get numeric overflow */typedef struct pblock { int stRow; /* absolute number of first row in this block */ int nRows; /* number of rows used in this block */ int maxRows; /* total number of rows in this block */ void *data; /* parameterised data for this block */ struct pblock *next; /* Next block */}PBlock;typedef struct _ParmBuf { MemHeap *mem; /* Memory heap for this parm buf */ PBStatus status; /* status of this buffer */ ChannelInfo *chan; /* input channel for this buffer */ IOConfig cf; /* configuration for this channel */ Boolean noTable; /* no need for table access */ ChannelType chType; /* type of input channel */ Boolean chClear; /* End of channel reached */ Boolean dShort; /* data is array of shorts not floats (DISCRETE) */ Boolean fShort; /* file is array of shorts (DISCRETE, COMPX or IREFC) */ /* New parameters for channel type buffer */ HParmSrcDef ext; /* external source functions */ union { AudioIn a; /* the audio source */ Wave w; /* the waveform file */ Ptr i; /* data for external source */ } in; unsigned short crcc;/* Put crcc here when we read it !! */ /* Channel buffer consists of a main active (for inwards reading, sil */ /* detection and qualification) block plus preceding blocks that form */ /* an infinitely extensible read only buffer */ PBlock main; /* Main block of data (next points to first block) */ int inRow; /* Absolute row number of next to read (nRows+stRow) */ int outRow; /* of next row to return (may be in any block) */ int lastRow; /* of final row (if we know it) */ /* Main buffer parameters */ int qst; /* next row in main block to qualify (qst>qwin) */ int qen; /* final row in main block qualified (last valid row) */ int qwin; /* Width of qualify window (needed on each side) */ /* Silence detector parameters and results */ int minRows; /* min rows to keep in main block */ float *spVal; /* Array of speech/silence levels */ int spDetLst; /* Last frame of speech seen */ int spDetCur; /* Current speech detector frame */ int spDetCnt; /* Number of speech frames in window */ int silDetCnt; /* Number of silence frames in window */ int spDetSt; /* first row to return (MAX_INT == waiting) */ int spDetEn; /* row after last to return (MAX_INT == waiting) */ int spDetFin; /* final row allowed to return (normally qen) */}ParmBufRec;/* ----------------------------- Local Memory --------------------------*/ static ConfParam *cParm[MAXGLOBS]; /* config parameters */static int nParm = 0;static MemHeap parmHeap; /* HParm no longer uses gstack */static Boolean hparmBin=TRUE; /* HTK format files are binary */static ChannelInfo *defChan=NULL;static ChannelInfo *curChan=NULL;/* ----------------------- IO Configuration Handling ------------------ *//* Load the global variance vector for side based CVN */static void LoadVarScale (MemHeap *x, IOConfig cf){ Source varsrc; char buf[MAXSTRLEN]; Boolean vbinary=FALSE; int dim,i; Matrix GlobalVar,NewGlobalVar; int NewFDim,FDim; if (strcmp (cf->varScaleFN, varScaleFN) == 0) { /* already cached */ cf->varScale = CreateVector (x, varScaleDim); for (i=1; i<=varScaleDim; i++) cf->varScale[i] = varScale[i]; } else { /* read it in */ if (InitSource (cf->varScaleFN, &varsrc, NoFilter) < SUCCESS) HError (6310, "LoadVarScale: Can't open varscale file %s", cf->varScaleFN); SkipComment (&varsrc); ReadString (&varsrc,buf); if (strcmp (buf, "<VARSCALE>") != 0) HError (6376, "LoadVarScale: <VARSCALE> missing, read: %s", buf); ReadInt (&varsrc, &dim, 1, vbinary); cf->varScale = CreateVector (x, dim); if (!ReadVector (&varsrc, cf->varScale, vbinary)) HError(6376 ,"LoadVarScale: Couldn't read var scale vector from file"); CloseSource (&varsrc); /* Apply a linear transform to the global variance */ if ((cf->MatTran != NULL) && (UseOldXFormCVN)) { FDim = NumCols(cf->MatTran); NewFDim = NumRows(cf->MatTran); NewGlobalVar = CreateMatrix(x,NewFDim,NewFDim); ZeroMatrix(NewGlobalVar); GlobalVar = CreateMatrix(x,FDim,FDim); ZeroMatrix(GlobalVar); for (i=1;i<=NumRows(GlobalVar);i++){ GlobalVar[i][i] = cf->varScale[i]; } LinTranQuaProd(NewGlobalVar,cf->MatTran,GlobalVar); cf->varScale = CreateVector(x,NewFDim); ZeroVector(cf->varScale); for (i=1;i<=NewFDim;i++){ cf->varScale[i] = NewGlobalVar[i][i]; } dim = NewFDim; } for (i=1; i<=dim; i++){ /* cache the vector */ varScale[i] = cf->varScale[i]; } varScaleDim = dim; strcpy (varScaleFN, cf->varScaleFN); }}/* After the global feature transform is loaded as a macro via HModel, if the channel feature transform config is empty then this function is invoked in LoadMat to pass on all the channel config setup from the loaded input linear transform data structure.*/static void SetInputXFormConfig(IOConfig cf, InputXForm *xf){ LinXForm *xform; xform = xf->xform; cf->xform = xf; if (IntVecSize(xform->blockSize) != 1) HError(999,"Only full linear transforms currently supported"); cf->matPK = xf->pkind; cf->preQual = xf->preQual; /* Currently hard-wired and ignored */ cf->preFrames = 0; cf->postFrames = 0; cf->MatTran = xform->xform[1]; if (cf->MatTranFN == NULL) /* set to non-NULL */ cf->MatTranFN = xf->xformName; if (((cf->preFrames>0) || (cf->postFrames>0)) && (HasZerom(cf->tgtPK))) HError(-1,"Mismatch possible for ZeroMean due to end truncations.\n All static parameters floored (including Energy) as using matrix transformation.\n For post transformation dynamic parameters also floored.");}/* EXPORT->SetParmHMMSet: specifies the HMMSet to be used with the frontend */void SetParmHMMSet(Ptr aset){ char buf[MAXSTRLEN]; InputXForm *cfg_xf, *hmm_xf; LabId id; hset = (HMMSet *)aset; hmm_xf = hset->xf; if (defChan != NULL) { /* xforms may already be set using config files */ cfg_xf = defChan->cf.xform; if (cfg_xf != NULL) { /* is there a transform currently set */ if (hmm_xf != NULL) { /* need to check that the transforms are the same. This may be achieved by ensuring that the transforms have the same macroname.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -