/* File: hrest.c */
/* Font size: (source-viewer control label; not part of the program) */
/* ----------------------------------------------------------- *//* *//* ___ *//* |_| | |_/ SPEECH *//* | | | | \ RECOGNITION *//* ========= SOFTWARE */ /* *//* *//* ----------------------------------------------------------- *//* developed at: *//* *//* Speech Vision and Robotics group *//* Cambridge University Engineering Department *//* http://svr-www.eng.cam.ac.uk/ *//* *//* Entropic Cambridge Research Laboratory *//* (now part of Microsoft) *//* *//* ----------------------------------------------------------- *//* Copyright: Microsoft Corporation *//* 1995-2000 Redmond, Washington USA *//* http://www.microsoft.com *//* *//* 2002 Cambridge University *//* Engineering Department *//* *//* Use of this software is governed by a License Agreement *//* ** See the file License for the Conditions of Use ** *//* ** This banner notice must not be removed ** *//* *//* ----------------------------------------------------------- *//* File: HRest.c: HMM initialisation program *//* ----------------------------------------------------------- */char *hrest_version = "!HVER!HRest: 3.3 [CUED 28/04/05]";char *hrest_vc_id = "$Id: HRest.c,v 1.1.1.1 2005/05/12 10:52:54 jal58 Exp $";/* This program is used to estimate the transition parameters, means, covariances and mixture weights of a hidden Markov model using Baum-Welch reestimation. 
It handles multiple streams and tying but it ignores stream weights by assuming that they are all unity.*//* Trace Flags */#define T_TOP 0001 /* Top level tracing */#define T_LD0 0002 /* File Loading */#define T_LD1 0004 /* + segments within each file */#define T_OTP 0010 /* Observation Probabilities */#define T_ALF 0020 /* Alpha matrices */#define T_BET 0040 /* Beta matrices */#define T_OCC 0100 /* Occupation Counters */#define T_TAC 0200 /* Transition Counters */#define T_MAC 0400 /* Mean Counters */#define T_VAC 01000 /* Variance Counters */#define T_WAC 02000 /* MixWeight Counters */#define T_TRE 04000 /* Reestimated transition matrix */#define T_WRE 010000 /* Reestimated mixture weights */#define T_MRE 020000 /* Reestimated means */#define T_VRE 040000 /* Reestimated variances */#define T_LGP 0100000 /* Compare LogP via alpha and beta */#include "HShell.h" /* HMM ToolKit Modules */#include "HMem.h"#include "HMath.h"#include "HSigP.h"#include "HAudio.h"#include "HWave.h"#include "HVQ.h"#include "HParm.h"#include "HLabel.h"#include "HModel.h"#include "HTrain.h"#include "HUtil.h"/* Global Settings */static char * segLab = NULL; /* segment label if any */static LabId segId = NULL; /* and its index */static char * labDir = NULL; /* label file directory */static char * labExt = "lab"; /* label file extension */static char * outDir = NULL; /* output macro file directory, if any */static int maxIter = 20; /* max iterations in parameter estimation */static float epsilon = 1.0E-4; /* convergence criterion */static int minSeg = 3; /* min segments to train a model */static Boolean firstTime = TRUE; /* Flag used to enable InitSegStore */static Boolean saveBinary = FALSE; /* save output in binary */static FileFormat dff=UNDEFF; /* data file format */static FileFormat lff=UNDEFF; /* label file format */static float minVar = 0.0; /* minimum variance */static float mixWeightFloor=0.0; /* Floor for mixture weights */static float tMPruneThresh = 10.0; /* tied mix prune threshold 
*/static char *hmmfn; /* HMM definition file name */static char *outfn=NULL; /* output definition file name */static UPDSet uFlags = (UPDSet) (UPMEANS|UPVARS|UPTRANS|UPMIXES); /* update flags */static int trace = 0; /* Trace level */static ConfParam *cParm[MAXGLOBS]; /* configuration parameters */static int nParm = 0; /* total num params */static Boolean segReject = TRUE; /* Enable short train segment rejection *//* Global Data Structures */static HMMSet hset; /* The current unitary hmm set */static HLink hmm; /* link to the hmm itself */static int nStates; /* numStates of hmm */static int nStreams; /* numStreams of hmm */static HSetKind hsKind; /* kind of the HMM system */static int maxMixes; /* max num mixtures across all streams */static int maxMixInS[SMAX];/* array[1..swidth[0]] of max mixes */static int nSeg; /* num training segments */static int nTokUsed; /* actual number of tokens used */static int maxT,minT,T; /* max,min and current segment lengths */static DMatrix alpha; /* array[1..nStates][1..maxT] of forward prob */static DMatrix beta; /* array[1..nStates][1..maxT] of backward prob */static Matrix outprob; /* array[2..nStates-1][1..maxT] of output prob */static Vector **stroutp; /* array[1..maxT][2..nStates-1][1..nStreams] ...*/ /* ... 
of streamprob */static Matrix **mixoutp; /* array[2..nStates-1][1..maxT][1..nStreams] [1..maxMixes] of mixprob */static Vector occr; /* array[1..nStates-1] of occ count for cur time */static Vector zot; /* temp storage for zero mean obs vector */static Vector vFloor[SMAX]; /* variance floor - default is all zero */static float vDefunct=0.0; /* variance below which mixture defunct */static SegStore segStore; /* Storage for data segments */static MemHeap segmentStack; /* Used by segStore */static MemHeap alphaBetaStack; /* For storage of alpha and beta probs */static MemHeap accsStack; /* For storage of accumulators */static MemHeap transStack; /* For storage of transcription */static MemHeap bufferStack; /* For storage of buffer */static ParmBuf pbuf; /* Currently input parm buffer */ /* ------------------ Process Command Line ------------------------- *//* SetConfParms: set conf parms relevant to HRest */void SetConfParms(void){ int i; double d; Boolean b; nParm = GetConfig("HREST", TRUE, cParm, MAXGLOBS); if (nParm>0) { if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i; if (GetConfBool(cParm,nParm,"SAVEBINARY",&b)) saveBinary = b; if (GetConfFlt(cParm,nParm,"VDEFUNCT",&d)) vDefunct = d; }}void ReportUsage(void){ printf("\nUSAGE: HRest [options] hmmFile trainFiles...\n\n"); printf(" Option Default\n\n"); printf(" -e f Set convergence factor epsilon 1.0E-4\n"); printf(" -i N Set max iterations to N 20\n"); printf(" -l s Set segment label to s none\n"); printf(" -m N Set min segments needed 3\n"); printf(" -t Disable short segment rejection on\n"); printf(" -u tmvw Update t)rans m)eans v)ars w)ghts tmvw\n"); printf(" -v f Set minimum variance to f 0.0\n"); printf(" -c f Tied Mixture pruning threshold 10.0\n"); printf(" -w f Set mix wt floor to f x MINMIX 0.0\n"); PrintStdOpts("BFGHILMSTX"); printf("\n\n");}void SetuFlags(void){ char *s; s=GetStrArg(); uFlags=(UPDSet) 0; while (*s != '\0') switch (*s++) { case 't': uFlags = (UPDSet) (uFlags+UPTRANS); break; case 'm': 
uFlags = (UPDSet) (uFlags+UPMEANS); break; case 'v': uFlags = (UPDSet) (uFlags+UPVARS); break; case 'w': uFlags = (UPDSet) (uFlags+UPMIXES); break; default: HError(2220,"SetuFlags: Unknown update flag %c",*s); break; }}int main(int argc, char *argv[]){ char *datafn, *s; void Initialise1(void); void Initialise2(void); void LoadFile(char *fn); void ReEstimateModel(void); void SaveModel(char *outfn); if(InitShell(argc,argv,hrest_version,hrest_vc_id)<SUCCESS) HError(2200,"HRest: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(2200,"HRest: InitParm failed"); InitTrain(); InitUtil(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); CreateHMMSet(&hset,&gstack,FALSE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2219,"HRest: Bad switch %s; must be single letter",s); switch(s[0]){ case 'e': epsilon = GetChkedFlt(0.0,1.0,s); break; case 'i': maxIter = GetChkedInt(1,100,s); break; case 'l': if (NextArg() != STRINGARG) HError(2219,"HRest: Segment label expected"); segLab = GetStrArg(); break; case 'm': minSeg = GetChkedInt(1,1000,s); break; case 't': segReject = FALSE; break; case 'u': SetuFlags(); break; case 'v': minVar = GetChkedFlt(0.0,100.0,s); break; case 'c': tMPruneThresh = GetChkedFlt(0.0,1000.0,s); break; case 'w': mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break; case 'B': saveBinary = TRUE; break; case 'F': if (NextArg() != STRINGARG) HError(2219,"HRest: Data File format expected"); if((dff = Str2Format(GetStrArg())) == ALIEN) HError(-2289,"HRest: Warning ALIEN Data file format set"); break; case 'G': if (NextArg() != STRINGARG) HError(2219,"HRest: Label File format expected"); if((lff = Str2Format(GetStrArg())) == ALIEN) HError(-2289,"HRest: Warning ALIEN Label file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(2219,"HRest: HMM macro file name expected"); 
AddMMF(&hset,GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(2219,"HRest: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(2219,"HRest: Label file directory expected"); labDir = GetStrArg(); break; case 'M': if (NextArg()!=STRINGARG) HError(2219,"HRest: Output macro file directory expected"); outDir = GetStrArg(); break; case 'T': trace = GetChkedInt(0,0100000,s); break; case 'X': if (NextArg()!=STRINGARG) HError(2219,"HRest: Label file extension expected"); labExt = GetStrArg(); break; default: HError(2219,"HRest: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(2219,"HRest: source HMM file name expected"); hmmfn = GetStrArg(); if (outfn==NULL) outfn = hmmfn; Initialise1(); do { if (NextArg()!=STRINGARG) HError(2219,"HRest: training data file name expected"); datafn = GetStrArg(); LoadFile(datafn); } while (NumArgs()>0); nSeg = NumSegs(segStore); if (nSeg < minSeg) HError(2221,"HRest: Too Few Training Examples [%d]",nSeg); Initialise2(); if (trace&T_TOP) { printf("%d Examples loaded, Max length = %d, Min length = %d\n", nSeg, maxT,minT); fflush(stdout); } ReEstimateModel(); if(SaveHMMSet(&hset,outDir,NULL,NULL,saveBinary)<SUCCESS) HError(2211,"HRest: SaveHMMSet failed"); Exit(0); return (0); /* never reached -- make compiler happy */}/* ------------------------ Initialisation ----------------------- *//* PrintInitialInfo: print a header of program settings */void PrintInitialInfo(void){ printf("Reestimating HMM %s . . . 
\n",hmmfn); PrintHMMProfile(stdout, hmm); if (segLab==NULL) printf(" SegLab : NONE\n"); else printf(" SegLab : %s\n",segLab); printf(" MaxIter : %d\n",maxIter); printf(" Epsilon : %f\n",epsilon); printf(" Updating : "); if (uFlags&UPTRANS) printf("Transitions "); if (uFlags&UPMEANS) printf("Means "); if (uFlags&UPVARS) printf("Variances "); if (uFlags&UPMIXES && maxMixes>1) printf("MixWeights"); printf("\n\n"); printf(" - system is "); switch (hset.hsKind){ case PLAINHS: printf("PLAIN\n"); break; case SHAREDHS: printf("SHARED\n"); break; case TIEDHS: printf("TIED\n"); break; case DISCRETEHS: printf("DISCRETE\n"); break; } fflush(stdout);} /* Initialise1: 1st phase of init prior to loading dbase */void Initialise1(void){ MLink link; LabId hmmId; char base[MAXSTRLEN]; char path[MAXSTRLEN]; char ext[MAXSTRLEN]; int s; /* Load HMM def */ if(MakeOneHMM( &hset,BaseOf(hmmfn,base))<SUCCESS) HError(2128,"Initialise1: MakeOneHMM failed"); if(LoadHMMSet( &hset,PathOf(hmmfn,path),ExtnOf(hmmfn,ext))<SUCCESS) HError(2128,"Initialise1: LoadHMMSet failed"); SetParmHMMSet(&hset); if (hset.hsKind!=PLAINHS) uFlags = (UPDSet) (uFlags & (~(UPMEANS|UPVARS))); /* Get a pointer to the physical HMM and set related globals */ hmmId = GetLabId(base,FALSE); link = FindMacroName(&hset,'h',hmmId); hmm = (HLink)link->structure; nStates = hmm->numStates; nStreams = hset.swidth[0]; hsKind = hset.hsKind; /* Stacks for global structures requiring memory allocation */ CreateHeap(&segmentStack,"SegStore", MSTAK, 1, 0.0, 100000, LONG_MAX); CreateHeap(&alphaBetaStack,"AlphaBetaStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&accsStack,"AccsStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&transStack,"TransStore", MSTAK, 1, 0.0, 1000, 1000); CreateHeap(&bufferStack,"BufferStore", MSTAK, 1, 0.0, 1000, 1000); AttachAccs(&hset, &accsStack, uFlags); SetVFloor( &hset, vFloor, minVar); if(segLab != NULL) segId = GetLabId(segLab,TRUE); if(trace&T_TOP) PrintInitialInfo(); maxMixes = MaxMixtures(hmm); for(s=1; 
s<=nStreams; s++) maxMixInS[s] = MaxMixInS(hmm, s); T = maxT = 0; minT = 100000;}/* Initialise2: 2nd phase of init after loading dbase */void Initialise2(void){ int t,j,m,s; alpha = CreateDMatrix(&alphaBetaStack,nStates,maxT); beta = CreateDMatrix(&alphaBetaStack,nStates,maxT); outprob = CreateMatrix(&alphaBetaStack,nStates-1,maxT); /* row 1 not used */ ZeroMatrix(outprob); if (maxMixes>1){ mixoutp = (Matrix**)New(&alphaBetaStack, (nStates-2)*sizeof(Matrix*)); mixoutp -= 2; for (j=2;j<nStates;j++){ mixoutp[j] = (Matrix*)New(&alphaBetaStack, maxT*sizeof(Matrix)); --mixoutp[j]; for (t=1;t<=maxT;t++){ mixoutp[j][t] = CreateMatrix(&alphaBetaStack,nStreams,maxMixes); for (s=1;s<=nStreams;s++){ for (m=1;m<=maxMixes;m++) mixoutp[j][t][s][m]=LZERO; } } } } if (nStreams>1){ stroutp = (Vector**)New(&alphaBetaStack, maxT*sizeof(Vector*)); --stroutp; for (t=1;t<=maxT;t++){ stroutp[t] = (Vector*)New(&alphaBetaStack,(nStates-2)*sizeof(Vector)); stroutp[t] -= 2; for (j=2;j<nStates;j++) stroutp[t][j] = CreateVector(&alphaBetaStack,nStreams); } } occr = CreateVector(&gstack,nStates-1); zot = CreateVector(&gstack,hset.vecSize);}/* ---------------------------- Load Data ------------------------- *//* CheckData: check data file consistent with HMM definition */void CheckData(char *fn, BufferInfo info) { char tpk[80]; char mpk[80]; if (info.tgtPK != hset.pkind) HError(2250,"CheckData: Parameterisation in %s[%s] is incompatible with hmm %s[%s]", fn,ParmKind2Str(info.tgtPK,tpk),hmmfn,ParmKind2Str(hset.pkind,mpk)); if (info.tgtVecSize!=hset.vecSize) HError(2250,"CheckData: Vector size in %s[%d] is incompatible with hmm %s[%d]", fn,info.tgtVecSize,hmmfn,hset.vecSize);}/* InitSegStore : Initialise segStore for particular observation */void InitSegStore(BufferInfo *info){ Observation obs; Boolean eSep; SetStreamWidths(info->tgtPK,info->tgtVecSize,hset.swidth,&eSep); obs = MakeObservation(&gstack,hset.swidth,info->tgtPK, hset.hsKind==DISCRETEHS,eSep); segStore = 
CreateSegStore(&segmentStack,obs,10); firstTime = FALSE;
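/*
 * Source-viewer keyboard shortcuts (web page residue, not part of HRest.c):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */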