⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hrest.c

📁 HMM的另一种经典的训练算法,需要的快下啊
💻 C
📖 第 1 页 / 共 3 页
字号:
/* ----------------------------------------------------------- */
/*                                                             */
/*                          ___                                */
/*                       |_| | |_/   SPEECH                    */
/*                       | | | | \   RECOGNITION               */
/*                       =========   SOFTWARE                  */ 
/*                                                             */
/*                                                             */
/* ----------------------------------------------------------- */
/* developed at:                                               */
/*                                                             */
/*      Speech Vision and Robotics group                       */
/*      Cambridge University Engineering Department            */
/*      http://svr-www.eng.cam.ac.uk/                          */
/*                                                             */
/*      Entropic Cambridge Research Laboratory                 */
/*      (now part of Microsoft)                                */
/*                                                             */
/* ----------------------------------------------------------- */
/*         Copyright: Microsoft Corporation                    */
/*          1995-2000 Redmond, Washington USA                  */
/*                    http://www.microsoft.com                 */
/*                                                             */
/*              2002  Cambridge University                     */
/*                    Engineering Department                   */
/*                                                             */
/*   Use of this software is governed by a License Agreement   */
/*    ** See the file License for the Conditions of Use  **    */
/*    **     This banner notice must not be removed      **    */
/*                                                             */
/* ----------------------------------------------------------- */
/*         File: HRest.c: HMM initialisation program           */
/* ----------------------------------------------------------- */

/* Version and revision-control strings; both are handed to InitShell() in main */
char *hrest_version = "!HVER!HRest:   3.3 [CUED 28/04/05]";
char *hrest_vc_id = "$Id: HRest.c,v 1.1.1.1 2005/05/12 10:52:54 jal58 Exp $";

/*
   This program is used to estimate the transition parameters,
   means, covariances and mixture weights of a
   hidden Markov model using Baum-Welch reestimation.
   It handles multiple streams and tying but it ignores
   stream weights by assuming that they are all unity.
*/

/* Trace Flags: octal constants, one bit each.  They are combined in the
   'trace' setting and tested with (trace & T_xxx). */
#define T_TOP    0001    /* Top level tracing */
#define T_LD0    0002    /* File Loading */
#define T_LD1    0004    /* + segments within each file */
#define T_OTP    0010    /* Observation Probabilities */
#define T_ALF    0020    /* Alpha matrices */
#define T_BET    0040    /* Beta matrices */
#define T_OCC    0100    /* Occupation Counters */
#define T_TAC    0200    /* Transition Counters */
#define T_MAC    0400    /* Mean Counters */
#define T_VAC   01000    /* Variance Counters */
#define T_WAC   02000    /* MixWeight Counters */
#define T_TRE   04000    /* Reestimated transition matrix */
#define T_WRE  010000    /* Reestimated mixture weights */
#define T_MRE  020000    /* Reestimated means */
#define T_VRE  040000    /* Reestimated variances */
#define T_LGP 0100000    /* Compare LogP via alpha and beta */

#include "HShell.h"     /* HMM ToolKit Modules */
#include "HMem.h"
#include "HMath.h"
#include "HSigP.h"
#include "HAudio.h"
#include "HWave.h"
#include "HVQ.h"
#include "HParm.h"
#include "HLabel.h"
#include "HModel.h"
#include "HTrain.h"
#include "HUtil.h"

/* Global Settings (the option letter that sets each one in main is noted) */
static char * segLab = NULL;     /* segment label if any (-l) */
static LabId  segId  = NULL;     /* and its index */
static char * labDir = NULL;     /* label file directory (-L) */
static char * labExt = "lab";    /* label file extension (-X) */
static char * outDir = NULL;     /* output macro file directory, if any (-M) */
static int  maxIter  = 20;       /* max iterations in parameter estimation (-i) */
static float epsilon = 1.0E-4;   /* convergence criterion (-e) */
static int minSeg    = 3;        /* min segments to train a model (-m) */
static Boolean firstTime = TRUE; /* Flag used to enable InitSegStore */
static Boolean saveBinary = FALSE;  /* save output in binary (-B or SAVEBINARY) */
static FileFormat dff=UNDEFF;    /* data file format (-F) */
static FileFormat lff=UNDEFF;    /* label file format (-G) */
static float minVar  = 0.0;      /* minimum variance (-v) */
static float mixWeightFloor=0.0; /* Floor for mixture weights (-w, scaled by MINMIX) */
static float tMPruneThresh = 10.0;    /* tied mix prune threshold (-c) */
static char *hmmfn;              /* HMM definition file name */
static char *outfn=NULL;         /* output definition file name */
static UPDSet uFlags = (UPDSet) (UPMEANS|UPVARS|UPTRANS|UPMIXES);     /* update flags (-u) */
static int  trace    = 0;        /* Trace level (-T or TRACE) */
static ConfParam *cParm[MAXGLOBS];   /* configuration parameters */
static int nParm = 0;               /* total num params */
static Boolean segReject = TRUE; /* Enable short train segment rejection (-t clears it) */

/* Global Data Structures */
static HMMSet hset;        /* The current unitary hmm set */
static HLink hmm;          /* link to the hmm itself */
static int nStates;        /* numStates of hmm */
static int nStreams;       /* numStreams of hmm */
static HSetKind hsKind;          /* kind of the HMM system */
static int maxMixes;       /* max num mixtures across all streams */
static int maxMixInS[SMAX];/* array[1..swidth[0]] of max mixes */
static int nSeg;           /* num training segments */
static int nTokUsed;       /* actual number of tokens used */
static int maxT,minT,T;    /* max,min and current segment lengths */
static DMatrix alpha;      /* array[1..nStates][1..maxT] of forward prob */
static DMatrix beta;       /* array[1..nStates][1..maxT] of backward prob */
static Matrix outprob;     /* array[2..nStates-1][1..maxT] of output prob */
static Vector **stroutp;   /* 
array[1..maxT][2..nStates-1][1..nStreams] ...*/                           /* ... of streamprob */static Matrix **mixoutp;   /* array[2..nStates-1][1..maxT][1..nStreams]                              [1..maxMixes] of mixprob */static Vector occr;        /* array[1..nStates-1] of occ count for cur time */static Vector zot;         /* temp storage for zero mean obs vector */static Vector vFloor[SMAX];      /* variance floor - default is all zero */static float vDefunct=0.0;       /* variance below which mixture defunct */static SegStore segStore;        /* Storage for data segments */static MemHeap segmentStack;     /* Used by segStore */static MemHeap alphaBetaStack;   /* For storage of alpha and beta probs */static MemHeap accsStack;        /* For storage of accumulators */static MemHeap transStack;       /* For storage of transcription */static MemHeap bufferStack;      /* For storage of buffer */static ParmBuf pbuf;             /* Currently input parm buffer */   /* ------------------ Process Command Line ------------------------- *//* SetConfParms: set conf parms relevant to HRest  */void SetConfParms(void){   int i;   double d;   Boolean b;   nParm = GetConfig("HREST", TRUE, cParm, MAXGLOBS);   if (nParm>0) {      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;      if (GetConfBool(cParm,nParm,"SAVEBINARY",&b)) saveBinary = b;      if (GetConfFlt(cParm,nParm,"VDEFUNCT",&d)) vDefunct = d;   }}void ReportUsage(void){   printf("\nUSAGE: HRest [options] hmmFile trainFiles...\n\n");   printf(" Option                                       Default\n\n");   printf(" -e f    Set convergence factor epsilon       1.0E-4\n");   printf(" -i N    Set max iterations to N              20\n");   printf(" -l s    Set segment label to s               none\n");   printf(" -m N    Set min segments needed              3\n");   printf(" -t      Disable short segment rejection      on\n");   printf(" -u tmvw Update t)rans m)eans v)ars w)ghts    tmvw\n");   printf(" -v f    Set 
minimum variance to f            0.0\n");   printf(" -c f    Tied Mixture pruning threshold       10.0\n");   printf(" -w f    Set mix wt floor to f x MINMIX       0.0\n");   PrintStdOpts("BFGHILMSTX");   printf("\n\n");}void SetuFlags(void){   char *s;      s=GetStrArg();   uFlags=(UPDSet) 0;           while (*s != '\0')      switch (*s++) {      case 't': uFlags = (UPDSet) (uFlags+UPTRANS); break;      case 'm': uFlags = (UPDSet) (uFlags+UPMEANS); break;      case 'v': uFlags = (UPDSet) (uFlags+UPVARS); break;      case 'w': uFlags = (UPDSet) (uFlags+UPMIXES); break;      default: HError(2220,"SetuFlags: Unknown update flag %c",*s);         break;      }}int main(int argc, char *argv[]){   char *datafn, *s;   void Initialise1(void);   void Initialise2(void);   void LoadFile(char *fn);   void ReEstimateModel(void);   void SaveModel(char *outfn);    if(InitShell(argc,argv,hrest_version,hrest_vc_id)<SUCCESS)      HError(2200,"HRest: InitShell failed");   InitMem();   InitLabel();   InitMath();  InitSigP();   InitWave();  InitAudio();   InitVQ();    InitModel();   if(InitParm()<SUCCESS)        HError(2200,"HRest: InitParm failed");   InitTrain(); InitUtil();   if (!InfoPrinted() && NumArgs() == 0)      ReportUsage();   if (NumArgs() == 0) Exit(0);   SetConfParms();   CreateHMMSet(&hset,&gstack,FALSE);   while (NextArg() == SWITCHARG) {      s = GetSwtArg();      if (strlen(s)!=1)          HError(2219,"HRest: Bad switch %s; must be single letter",s);      switch(s[0]){            case 'e':         epsilon = GetChkedFlt(0.0,1.0,s); break;      case 'i':         maxIter = GetChkedInt(1,100,s); break;      case 'l':         if (NextArg() != STRINGARG)            HError(2219,"HRest: Segment label expected");         segLab = GetStrArg();         break;      case 'm':         minSeg = GetChkedInt(1,1000,s); break;      case 't':         segReject = FALSE;         break;      case 'u':         SetuFlags(); break;      case 'v':         minVar = GetChkedFlt(0.0,100.0,s); 
break;
      case 'c':
         tMPruneThresh = GetChkedFlt(0.0,1000.0,s); break;
      case 'w':
         /* the user value is a multiple of MINMIX, not an absolute floor */
         mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break;
      case 'B':
         saveBinary = TRUE;
         break;
      case 'F':
         if (NextArg() != STRINGARG)
            HError(2219,"HRest: Data File format expected");
         /* negative code with a "Warning" message -- presumably non-fatal;
            confirm against HShell's HError */
         if((dff = Str2Format(GetStrArg())) == ALIEN)
            HError(-2289,"HRest: Warning ALIEN Data file format set");
         break;
      case 'G':
         if (NextArg() != STRINGARG)
            HError(2219,"HRest: Label File format expected");
         if((lff = Str2Format(GetStrArg())) == ALIEN)
            HError(-2289,"HRest: Warning ALIEN Label file format set");
         break;
      case 'H':
         if (NextArg() != STRINGARG)
            HError(2219,"HRest: HMM macro file name expected");
         AddMMF(&hset,GetStrArg());
         break;
      case 'I':
         if (NextArg() != STRINGARG)
            HError(2219,"HRest: MLF file name expected");
         LoadMasterFile(GetStrArg());
         break;
      case 'L':
         if (NextArg()!=STRINGARG)
            HError(2219,"HRest: Label file directory expected");
         labDir = GetStrArg(); break;
      case 'M':
         if (NextArg()!=STRINGARG)
            HError(2219,"HRest: Output macro file directory expected");
         outDir = GetStrArg(); break;
      case 'T':
         /* NOTE(review): the upper bound equals T_LGP on its own, so T_LGP
            combined with any other trace bit looks like it would be
            rejected -- confirm GetChkedInt's range semantics */
         trace = GetChkedInt(0,0100000,s); break;
      case 'X':
         if (NextArg()!=STRINGARG)
            HError(2219,"HRest: Label file extension expected");
         labExt = GetStrArg(); break;
      default:
         HError(2219,"HRest: Unknown switch %s",s);
      }
   }
   /* First positional argument is the source HMM; the rest are data files */
   if (NextArg()!=STRINGARG)
      HError(2219,"HRest: source HMM file name expected");
   hmmfn = GetStrArg();
   if (outfn==NULL) outfn = hmmfn;
   Initialise1();
   /* do/while: at least one training file is required */
   do {
      if (NextArg()!=STRINGARG)
         HError(2219,"HRest: training data file name expected");
      datafn = GetStrArg();
      LoadFile(datafn);
   } while (NumArgs()>0);
   nSeg = NumSegs(segStore);
   if (nSeg < minSeg)
      HError(2221,"HRest: Too Few Training Examples [%d]",nSeg);
   Initialise2();
   if (trace&T_TOP) {
      printf("%d Examples loaded, Max length = %d, Min length = %d\n",
             nSeg, maxT,minT);
      fflush(stdout);
   }
   ReEstimateModel();
   if(SaveHMMSet(&hset,outDir,NULL,NULL,saveBinary)<SUCCESS)
      HError(2211,"HRest: SaveHMMSet failed");
   Exit(0);
   return (0);          /* never reached -- make compiler happy */
}

/* ------------------------ Initialisation ----------------------- */

/* PrintInitialInfo: print a header of program settings to stdout: the HMM
   file name and profile, segment label, iteration limit, epsilon, the
   parameter classes selected in uFlags and the kind of the HMM set.
   "MixWeights" is listed only when maxMixes>1, so maxMixes has to hold its
   final value before this is called. */
void PrintInitialInfo(void)
{
   printf("Reestimating HMM %s . . . \n",hmmfn);
   PrintHMMProfile(stdout, hmm);
   if (segLab==NULL)
      printf(" SegLab   :  NONE\n");
   else
      printf(" SegLab   :  %s\n",segLab);
   printf(" MaxIter  :  %d\n",maxIter);
   printf(" Epsilon  :  %f\n",epsilon);
   printf(" Updating :  ");
   if (uFlags&UPTRANS) printf("Transitions ");
   if (uFlags&UPMEANS) printf("Means ");
   if (uFlags&UPVARS)  printf("Variances ");
   if (uFlags&UPMIXES && maxMixes>1)  printf("MixWeights");
   printf("\n\n");
   printf(" - system is ");
   /* no default case: any other hsKind leaves the line unterminated */
   switch (hset.hsKind){
   case PLAINHS:  printf("PLAIN\n");  break;
   case SHAREDHS: printf("SHARED\n"); break;
   case TIEDHS:   printf("TIED\n");   break;
   case DISCRETEHS: printf("DISCRETE\n"); break;
   }
   fflush(stdout);
}

/* Initialise1: 1st phase of init prior to loading dbase */
void Initialise1(void)
{
   MLink link;
   LabId  hmmId;
   char base[MAXSTRLEN];
   char path[MAXSTRLEN];
   char ext[MAXSTRLEN];
   int s;

   /* Load HMM def: hmmfn is split into base name, path and extension */
   if(MakeOneHMM( &hset,BaseOf(hmmfn,base))<SUCCESS)
      HError(2128,"Initialise1: MakeOneHMM failed");
   if(LoadHMMSet( &hset,PathOf(hmmfn,path),ExtnOf(hmmfn,ext))<SUCCESS)
      HError(2128,"Initialise1: LoadHMMSet failed");
   SetParmHMMSet(&hset);
   /* means and variances are only updated for PLAINHS systems */
   if (hset.hsKind!=PLAINHS)
      uFlags = (UPDSet) (uFlags & (~(UPMEANS|UPVARS)));

   /* Get a pointer to the physical HMM and set related globals 
*/   hmmId = GetLabId(base,FALSE);   link = FindMacroName(&hset,'h',hmmId);   hmm = (HLink)link->structure;     nStates = hmm->numStates;   nStreams = hset.swidth[0];   hsKind = hset.hsKind;      /* Stacks for global structures requiring memory allocation */   CreateHeap(&segmentStack,"SegStore", MSTAK, 1, 0.0, 100000, LONG_MAX);   CreateHeap(&alphaBetaStack,"AlphaBetaStore", MSTAK, 1, 0.0, 1000, 1000);   CreateHeap(&accsStack,"AccsStore", MSTAK, 1, 0.0, 1000, 1000);   CreateHeap(&transStack,"TransStore", MSTAK, 1, 0.0, 1000, 1000);   CreateHeap(&bufferStack,"BufferStore", MSTAK, 1, 0.0, 1000, 1000);   AttachAccs(&hset, &accsStack, uFlags);   SetVFloor( &hset, vFloor, minVar);   if(segLab != NULL)      segId = GetLabId(segLab,TRUE);   if(trace&T_TOP)      PrintInitialInfo();   maxMixes = MaxMixtures(hmm);   for(s=1; s<=nStreams; s++)      maxMixInS[s] = MaxMixInS(hmm, s);   T = maxT = 0; minT = 100000;}/* Initialise2: 2nd phase of init after loading dbase */void Initialise2(void){   int t,j,m,s;   alpha = CreateDMatrix(&alphaBetaStack,nStates,maxT);   beta = CreateDMatrix(&alphaBetaStack,nStates,maxT);   outprob = CreateMatrix(&alphaBetaStack,nStates-1,maxT); /* row 1 not used */   ZeroMatrix(outprob);   if (maxMixes>1){      mixoutp = (Matrix**)New(&alphaBetaStack, (nStates-2)*sizeof(Matrix*));      mixoutp -= 2;      for (j=2;j<nStates;j++){         mixoutp[j] = (Matrix*)New(&alphaBetaStack, maxT*sizeof(Matrix));         --mixoutp[j];         for (t=1;t<=maxT;t++){            mixoutp[j][t] = CreateMatrix(&alphaBetaStack,nStreams,maxMixes);            for (s=1;s<=nStreams;s++){               for (m=1;m<=maxMixes;m++)                  mixoutp[j][t][s][m]=LZERO;            }         }      }   }   if (nStreams>1){      stroutp = (Vector**)New(&alphaBetaStack, maxT*sizeof(Vector*));      --stroutp;      for (t=1;t<=maxT;t++){         stroutp[t] = (Vector*)New(&alphaBetaStack,(nStates-2)*sizeof(Vector));         stroutp[t] -= 2;         for (j=2;j<nStates;j++)         
   stroutp[t][j] = CreateVector(&alphaBetaStack,nStreams);
      }
   }
   occr = CreateVector(&gstack,nStates-1);
   zot = CreateVector(&gstack,hset.vecSize);
}

/* ---------------------------- Load Data ------------------------- */

/* CheckData: check data file consistent with HMM definition.
   fn   - data file name, used only in the error messages
   info - buffer info for that file; its target parameter kind and target
          vector size are compared with hset.pkind and hset.vecSize
   Either mismatch is reported through HError(2250). */
void CheckData(char *fn, BufferInfo info)
{
   char tpk[80];     /* text form of the file's parameter kind */
   char mpk[80];     /* text form of the HMM set's parameter kind */

   if (info.tgtPK != hset.pkind)
      HError(2250,"CheckData: Parameterisation in %s[%s] is incompatible with hmm %s[%s]",
             fn,ParmKind2Str(info.tgtPK,tpk),hmmfn,ParmKind2Str(hset.pkind,mpk));
   if (info.tgtVecSize!=hset.vecSize)
      HError(2250,"CheckData: Vector size in %s[%d] is incompatible with hmm %s[%d]",
             fn,info.tgtVecSize,hmmfn,hset.vecSize);
}

/* InitSegStore : Initialise segStore for particular observation */
void InitSegStore(BufferInfo *info)
{
   Observation obs;
   Boolean eSep;

   SetStreamWidths(info->tgtPK,info->tgtVecSize,hset.swidth,&eSep);
   obs = MakeObservation(&gstack,hset.swidth,info->tgtPK,
                         hset.hsKind==DISCRETEHS,eSep);
   segStore = CreateSegStore(&segmentStack,obs,10);
   firstTime = FALSE;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -