📄 hvite.c

📁 实现HMM算法
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* ----------------------------------------------------------- */
/*                                                             */
/*                          ___                                */
/*                       |_| | |_/   SPEECH                    */
/*                       | | | | \   RECOGNITION               */
/*                       =========   SOFTWARE                  */
/*                                                             */
/*                                                             */
/* ----------------------------------------------------------- */
/* developed at:                                               */
/*                                                             */
/*      Speech Vision and Robotics group                       */
/*      Cambridge University Engineering Department            */
/*      http://svr-www.eng.cam.ac.uk/                          */
/*                                                             */
/*      Entropic Cambridge Research Laboratory                 */
/*      (now part of Microsoft)                                */
/*                                                             */
/* ----------------------------------------------------------- */
/*         Copyright: Microsoft Corporation                    */
/*          1995-2000 Redmond, Washington USA                  */
/*                    http://www.microsoft.com                 */
/*                                                             */
/*          2001-2004 Cambridge University                     */
/*                    Engineering Department                   */
/*                                                             */
/*   Use of this software is governed by a License Agreement   */
/*    ** See the file License for the Conditions of Use  **    */
/*    **     This banner notice must not be removed      **    */
/*                                                             */
/* ----------------------------------------------------------- */
/*      File: HVite.c: recognise or align file or audio        */
/* ----------------------------------------------------------- */

/* Version and revision-control identification strings; both are handed
   to InitShell() at the top of main(). */
char *hvite_version = "!HVER!HVite:   3.3 [CUED 28/04/05]";
char *hvite_vc_id = "$Id: HVite.c,v 1.1.1.1 2005/05/12 10:52:55 jal58 Exp $";

#include "HShell.h"
#include "HMem.h"
#include "HMath.h"
#include "HSigP.h"
#include "HAudio.h"
#include "HWave.h"
#include "HVQ.h"
#include "HParm.h"
#include "HLabel.h"
#include "HModel.h"
#include "HUtil.h"
#include "HTrain.h"
#include "HAdapt.h"
#include "HMap.h"
#include "HFB.h"
#include "HDict.h"
#include "HNet.h"
#include "HRec.h"

/* -------------------------- Trace Flags & Vars ------------------------ */

/* The flag values are octal literals (leading 0), so each one is a
   distinct single bit: 1, 2, 4, 8 and 16 in decimal. */
#define T_TOP 00001      /* Basic progress reporting */
#define T_OBS 00002      /* list observations */
#define T_FRS 00004      /* Frame by frame best token */
#define T_MEM 00010      /* Memory usage, start and finish */
#define T_MMU 00020      /* Memory usage after each utterance */

static int trace = 0;    /* Trace setting; overwritten from the TRACE
                            configuration parameter in SetConfParms */

/* -------------------------- Global Variables etc ---------------------- */

/* Doing what */
static int nToks = 0;             /* Number of tokens for N best (-n) */
static int nTrans = 1;            /* Number of transcriptions for N best */
static Boolean states = FALSE;    /* Keep track of state alignment (-f) */
static Boolean models = FALSE;    /* Keep track of model alignment (-m) */

/* With what */
static char *datFN;               /* Speech file */
static char *dictFn;              /* Dictionary */
static char *wdNetFn = NULL;      /* Word level lattice (-w with a file name) */
static char *hmmListFn;           /* HMMs */
static char * hmmDir = NULL;      /* directory to look for hmm def files (-d) */
static char * hmmExt = NULL;      /* hmm def file extension */
static Boolean loadLabels = FALSE; /* Set by -a: align from label files */
static Boolean loadNetworks = FALSE; /* Load network for each file; set by -w
                                        when no network file name is given */
static LabId bndId = NULL;        /* Boundary word for alignment (-b) */

/* Results and formats */
static char * labDir = NULL;      /* output label file directory (-l) */
static char * labExt = "rec";     /* output label file extension */
static char * labForm = NULL;     /* output label reformat (-o) */
static char * latForm = NULL;     /* output lattice format (-q) */
static char * labInDir = NULL;    /* input network/label file directory */
static char * labInExt = "lab";   /* input network/label file extension */
static char * latExt = NULL;      /* output lattice file extension */
static FileFormat dfmt=UNDEFF;    /* Data input file format */
static FileFormat ifmt=UNDEFF;    /* Label input file format */
static FileFormat ofmt=UNDEFF;    /* Label output file format */
static Boolean saveAudioOut=FALSE;/* Save rec output from direct audio (-e) */
static char * roPrefix=NULL;      /* Prefix for direct audio output name */
static char * roSuffix=NULL;      /* Suffix for direct audio output name */
static int roCounter = 0;         /* Counter for audio output name */
static Boolean replay = FALSE;    /* enable audio replay (-g) */

/* Language model */
static double lmScale = 1.0;      /* bigram and log(1/NSucc) scale factor (-s) */
static LogDouble wordPen = 0.0;   /* inter model propagation log prob (-p) */
static double prScale = 1.0;      /* pronunciation scale factor (-r) */

/* Pruning */
static LogDouble genBeam = -LZERO;/* genBeam threshold (-t, first value) */
static LogDouble genBeamInc  = 0.0;       /* increment (-t, second value) */
static LogDouble genBeamLim = -LZERO;     /* max value (-t, third value) */
static LogDouble nBeam = 0.0;     /* nBeam threshold */
static LogDouble wordBeam = -LZERO;/* word-end pruning threshold (-v) */
static LogFloat tmBeam = 10.0;    /* tied mix prune threshold (-c) */
static int maxActive = 0;         /* max active phone instances (-u) */

/* Global variables */
static Observation obs;           /* current observation */
static HMMSet hset;               /* the HMM set */
static Vocab vocab;               /* the dictionary */
static Lattice *wdNet;            /* the word level recognition network */
static PSetInfo *psi;             /* Private data used by HRec */
static VRecInfo *vri;             /* Visible HRec Info */
static int maxM = 0;              /* max mixtures in any model */
static int maxMixInS[SMAX];       /* array[1..swidth[0]] of max mixes */

/* Global adaptation variables */
static int update = 0;            /* Perform MLLR & update every n utts (-j) */
static UttInfo *utt;              /* utterance info for state/frame align */
static FBInfo *fbInfo;            /* forward-backward info for alignment */
static PSetInfo *alignpsi;        /* Private data used by HRec */
static VRecInfo *alignvri;        /* Visible HRec Info */
static Boolean saveBinary=FALSE;  /* Save tmf in binary format */

/* Heaps */
static MemHeap ansHeap;
static MemHeap modelHeap;         /* created in main() and passed to
                                     CreateHMMSet for hset */
static MemHeap netHeap;
static MemHeap bufHeap;
static MemHeap repHeap;
static MemHeap regHeap;

/* information about transforms */
static XFInfo xfInfo;

/* ---------------- Configuration Parameters --------------------- */

static ConfParam *cParm[MAXGLOBS];
static int nParm = 0;            /* total num params */

/* ---------------- Process Command Line ------------------------- */

/* SetConfParms: set conf parms relevant to this tool.

   Calls GetConfig with "HVITE" to fill cParm/nParm and, when at least
   one parameter was returned, copies each of the following into its
   file-scope variable if present:
      TRACE         -> trace
      RECOUTPREFIX  -> roPrefix  (copied onto gstack via CopyString)
      RECOUTSUFFIX  -> roSuffix  (copied onto gstack via CopyString)
      SAVEBINARY    -> saveBinary
   Parameters that are absent leave the existing values untouched. */
void SetConfParms(void)
{
   int i;
   Boolean b;
   char buf[MAXSTRLEN];   /* scratch buffer for string-valued parameters */

   nParm = GetConfig("HVITE", TRUE, cParm, MAXGLOBS);
   if (nParm>0){
      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;
      /* buf is reused for the next lookup, so keep a copy of each string */
      if (GetConfStr(cParm,nParm,"RECOUTPREFIX",buf))
         roPrefix=CopyString(&gstack,buf);
      if (GetConfStr(cParm,nParm,"RECOUTSUFFIX",buf))
         roSuffix=CopyString(&gstack,buf);
      if (GetConfBool(cParm,nParm,"SAVEBINARY",&b))
         saveBinary = b;
   }
}

/* ReportUsage: print the command-line summary to stdout.

   Writes one line per tool-specific option with its default, then calls
   PrintStdOpts with the option letters "BEFGHIJKLPSX". */
void ReportUsage(void)
{
   printf("\nUSAGE: HVite [options] VocabFile HMMList DataFiles...\n\n");
   printf(" Option                                       Default\n\n");
   printf(" -a      align from label files               off\n");
   printf(" -b s    def s as utterance boundary word     none\n");
   printf(" -c f    tied mixture pruning threshold       10.0\n");
   printf(" -d s    dir to find hmm definitions          current\n");
   printf(" -e      save direct audio rec output         off\n");
   printf(" -f      output full state alignment          off\n");
   printf(" -g      enable audio replay                  off\n");
   printf(" -h s    set speaker name pattern             *.mfc\n");
   printf(" -i s    Output transcriptions to MLF s       off\n");
   printf(" -j i    Online MLLR adaptation               off\n");
   printf("         Perform update every i utterances      \n");
   printf(" -k      use an input transform               off\n");
   printf(" -l s    dir to store label/lattice files     current\n");
   printf(" -m      output model alignment               off\n");
   printf(" -n i [N] N-best recognition (using i tokens) off\n");
   printf(" -o s    output label formating NCSTWMX       none\n");
   printf(" -p f    inter model trans penalty (log)      0.0\n");
   printf(" -q s    output lattice formating ABtvaldmn   tvaldmn\n");
   printf(" -r f    pronunciation prob scale factor      1.0\n");
   printf(" -s f    grammar scale factor                 1.0\n");
   printf(" -t f [f f] set pruning threshold             0.0\n");
   printf(" -u i    set pruning max active               0\n");
   printf(" -v f    set word end pruning threshold       0.0\n");
   printf(" -w [s]  recognise from network               off\n");
   printf(" -x s    extension for hmm files              none\n");
   printf(" -y s    output label file extension          rec\n");
   printf(" -z s    generate lattices with extension s   off\n");
   PrintStdOpts("BEFGHIJKLPSX");
   printf("\n\n");
}

/* main: tool entry point.

   Initialises the HTK library modules, prints the usage summary and
   exits when no arguments were given, loads the tool configuration,
   creates the HMM set on modelHeap, and then consumes command-line
   switches one at a time, storing each setting in the file-scope
   variables declared above. */
int main(int argc, char *argv[])
{
   char *s;                   /* text of the switch currently being parsed */
   /* Block-scope prototypes; presumably these are defined later in this
      file - TODO confirm. */
   void Initialise(void);
   void DoRecognition(void);
   void DoAlignment(void);

   /* Library start-up. InitShell and InitParm are the only initialisers
      whose result is checked here. */
   if(InitShell(argc,argv,hvite_version,hvite_vc_id)<SUCCESS)
      HError(3200,"HVite: InitShell failed");
   InitMem();   InitLabel();
   InitMath();  InitSigP();
   InitWave();  InitAudio();
   InitVQ();    InitModel();
   if(InitParm()<SUCCESS)
      HError(3200,"HVite: InitParm failed");
   InitDict();
   InitNet();   InitRec();
   InitUtil();
   InitAdapt(&xfInfo); InitMap();

   /* With no arguments: show usage (unless InfoPrinted() reports that
      information was already printed) and stop. */
   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(0);
   SetConfParms();
   CreateHeap(&modelHeap, "Model heap",  MSTAK, 1, 0.0, 100000, 800000 );
   CreateHMMSet(&hset,&modelHeap,TRUE);

   /* Option parsing: each switch must be a single letter. */
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1)
         HError(3219,"HVite: Bad switch %s; must be single letter",s);
      switch(s[0]){
      case 'a':
         loadLabels=TRUE; break;
      case 'b':
         if (NextArg()!=STRINGARG)
            HError(3219,"HVite: Utterance boundary word expected");
         bndId = GetLabId(GetStrArg(),TRUE); break;
      case 'c':
         tmBeam = GetChkedFlt(0.0,1000.0,s); break;
      case 'd':
         if (NextArg()!=STRINGARG)
            HError(3219,"HVite: HMM definition directory expected");
         hmmDir = GetStrArg(); break;
      case 'e':
         saveAudioOut=TRUE; break;
      case 'f':
         states=TRUE; break;
      case 'g':
         replay=TRUE; break;
      case 'i': /* annotator's note: already used */
         if (NextArg()!=STRINGARG)
            HError(3219,"HVite: Output MLF file name expected");
         /* if(SaveToMasterfile(GetStrArg())<SUCCESS)
            HError(3214,"HCopy: Cannot write to MLF"); */
         /* NOTE(review): the result of SaveToMasterfile is not checked;
            the checked form above is disabled. */
         SaveToMasterfile(GetStrArg());
         break;
      case 'k':
         xfInfo.useInXForm = TRUE;
         break;
      case 'j':
         if (NextArg()!=INTARG)
            HError(3219,"HVite: No. of files per online adaptation step expected");
         update = GetChkedInt(1,256,s);
         break;
      case 'l':
         if (NextArg()!=STRINGARG)
            HError(3219,"HVite: Label file directory expected");
         labDir = GetStrArg(); break;
      case 'm':
         models=TRUE; break;
      case 'n':
         nToks = GetChkedInt(2,MAX_TOKS,s);
         /* Optional second numeric argument: number of transcriptions;
            falls back to 1 when it is absent. */
         if (NextArg()==FLOATARG || NextArg()==INTARG)
            nTrans = GetChkedInt(1,10000,s);
         else
            nTrans = 1;
         break;
      case 'o':
         if (NextArg()!=STRINGARG)
            HError(3219,"HVite: Output label format expected");
         labForm = GetStrArg(); break;
      case 'p': /* annotator's note: already used */
         wordPen = GetChkedFlt(-1000.0,1000.0,s);  break;
      case 'q':
         if (NextArg()!=STRINGARG)
            HError(3219,"HVite: Output lattice format expected");
         latForm = GetStrArg(); break;
      case 'r':
         prScale = GetChkedFlt(0.0,1000.0,s);  break;
      case 's': /* annotator's note: already used */
         lmScale = GetChkedFlt(0.0,1000.0,s);  break;
      case 't':
         genBeam = GetChkedFlt(0,1.0E20,s);
         /* 0.0 is mapped to -LZERO, the value genBeam is initialised
            with; presumably this means "no pruning" - confirm in HRec. */
         if (genBeam == 0.0)
            genBeam = -LZERO;
         /* When a further numeric argument follows, two more values are
            read: the beam increment, then the beam limit. */
         if (NextArg()==FLOATARG || NextArg()==INTARG) {
            genBeamInc = GetChkedFlt(0.0,1.0E20,s);
            genBeamLim = GetChkedFlt(0.0,1.0E20,s);
            /* A limit below genBeam + increment leaves no room for even
               one step, so fall back to a fixed beam. */
            if (genBeamLim < (genBeam + genBeamInc)) {
               genBeamLim = genBeam; genBeamInc = 0.0;
            }
         }
         else {
            genBeamInc = 0.0;
            genBeamLim = genBeam;
         }
         break;
      case 'w': /* annotator's note: already used */
         /* A missing or empty file name selects per-file network loading
            instead of a single word network. */
         if (NextArg()!=STRINGARG)
            loadNetworks=TRUE;
         else {
            wdNetFn = GetStrArg();
            if (strlen(wdNetFn)==0) {
               wdNetFn=NULL;
               loadNetworks=TRUE;
            }
         }
         break;
      case 'u':
         maxActive = GetChkedInt(0,100000,s); break;
      case 'v':
         wordBeam = GetChkedFlt(0,1.0E20,s);
         /* Same 0.0 -> -LZERO mapping as for -t. */
         if (wordBeam == 0.0)
            wordBeam = -LZERO;
         break;
      case 'x':
         if (NextArg()!=STRINGARG)
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -