📄 hvite.c
字号:
/* ----------------------------------------------------------- */
/*                                                             */
/*                          ___                                */
/*                       |_| | |_/   SPEECH                    */
/*                       | | | | \   RECOGNITION               */
/*                       =========   SOFTWARE                  */
/*                                                             */
/*                                                             */
/* ----------------------------------------------------------- */
/* developed at:                                               */
/*                                                             */
/*      Speech Vision and Robotics group                       */
/*      Cambridge University Engineering Department            */
/*      http://svr-www.eng.cam.ac.uk/                          */
/*                                                             */
/*      Entropic Cambridge Research Laboratory                 */
/*      (now part of Microsoft)                                */
/*                                                             */
/* ----------------------------------------------------------- */
/*         Copyright: Microsoft Corporation                    */
/*          1995-2000 Redmond, Washington USA                  */
/*                    http://www.microsoft.com                 */
/*                                                             */
/*          2001-2004 Cambridge University                     */
/*                    Engineering Department                   */
/*                                                             */
/*   Use of this software is governed by a License Agreement   */
/*    ** See the file License for the Conditions of Use  **    */
/*    **     This banner notice must not be removed      **    */
/*                                                             */
/* ----------------------------------------------------------- */
/*      File: HVite.c: recognise or align file or audio        */
/* ----------------------------------------------------------- */

/* Version and revision-control identification strings; both are handed
   to InitShell() at the start of main(). */
char *hvite_version = "!HVER!HVite: 3.3 [CUED 28/04/05]";
char *hvite_vc_id = "$Id: HVite.c,v 1.1.1.1 2005/05/12 10:52:55 jal58 Exp $";

/* Project library headers (one header per library module). */
#include "HShell.h"
#include "HMem.h"
#include "HMath.h"
#include "HSigP.h"
#include "HAudio.h"
#include "HWave.h"
#include "HVQ.h"
#include "HParm.h"
#include "HLabel.h"
#include "HModel.h"
#include "HUtil.h"
#include "HTrain.h"
#include "HAdapt.h"
#include "HMap.h"
#include "HFB.h"
#include "HDict.h"
#include "HNet.h"
#include "HRec.h"

/* -------------------------- Trace Flags & Vars ------------------------ */

/* The constants below have a leading 0 and are therefore octal; each one
   selects a distinct bit so that several flags can be combined in `trace`. */
#define T_TOP 00001 /* Basic progress reporting */
#define T_OBS 00002 /* list observations */
#define T_FRS 00004 /* Frame by frame best token */
#define T_MEM 00010 /* Memory usage, start and finish */
#define T_MMU 00020 /* Memory usage after each utterance */

static int trace = 0; /* active trace flags; overwritten from the TRACE
                         configuration parameter in SetConfParms() */

/* -------------------------- Global Variables etc ---------------------- */

/* Doing what */
static int nToks = 0; /* Number of tokens for N best */
static int nTrans = 1; /* Number of transcriptions for N best */
static Boolean states = FALSE; /* Keep track of state alignment */
static Boolean models = FALSE; /* Keep track of model alignment */

/* With what */
static char *datFN; /* Speech file */
static char *dictFn; /* Dictionary */
static char *wdNetFn = NULL; /* Word level lattice */
static char *hmmListFn; /* HMMs */
static char * hmmDir = NULL; /* directory to look for hmm def files */
static char * hmmExt = NULL; /* hmm def file extension */
static Boolean loadLabels = FALSE; /* Load labels for each file (set by -a:
                                      "align from label files") */
static Boolean loadNetworks = FALSE; /* Load network for each file */
static LabId bndId = NULL; /* Boundary word for alignment */

/* Results and formats */
static char * labDir = NULL; /* output label file directory */
static char * labExt = "rec"; /* output label file extension */
static char * labForm = NULL; /* output label reformat */
static char * latForm = NULL; /* output lattice format */
static char * labInDir = NULL; /* input network/label file directory */
static char * labInExt = "lab"; /* input network/label file extension */
static char * latExt = NULL; /* output lattice file extension */
static FileFormat dfmt=UNDEFF; /* Data input file format */
static FileFormat ifmt=UNDEFF; /* Label input file format */
static FileFormat ofmt=UNDEFF; /* Label output file format */
static Boolean saveAudioOut=FALSE;/* Save rec output from direct audio */
static char * roPrefix=NULL; /* Prefix for direct audio output name */
static char * roSuffix=NULL; /* Suffix for direct audio output name */
static int roCounter = 0; /* Counter for audio output name */
static Boolean replay = FALSE; /* enable audio replay */

/* Language model */
static double lmScale = 1.0; /* bigram and log(1/NSucc) scale factor */
static LogDouble wordPen = 0.0; /* inter model propagation log prob */
static double prScale = 1.0; /* pronunciation scale factor */

/* Pruning */
static LogDouble genBeam = -LZERO;/* genBeam threshold */

/* Storage-class specifier of the next pruning variable; its type and
   declarator follow on the next source line. */
static
LogDouble genBeamInc = 0.0; /* increment (the `static` for this declaration
                               precedes it on the previous source line) */
static LogDouble genBeamLim = -LZERO; /* max value */
static LogDouble nBeam = 0.0; /* nBeam threshold */
static LogDouble wordBeam = -LZERO;/* word-end pruning threshold */
static LogFloat tmBeam = 10.0; /* tied mix prune threshold */
static int maxActive = 0; /* max active phone instances */

/* Global variables */
static Observation obs; /* current observation */
static HMMSet hset; /* the HMM set */
static Vocab vocab; /* the dictionary */
static Lattice *wdNet; /* the word level recognition network */
static PSetInfo *psi; /* Private data used by HRec */
static VRecInfo *vri; /* Visible HRec Info */
static int maxM = 0; /* max mixtures in any model */
static int maxMixInS[SMAX]; /* array[1..swidth[0]] of max mixes */

/* Global adaptation variables */
static int update = 0; /* Perform MLLR & update every n utts */
static UttInfo *utt; /* utterance info for state/frame align */
static FBInfo *fbInfo; /* forward-backward info for alignment */
static PSetInfo *alignpsi; /* Private data used by HRec */
static VRecInfo *alignvri; /* Visible HRec Info */
static Boolean saveBinary=FALSE; /* Save tmf in binary format */

/* Heaps */
static MemHeap ansHeap;
static MemHeap modelHeap;
static MemHeap netHeap;
static MemHeap bufHeap;
static MemHeap repHeap;
static MemHeap regHeap;

/* information about transforms */
static XFInfo xfInfo;

/* ---------------- Configuration Parameters --------------------- */

static ConfParam *cParm[MAXGLOBS]; /* parameters returned by GetConfig() */
static int nParm = 0; /* total num params */

/* ---------------- Process Command Line ------------------------- */

/* SetConfParms: set conf parms relevant to this tool.
 *
 * Asks GetConfig() for the parameters registered under the name "HVITE",
 * storing them in cParm and their count in nParm.  When at least one was
 * found, the following parameters override the corresponding file-scope
 * settings if present:
 *   TRACE        -> trace
 *   RECOUTPREFIX -> roPrefix  (copied onto gstack with CopyString)
 *   RECOUTSUFFIX -> roSuffix  (copied onto gstack with CopyString)
 *   SAVEBINARY   -> saveBinary
 * Settings whose parameter is absent are left untouched.
 */
void SetConfParms(void){
   int i; /* receives the TRACE value */
   Boolean b; /* receives the SAVEBINARY value */
   char buf[MAXSTRLEN]; /* scratch buffer for string-valued parameters */

   nParm = GetConfig("HVITE", TRUE, cParm, MAXGLOBS);
   if (nParm>0){
      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;
      /* buf is reused for each string parameter, so the value is copied
         before the next lookup overwrites it. */
      if (GetConfStr(cParm,nParm,"RECOUTPREFIX",buf)) roPrefix=CopyString(&gstack,buf);
      if (GetConfStr(cParm,nParm,"RECOUTSUFFIX",buf)) roSuffix=CopyString(&gstack,buf);
      if (GetConfBool(cParm,nParm,"SAVEBINARY",&b)) saveBinary = b;
   }
}

/* ReportUsage: print the command-line synopsis to standard output.
 *
 * Writes one line per tool-specific option (option, description, default),
 * then calls PrintStdOpts() with the option letters "BEFGHIJKLPSX" and
 * finishes with two newlines.
 *
 * NOTE(review): the column spacing inside these strings looks collapsed
 * and "formating" is misspelled; both are runtime output and are left
 * exactly as found.
 * NOTE(review): -h is listed here, but no `case 'h'` appears in the part
 * of main()'s option switch that is visible; the handler may lie further
 * on -- confirm.
 */
void ReportUsage(void){
   printf("\nUSAGE: HVite [options] VocabFile HMMList DataFiles...\n\n");
   printf(" Option Default\n\n");
   printf(" -a align from label files off\n");
   printf(" -b s def s as utterance boundary word none\n");
   printf(" -c f tied mixture pruning threshold 10.0\n");
   printf(" -d s dir to find hmm definitions current\n");
   printf(" -e save direct audio rec output off\n");
   printf(" -f output full state alignment off\n");
   printf(" -g enable audio replay off\n");
   printf(" -h s set speaker name pattern *.mfc\n");
   printf(" -i s Output transcriptions to MLF s off\n");
   printf(" -j i Online MLLR adaptation off\n");
   printf(" Perform update every i utterances \n"); /* continuation of -j */
   printf(" -k use an input transform off\n");
   printf(" -l s dir to store label/lattice files current\n");
   printf(" -m output model alignment off\n");
   printf(" -n i [N] N-best recognition (using i tokens) off\n");
   printf(" -o s output label formating NCSTWMX none\n");
   printf(" -p f inter model trans penalty (log) 0.0\n");
   printf(" -q s output lattice formating ABtvaldmn tvaldmn\n");
   printf(" -r f pronunciation prob scale factor 1.0\n");
   printf(" -s f grammar scale factor 1.0\n");
   printf(" -t f [f f] set pruning threshold 0.0\n");
   printf(" -u i set pruning max active 0\n");
   printf(" -v f set word end pruning threshold 0.0\n");
   printf(" -w [s] recognise from network off\n");
   printf(" -x s extension for hmm files none\n");
   printf(" -y s output label file extension rec\n");
   printf(" -z s generate lattices with extension s off\n");
   PrintStdOpts("BEFGHIJKLPSX");
   printf("\n\n");
}

/* main: program entry point.
 *
 * Only the opening of the function lies in this span: local declarations
 * and the first library initialisation calls.  The body continues on the
 * following source line.
 */
int main(int argc, char *argv[]){
   char *s; /* current switch argument (assigned in the option loop) */
   /* Block-scope prototypes for functions defined elsewhere in the file. */
   void Initialise(void);
   void DoRecognition(void);
   void DoAlignment(void);

   /* Only InitShell and InitParm have their results compared against
      SUCCESS; a result below SUCCESS is reported through HError(3200,...).
      The other Init* calls here are made without checking a result. */
   if(InitShell(argc,argv,hvite_version,hvite_vc_id)<SUCCESS)
      HError(3200,"HVite: InitShell failed");
   InitMem();
   InitLabel();
   InitMath();
   InitSigP();
   InitWave();
   InitAudio();
   InitVQ();
   InitModel();
   if(InitParm()<SUCCESS)
      HError(3200,"HVite: InitParm failed");
   /* main() continues on the following source line */
InitDict(); InitNet(); InitRec(); InitUtil(); InitAdapt(&xfInfo); InitMap(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); CreateHeap(&modelHeap, "Model heap", MSTAK, 1, 0.0, 100000, 800000 ); CreateHMMSet(&hset,&modelHeap,TRUE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(3219,"HVite: Bad switch %s; must be single letter",s); switch(s[0]){ case 'a': loadLabels=TRUE; break; case 'b': if (NextArg()!=STRINGARG) HError(3219,"HVite: Utterance boundary word expected"); bndId = GetLabId(GetStrArg(),TRUE); break; case 'c': tmBeam = GetChkedFlt(0.0,1000.0,s); break; case 'd': if (NextArg()!=STRINGARG) HError(3219,"HVite: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'e': saveAudioOut=TRUE; break; case 'f': states=TRUE; break; case 'g': replay=TRUE; break; case 'i':/*已经用*/ if (NextArg()!=STRINGARG) HError(3219,"HVite: Output MLF file name expected"); /* if(SaveToMasterfile(GetStrArg())<SUCCESS) HError(3214,"HCopy: Cannot write to MLF"); */ SaveToMasterfile(GetStrArg()); break; case 'k': xfInfo.useInXForm = TRUE; break; case 'j': if (NextArg()!=INTARG) HError(3219,"HVite: No. 
of files per online adaptation step expected"); update = GetChkedInt(1,256,s); break; case 'l': if (NextArg()!=STRINGARG) HError(3219,"HVite: Label file directory expected"); labDir = GetStrArg(); break; case 'm': models=TRUE; break; case 'n': nToks = GetChkedInt(2,MAX_TOKS,s); if (NextArg()==FLOATARG || NextArg()==INTARG) nTrans = GetChkedInt(1,10000,s); else nTrans = 1; break; case 'o': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output label format expected"); labForm = GetStrArg(); break; case 'p':/*已经用*/ wordPen = GetChkedFlt(-1000.0,1000.0,s); break; case 'q': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output lattice format expected"); latForm = GetStrArg(); break; case 'r': prScale = GetChkedFlt(0.0,1000.0,s); break; case 's':/*已经用*/ lmScale = GetChkedFlt(0.0,1000.0,s); break; case 't': genBeam = GetChkedFlt(0,1.0E20,s); if (genBeam == 0.0) genBeam = -LZERO; if (NextArg()==FLOATARG || NextArg()==INTARG) { genBeamInc = GetChkedFlt(0.0,1.0E20,s); genBeamLim = GetChkedFlt(0.0,1.0E20,s); if (genBeamLim < (genBeam + genBeamInc)) { genBeamLim = genBeam; genBeamInc = 0.0; } } else { genBeamInc = 0.0; genBeamLim = genBeam; } break; case 'w':/*已经用*/ if (NextArg()!=STRINGARG) loadNetworks=TRUE; else { wdNetFn = GetStrArg(); if (strlen(wdNetFn)==0) { wdNetFn=NULL; loadNetworks=TRUE; } } break; case 'u': maxActive = GetChkedInt(0,100000,s); break; case 'v': wordBeam = GetChkedFlt(0,1.0E20,s); if (wordBeam == 0.0) wordBeam = -LZERO; break; case 'x': if (NextArg()!=STRINGARG)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -