📄 hhed.c
字号:
/* ----------------------------------------------------------- *//* *//* ___ *//* |_| | |_/ SPEECH *//* | | | | \ RECOGNITION *//* ========= SOFTWARE */ /* *//* *//* ----------------------------------------------------------- *//* developed at: *//* *//* Speech Vision and Robotics group *//* Cambridge University Engineering Department *//* http://svr-www.eng.cam.ac.uk/ *//* *//* Entropic Cambridge Research Laboratory *//* (now part of Microsoft) *//* *//* ----------------------------------------------------------- *//* Copyright: Microsoft Corporation *//* 1995-2000 Redmond, Washington USA *//* http://www.microsoft.com *//* *//* 2002-2004 Cambridge University *//* Engineering Department *//* *//* Use of this software is governed by a License Agreement *//* ** See the file License for the Conditions of Use ** *//* ** This banner notice must not be removed ** *//* *//* ----------------------------------------------------------- *//* File: HHEd: HMM Source Definition Editor *//* ----------------------------------------------------------- */char *hhed_version = "!HVER!HHEd: 3.3 [CUED 28/04/05]";char *hhed_vc_id = "$Id: HHEd.c,v 1.3 2005/07/22 10:17:13 mjfg Exp $";/* This program is used to read in a set of HMM definitions and then edit them according to the contents of a file of edit commands. See the routine called Summary for a list of these or run HHEd with the command option*/#include "HShell.h"#include "HMem.h"#include "HMath.h"#include "HSigP.h"#include "HAudio.h"#include "HWave.h"#include "HVQ.h"#include "HParm.h"#include "HLabel.h"#include "HModel.h"#include "HUtil.h"#include "HTrain.h"#include "HAdapt.h"#define FLOAT_MAX 1E10 /* Limit for float arguments */#define BIG_FLOAT 1E20 /* Limit for float arguments */#define MAX_ITER 500 /* Maximum number of iterations */ /* Lowest level of tracing reset at end of each command block */#define T_BAS 0x0001 /* Basic progess tracing */#define T_INT 0x0002 /* Intermediate progress tracing */#define T_DET 0x0004 /* Detailed progress tracing */#define T_ITM 0x0008 /* Show item lists */#define T_MEM 0x0010 /* Show state of memory (once only) *//* General tracing flags from command line */#define T_MAC 0x0040 /* Trace changes to macro definitions */#define T_SIZ 0x0080 /* Trace changes to stream widths *//* Detailed control over clustering output */#define T_CLUSTERS 0x0100 /* Show contents of clusters */#define T_QST 0x0200 /* Show items specified by questions */#define T_TREE_ANS 0x0400 /* Trace best unseen triphone tree filtering */#define T_TREE_BESTQ 0x0800 /* Trace best question for splitting each node */#define T_TREE_BESTM 0x1000 /* Trace best merge of terminal nodes */#define T_TREE_OKQ 0x2000 /* Trace all questions exceeding threshold */#define T_TREE_ALLQ 0x4000 /* Trace all questions */#define T_TREE_ALLM 0x8000 /* Trace all possible merges *//* Extra flags to allow easier multiple checks */#define T_TREE 0xf400 /* Any specific tree tracing */#define T_IND 0x0006 /* Intermediate or detailed tracing */#define T_BID 0x0007 /* Basic, intermediate or detailed tracing */#define T_MD 0x10000 /* Trace mix down detail merge */static MemHeap questHeap; /* Heap holds all questions */static MemHeap hmmHeap; /* Heap holds all hmm related info */static MemHeap tmpHeap; /* Temporary (duration of command or less) heap *//* Global Settings */static char * hmmDir = NULL; /* directory to look for hmm def files */static char * hmmExt = NULL; /* hmm def file extension */static char * newDir = NULL; /* directory to store new hmm def files */static char * newExt = NULL; /* extension of new edited hmm files */static Boolean noAlias = FALSE; /* set to zap all aliases in hmmlist */static Boolean inBinary = FALSE; /* set to save models in binary */static char * mmfFn = NULL; /* output MMF file, if any */static int cmdTrace = 0; /* trace level from command line */static int trace = 0; /* current trace level *//* Global Data Structures */static HMMSet hSet; /* current HMM set */static HMMSet *hset; /* current HMM set */static int fidx; /* current macro file id */static int maxStates; /* max number of states in current HMM set */static int maxMixes; /* max number of mixes in current HMM set */static Source source; /* the current input file */static MixtureElem *joinSet; /* current join Mix Set */static int nJoins; /* current num mixs in joinSet */static int joinSize=0; /* number of mixes in a joined pdf */static float joinFloor; /* join mix weight floor (* MINMIX) */static Boolean badGC = FALSE; /* set TRUE if gConst out of date */static float meanGC,stdGC; /* mean and stdev of GConst */static Boolean occStatsLoaded = FALSE; /* set when RO/LS has loaded occ stats */static float outlierThresh = -1.0; /* outlier threshold set by RO cmd */static int thisCommand; /* index of current command */static int lastCommand=0; /* index of previous command */static Boolean equivState = TRUE; /* TRUE if states can be equivalent */ /* but not identical */static Boolean useModelName = TRUE; /* Use base-phone name as tree name *//* ---------------- Configuration Parameters --------------------- */static ConfParam *cParm[MAXGLOBS];static int nParm = 0; /* total num params */static Boolean treeMerge = TRUE; /* After tree spltting merge leaves */static char tiedMixName[MAXSTRLEN] = "TM"; /* Tied mixture base name */static char mmfIdMask[MAXSTRLEN] = "*"; /* MMF Id Mask for baseclass */static Boolean useLeafStats = TRUE; /* Use leaf stats to init macros */static Boolean applyVFloor = TRUE; /* apply modfied varFloors to vars in model set */ /* ------------------ Process Command Line -------------------------- */void SetConfParms(void){ Boolean b; int i; nParm = GetConfig("HHED", TRUE, cParm, MAXGLOBS); if (nParm>0) { if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i; if (GetConfBool(cParm,nParm,"TREEMERGE",&b)) treeMerge = b; if (GetConfBool(cParm,nParm,"USELEAFSTATS",&b)) useLeafStats = b; if (GetConfBool(cParm,nParm,"APPLYVFLOOR",&b)) applyVFloor = b; if (GetConfBool(cParm,nParm,"USEMODELNAME",&b)) useModelName = b; GetConfStr(cParm,nParm,"TIEDMIXNAME",tiedMixName); GetConfStr(cParm,nParm,"MMFIDMASK",mmfIdMask); }}void Summary(void){ printf("\nHHEd Command Summary\n\n"); printf("AT i j prob itemlist - Add Transition from i to j in given mats\n"); printf("AU hmmlist - Add Unseen triphones in given hmmlist to\n"); printf(" currently loaded HMM list using previously\n"); printf(" built decision trees.\n"); printf("CL hmmList - CLone hmms to give new hmmList\n"); printf("CO newHmmList - COmpact identical HMM's by sharing same phys model\n"); printf("DP s n id ... - Duplicate the hmm set n times using id to differentiate\n"); printf(" the new hmms and macros. Only macros the type of which\n"); printf(" appears in s will be duplicated, others will be shared.\n"); printf("FA f - Set variance floor to average within state variance * f\n"); printf("FV vFloorfile - Load variance floor from file\n"); printf("FC - Convert diagonal variances to full covariances\n"); printf("HK hsetkind - change current set to hsetkind\n"); printf("JO size floor - set size and min mix weight for a JOin\n"); printf("LS statsfile - load named statsfile\n"); printf("LT filename - Load Questions and Trees from filename\n"); printf("MD n itemlist - MixDown command, change mixtures in itemlist to n\n"); printf("MM s itemlist - make each item in list into a macro with usage==1\n"); printf("MT triHmmList - Make Triphones from loaded biphones\n"); printf("MU n itemlist - MixUp command, change mixtures in itemlist to n\n"); printf("NC n macro itemlist - N-Cluster specified components and tie\n"); printf("QS name itemlist - define a question as a list of model names\n"); printf("RC n id [itemList] - Build n regression classes (for adaptation purposes)\n"); printf(" also supplying a regression tree identifier/label name\n"); printf(" Optional itemList to specify non-speech sounds\n"); printf("RM hmmfile - rem mean in state 2, mix 1 of hmmfile from all \n"); printf(" loaded models nb. whole mean is removed incl. dels\n"); printf("RN hmmSetIdentifier - Rename the hmm mmf with a new identifier name\n"); printf(" If omitted and MMF contains no identifier, then\n"); printf(" the MMF is given the identifier \"Standard\"\n"); printf("RO f [statsfile] - Remove outliers with counts < f as the\n"); printf(" final phase in the TC/NC commands. If statsfile\n"); printf(" is omitted, it must be already loaded (see LS)\n"); printf("RT i j itemlist - Rem Transition from i to j in given mats\n"); printf("SH - show the current HMM set (for debugging)\n"); printf("SK sk - Set sample kind of all models to sk\n"); printf("SS n - Split into n data Streams\n"); printf("ST filename - Save Questions and Trees to filename\n"); printf("SU n w1 .. wn - Split into user defined stream widths\n"); printf("SW s n - Set width of stream s to n\n"); printf("TB f macro itemlist - Tree build using QS questions and likelihood\n"); printf(" based clustering criterion.\n"); printf("TC f macro itemlist - Thresh Cluster specified comps to thresh f and tie\n"); printf("TI macro itemlist - TIe the specified components\n"); printf("TR n - set trace level to n (overrides -T option)\n"); printf("UT itemlist - UnTie the specified components\n"); printf("XF filename - Set the Input Xform to filename\n"); Exit(0);}void ReportUsage(void){ printf("\nUSAGE: HHEd [options] editF hmmList\n\n"); printf(" Option Default\n\n"); printf(" -d s dir to find hmm definitions current\n"); printf(" -o s extension for new hmm files as source\n"); printf(" -w mmf Save all HMMs to macro file mmf s as source\n"); printf(" -x s extension for hmm files none\n"); printf(" -z zap aliases in hmmList\n"); PrintStdOpts("BHMQ");}int main(int argc, char *argv[]){ char *s, *editFn; void DoEdit(char * editFn); void ZapAliases(void); void Initialise(char *hmmListFn); if(InitShell(argc,argv,hhed_version,hhed_vc_id)<SUCCESS) HError(2600,"HHEd: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(2600,"HHEd: InitParm failed"); InitUtil(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); CreateHeap(&hmmHeap,"Model Heap",MSTAK,1,1.0,40000,400000); CreateHMMSet(&hSet,&hmmHeap,TRUE);hset=&hSet;fidx=0; while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2619,"HHEd: Bad switch %s; must be single letter",s); switch(s[0]) { case 'd': if (NextArg()!=STRINGARG) HError(2619,"HHEd: Input HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'o': if (NextArg()!=STRINGARG) HError(2619,"HHEd: Output HMM file extension expected"); newExt = GetStrArg(); break; case 'w': if (NextArg()!=STRINGARG) HError(2619,"HHEd: Output MMF file name expected"); mmfFn = GetStrArg(); break; case 'x': if (NextArg()!=STRINGARG) HError(2619,"HHEd: Input HMM file extension expected"); hmmExt = GetStrArg(); break; case 'z': noAlias = TRUE; break; case 'B': inBinary=TRUE; break; case 'H': if (NextArg()!=STRINGARG) HError(2619,"HHEd: Input MMF file name expected"); AddMMF(hset,GetStrArg()); break; case 'M': if (NextArg()!=STRINGARG) HError(2619,"HHEd: Output HMM definition directory expected"); newDir = GetStrArg(); break; case 'Q': Summary(); break; case 'T': trace = cmdTrace = GetChkedInt(0,0x3FFFF,s); break; default: HError(2619,"HHEd: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(2619,"HHEd: Edit script file name expected"); editFn = GetStrArg(); if (NextArg() != STRINGARG) HError(2619,"HHEd: HMM list file name expected"); if (NumArgs()>1) HError(2619,"HHEd: Unexpected extra args on command line"); Initialise(GetStrArg()); if (hset->logWt == TRUE) HError(999,"HHEd requires linear weights"); DoEdit(editFn); Exit(0); return (0); /* never reached -- make compiler happy */}/* ----------------------- Lexical Routines --------------------- */int ChkedInt(char *what,int min,int max){ int ans; if (!ReadInt(&source,&ans,1,FALSE)) HError(2650,"ChkedInt: Integer read error - %s",what); if (ans<min || ans>max) HError(2651,"ChkedInt: Integer out of range - %s",what); return(ans);}float ChkedFloat(char *what,float min,float max){ float ans; if (!ReadFloat(&source,&ans,1,FALSE)) HError(2650,"ChkedFloat: Float read error - %s",what); if (ans<min || ans>max) HError(2651,"ChkedFloat: Float out of range - %s",what); return(ans);}char *ChkedAlpha(char *what,char *buf){ if (!ReadString(&source,buf)) HError(2650,"ChkedAlpha: String read error - %s",what); return(buf);}/* ------------- Question Handling for Tree Building ----------- */typedef struct _IPat{ char *pat; struct _IPat *next;}IPat;typedef struct _QEnt *QLink; /* Linked list of Questions */typedef struct _QEnt{ /* each question stored as both pattern and */ LabId qName; /* an expanded list of model names */ IPat *patList; ILink ilist; QLink next;}QEnt;static QLink qHead = NULL; /* Head of question list */static QLink qTail = NULL; /* Tail of question list *//* TraceQuestion: output given questions */void TraceQuestion(char *cmd, QLink q){ IPat *ip; printf(" %s %s: defines %d % d models\n ", cmd,q->qName->name,NumItems(q->ilist),hset->numLogHMM); for (ip=q->patList; ip!=NULL; ip=ip->next) printf("%s ",ip->pat); printf("\n"); fflush(stdout);}/* ParseAlpha: get next string from src and store it in s *//* This is a copy of ParseString with extra terminators */char *ParseAlpha(char *src, char *s){ static char term[]=".,)"; /* Special string terminators */ Boolean wasQuoted; int c,q=0; wasQuoted=FALSE; while (isspace((int) *src)) src++; if (*src == DBL_QUOTE || *src == SING_QUOTE){ wasQuoted = TRUE; q = *src; src++; } while(*src) { if (wasQuoted) { if (*src == q) return (src+1); } else { if (isspace((int) *src) || strchr(term,*src)) return (src); } if (*src==ESCAPE_CHAR) { src++; if (src[0]>='0' && src[1]>='0' && src[2]>='0' && src[0]<='7' && src[1]<='7' && src[2]<='7') { c = 64*(src[0] - '0') + 8*(src[1] - '0') + (src[2] - '0'); src+=2; } else c = *src; } else c = *src; *s++ = c; *s=0; src++; } return NULL;}/* LoadQuestion: store given question in question list */void LoadQuestion(char *qName, ILink ilist, char *pattern){ QLink q,c; LabId labid; IPat *ip; char *p,*r,buf[MAXSTRLEN]; q=(QLink) New(&questHeap,sizeof(QEnt)); q->ilist=ilist; labid=GetLabId(qName,TRUE); for (c=qHead;c!=NULL;c=c->next) if (c->qName==labid) break; if (c!=NULL) HError(2661,"LoadQuestion: Question name %s invalid",qName); q->qName=labid; labid->aux=q; q->next = NULL; q->patList = NULL; if (qHead==NULL) { qHead = q; qTail = q; } else { qTail->next = q; qTail = q; } for (p=pattern;*p && isspace((int) *p);p++); if (*p!='{') if (p==NULL) HError(2660,"LoadQuestion: no { in itemlist"); ++p; for (r=pattern+strlen(pattern)-1;r>=pattern && isspace((int) *r);r--); if (*r!='}') HError(2660,"LoadQuestion: no } in itemlist"); *r = ','; do { /* pick up model patterns from item list */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -