⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hbuild.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//*         Copyright: Microsoft Corporation                    *//*          1995-2000 Redmond, Washington USA                  *//*                    http://www.microsoft.com                 *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*     File: HBuild.c:  Word-Lattice Building                  *//* ----------------------------------------------------------- */char *hbuild_version = "!HVER!HBuild:   3.3 [CUED 28/04/05]";char *hbuild_vc_id = "$Id: HBuild.c,v 1.1.1.1 2005/05/12 10:52:53 jal58 Exp $";/* The HBuild program takes input files in a number of different   formats and constructs suitable HTK word lattice files.   The formats currently supported by HBuild include:   a) Bigrams in either ARPA/Lincol-Labs format or HTK matrix format   b) HTK Multi-Level lattices   c) Word Lists for simple loops   d) ARPA word-pair grammars (Resource Management style)*//* Trace Flags */#define T_TOP        0001    /* Top Level tracing */#include "HShell.h" /* HMM ToolKit Modules */#include "HMem.h"#include "HMath.h"#include "HSigP.h"#include "HAudio.h"#include "HWave.h"#include "HVQ.h"#include "HParm.h" #include "HLabel.h"#include "HModel.h"#include "HUtil.h" #include "HDict.h"#include "HNet.h"#include "HLM.h"typedef enum {unknown, wordLoop, boBiGram, matBiGram, multiLat, wordPair} BuildType;static int trace     = 0;           /* Trace flags */static LabId enterId;               /* id of !ENTRY label in ngram */static LabId exitId;                /* id of !EXIT label in ngram */static LabId bStartId=NULL;         /* id of start bracket */static LabId bEndId=NULL;           /* id of end bracket */static LabId unknownId;             /* id of unknown label in ngram */static Boolean zapUnknown = FALSE;  /* zap unknown symbols from bigram */MemHeap buildStack;/* ---------------- Configuration Parameters --------------------- */static ConfParam *cParm[MAXGLOBS];static int nParm = 0;            /* total num params *//* ---------------- Process Command Line ------------------------- *//* SetConfParms: set conf parms relevant to this tool */void SetConfParms(void){   int i;   nParm = GetConfig("HBUILD", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;   }}void ReportUsage(void){   printf("\nUSAGE: HBuild [options] wordList latFile\n\n");   printf(" Option                                       Default\n\n");   printf(" -b      binary lattice output                ASCII\n");   printf(" -m s    load matrix bigram from s            off\n");   printf(" -n s    load back-off bigram from s          off\n");   printf(" -s s1 s2 s1/s2 are bigram start/end labels   !ENTER !EXIT\n");   printf(" -t s1 s2 bracket word-loop/pair with s1 s2   off\n");   printf(" -u s    set unknown symbol to s              !NULL\n");   printf(" -w s    load word-pair grammar from s        off\n");   printf(" -x s    load multi-level lattice from s      off\n");   printf(" -z      ignore ngrams with unknown symbol    off\n");   PrintStdOpts("");    printf("\n\n");}int main(int argc, char *argv[]){   char *wordListFn,*latFn,*ipFn=NULL;   LModel *bigramLm;   BuildType bType = unknown;   Boolean saveLatBin = FALSE;   LatFormat format = HLAT_LMLIKE;   Lattice *lat,*ipLat;   Vocab voc;   char  *s;   Lattice *ProcessWordLoop(MemHeap *latHeap, Vocab *voc);   Lattice *ProcessBiGram(MemHeap *latHeap, Vocab *voc, LModel *biLM);   void SaveLattice(Lattice *lat, char *latFn, LatFormat format);   Lattice *LoadLattice(MemHeap *latHeap, char *latFn, Vocab *voc,                        Boolean shortArc);   Lattice *ProcessWordPair(MemHeap *latHeap, Vocab *voc, char *fn);   if(InitShell(argc,argv,hbuild_version,hbuild_vc_id)<SUCCESS)      HError(3000,"HBuild: InitShell failed");   InitMem();   InitLabel();   InitMath();     InitDict();  InitNet();     InitLM();   CreateHeap(&buildStack, "HBuild Stack",  MSTAK, 1, 0.0, 100000, LONG_MAX );   if (!InfoPrinted() && NumArgs() == 0)      ReportUsage();   if (NumArgs() == 0) Exit(0);   SetConfParms();   enterId=GetLabId("!ENTER",TRUE);   /* All sentences should or are coerced */   exitId=GetLabId("!EXIT",TRUE);     /*  to start enterId and end exitId */   unknownId=GetLabId("!NULL",TRUE);  /* Name for words not in list */   while (NextArg() == SWITCHARG) {      s = GetSwtArg();      if (strlen(s)!=1)          HError(3019,"HBuild: Bad switch %s; must be single letter",s);      switch(s[0]){      case 'b':         saveLatBin = TRUE; break;          case 'm':         if (bType != unknown)            HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x");         bType = matBiGram;         if (NextArg()!=STRINGARG)            HError(3019,"HBuild: Matrix Bigram file name expected");         ipFn = GetStrArg();          break;      case 'n':         if (bType != unknown)            HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x");         bType = boBiGram;         if (NextArg()!=STRINGARG)            HError(3019,"HBuild: Back-off Bigram file name expected");         ipFn = GetStrArg();          break;      case 's':         if (NextArg() != STRINGARG)            HError(3019,"HBuild: Bigram ENTER label name expected");         enterId=GetLabId(GetStrArg(),TRUE);         if (NextArg() != STRINGARG)            HError(3019,"HBuild: Bigram EXIT label name expected");         exitId=GetLabId(GetStrArg(),TRUE);         break;      case 't':         if (NextArg() != STRINGARG)            HError(3019,"HBuild: Bracket start label name expected");         bStartId=GetLabId(GetStrArg(),TRUE);         if (NextArg() != STRINGARG)            HError(3019,"HBuild: Bracket end label name expected");         bEndId=GetLabId(GetStrArg(),TRUE);         break;      case 'u':         if (NextArg() != STRINGARG)            HError(3019,"HBuild: Unknown label name expected");         unknownId=GetLabId(GetStrArg(),TRUE);         break;      case 'w':         if (bType != unknown)            HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x");         bType = wordPair;         if (NextArg()!=STRINGARG)            HError(3019,"HBuild: Word pair grammar file name expected");         ipFn = GetStrArg();          break;      case 'x':         if (bType != unknown)            HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x");         bType = multiLat;         if (NextArg()!=STRINGARG)            HError(3019,"HBuild: Multi-level lattice file name expected");         ipFn = GetStrArg();          break;      case 'z':         zapUnknown = TRUE; break;          case 'T':         trace = GetChkedInt(0,511,s); break;      default:         HError(3019,"HBuild: Unknown switch %s",s);      }   }    if (NextArg()!=STRINGARG)      HError(3019,"HBuild: Word List file name expected");   wordListFn = GetStrArg();   if (NextArg()!=STRINGARG)      HError(3019,"HBuild: output lattice file name expected");   latFn = GetStrArg();   if (bType == unknown) bType = wordLoop;   if (saveLatBin) format |= HLAT_LBIN;      /* Read the word-list into a Vocab data structure */   InitVocab(&voc);   if(ReadDict(wordListFn, &voc)<SUCCESS)      HError(3013,"HBuild: ReadDict failed");   switch (bType) {   case matBiGram:      if (trace & T_TOP)         printf("Reading bigram from file %s\n",ipFn);      bigramLm = ReadLModel(&gstack, ipFn);      if (bigramLm->type != matBigram)         HError(3030,"HBuild: File specified is not a matrix bigram");      lat = ProcessBiGram(&gstack,&voc,bigramLm);      SaveLattice(lat,latFn,format);      break;   case boBiGram:      if (trace & T_TOP)         printf("Reading bigram from file %s\n",ipFn);      bigramLm = ReadLModel(&gstack, ipFn);      if (bigramLm->type != boNGram)         HError(3030,"HBuild: File specified is not a back-off bigram");      lat = ProcessBiGram(&gstack,&voc,bigramLm);      SaveLattice(lat,latFn,format);      break;   case multiLat:      if (trace & T_TOP)         printf("Reading input lattice from file %s\n",ipFn);      ipLat = LoadLattice(&buildStack,ipFn,&voc,FALSE);      if (ipLat->subList!=NULL) {         if (trace & T_TOP)            printf("Expanding multi-level lattice\n");         lat = ExpandMultiLevelLattice(&buildStack,ipLat,&voc);      }      else         lat = ipLat;      SaveLattice(lat,latFn,format);      break;   case wordLoop:      if (trace & T_TOP)         printf("Building word loop\n");      lat = ProcessWordLoop(&gstack,&voc);      SaveLattice(lat,latFn,format);      break;   case wordPair:      lat = ProcessWordPair(&gstack,&voc,ipFn);      SaveLattice(lat,latFn,format);      break;   default:      HError(3001,"Only Bigram LMs / multiLats currently implemented");   }   Exit(0);   return (0);          /* never reached -- make compiler happy */}/* Save a lattice to a file latFn  */void SaveLattice(Lattice *lat, char *latFn, LatFormat format){   FILE *latf;   Boolean isPipe;   if (trace & T_TOP)      printf("Saving lattice to file %s\n",latFn);   if ( (latf = FOpen(latFn,NetOFilter,&isPipe)) == NULL)      HError(3011,"SaveLattice : Cannot create new lattice file  %s",latFn);   if(WriteLattice(lat,latf,format)<SUCCESS)      HError(3011,"SaveLattice : Cannot create new lattice file  %s",latFn);   FClose(latf,isPipe);}/* Load a lattice from file latFn  */Lattice *LoadLattice(MemHeap *latHeap, char *latFn, Vocab *voc,                     Boolean shortArc){   FILE *latf;   Boolean isPipe;   Lattice *lat;   if ( (latf = FOpen(latFn,NetFilter,&isPipe)) == NULL)      HError(3010,"LoadLattice : Cannot open lattice file %s",latFn);   if((lat = ReadLattice(latf,latHeap,voc,shortArc,FALSE))==NULL)      HError(3010,"LoadLattice : ReadLattice failed");   FClose(latf,isPipe);   return lat;}Lattice *ProcessWordLoop(MemHeap *latHeap, Vocab *voc){   int nNode,nArc;   LNode *ln;   LArc *la;   Word wd;   Lattice *lat;   int i;   nNode = voc->nwords+4;    nArc =  voc->nwords*2 + 3;      lat = NewLattice(latHeap,nNode,nArc);   lat->voc = voc;   lat->lmscale = 1.0; lat->wdpenalty = 0.0;    /* fill in start/end/loop word entries with !NULL */   wd = voc->nullWord;   ln = lat->lnodes; ln->word = wd; ln->n=0; ln->v=0;   ln = lat->lnodes+1; ln->word = wd; ln->n=0; ln->v=0;   ln = lat->lnodes+nNode-1; ln->word = wd; ln->n=0; ln->v=0;   ln = lat->lnodes+nNode-2; ln->word = wd; ln->n=0; ln->v=0;   ln = lat->lnodes+2;   for (i = 0; i< VHASHSIZE; i++)      for ( wd = voc->wtab[i]; wd != NULL; wd = wd->next )          if ((wd != voc->nullWord) && (wd != voc->subLatWord)) {            ln->word = wd;            ln++;         }   la =lat->larcs;   la->start = lat->lnodes;     la->end = lat->lnodes+1;    la->lmlike = 0.0;   la = lat->larcs+1;   la->start = lat->lnodes+nNode-2;     la->end = lat->lnodes+nNode-1;   la->lmlike = 0.0;   la = lat->larcs+2;   la->start = lat->lnodes+nNode-2;      la->end = lat->lnodes+1;   la->lmlike = 0.0;   la = lat->larcs+3;   for (i = 0; i < voc->nwords; i++) {      la->start = lat->lnodes+1;      la->end = lat->lnodes+2+i;      la->lmlike = log(1.0/(float) (voc->nwords));      la++;   }   for (i = 0; i < voc->nwords; i++) {      la->start = lat->lnodes+2+i;      la->end = lat->lnodes+nNode-2;      la->lmlike = 0.0;      la++;   }   /* finally overwrite start/end !NULL words if sil at start/end */   if (bStartId != NULL) {      wd = GetWord(voc,bStartId,TRUE);      ln = lat->lnodes; ln->word = wd;      wd = GetWord(voc,bEndId,TRUE);      ln = lat->lnodes+nNode-1; ln->word = wd;   }   return lat;}/*ProcessBoBiGram: Convert back-off bigram in nLM into lattice */ Lattice *ProcessBoBiGram(MemHeap *latHeap, Vocab *voc, NGramLM *nLM){   int nNode,nArc;   NEntry *ne;   SEntry *se;   Word wd,fromWd,toWd;   LNode *ln,*fromNode,*toNode;   LArc *la;   lmId ndx[NSIZE+1];     int i,j,k;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -