hvite.c

来自「Hidden Markov Toolkit (HTK) 3.2.1 HTK i」· C语言代码 · 共 1,016 行 · 第 1/3 页
1,016 行
            HError(3219,"HVite: HMM file extension expected");         hmmExt = GetStrArg(); break;      case 'y':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Output label file extension expected");         labExt = GetStrArg(); break;      case 'z':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Lattice output file extension expected");         latExt = GetStrArg(); break;      case 'F':         if (NextArg() != STRINGARG)            HError(3219,"HVite: Data File format expected");         if((dfmt = Str2Format(GetStrArg())) == ALIEN)            HError(-3289,"HVite: Warning ALIEN Input file format set");         break;      case 'G':         if (NextArg() != STRINGARG)            HError(3219,"HVite: Source Label File format expected");         if((ifmt = Str2Format(GetStrArg())) == ALIEN)            HError(-3289,"HVite: Warning ALIEN Input file format set");         break;      case 'H':         if (NextArg() != STRINGARG)            HError(3219,"HVite: MMF File name expected");         AddMMF(&hset,GetStrArg());          break;      case 'I':         if (NextArg() != STRINGARG)            HError(3219,"HVite: MLF file name expected");         LoadMasterFile(GetStrArg()); break;      case 'L':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Label/network file directory expected");         labInDir = GetStrArg(); break;      case 'P':         if (NextArg() != STRINGARG)            HError(3219,"HVite: Target Label File format expected");         if((ofmt = Str2Format(GetStrArg())) == ALIEN)            HError(-3289,"HVite: Warning ALIEN Label output file format set");         break;      case 'B':         saveBinary = TRUE;         break;      case 'T':         trace = GetChkedInt(0,511,s); break;      case 'X':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Input label/network file extension expected");         labInExt = GetStrArg(); break;      case 'h':	if (NextArg()!=STRINGARG)	  HError(1,"Speaker name pattern expected");	xfInfo.outSpkrPat = GetStrArg();	if (NextArg()==STRINGARG) {	  xfInfo.inSpkrPat = GetStrArg();	  if (NextArg()==STRINGARG)	    xfInfo.paSpkrPat = GetStrArg(); 	}	if (NextArg() != SWITCHARG)	  HError(2319,"HERest: cannot have -h as the last option");	  	break;      case 'E':         if (NextArg()!=STRINGARG)            HError(2319,"HERest: parent transform directory expected");	 xfInfo.usePaXForm = TRUE;         xfInfo.paXFormDir = GetStrArg();          if (NextArg()==STRINGARG)	   xfInfo.paXFormExt = GetStrArg(); 	 if (NextArg() != SWITCHARG)	   HError(2319,"HVite: cannot have -E as the last option");	           break;                    case 'J':         if (NextArg()!=STRINGARG)            HError(2319,"HERest: input transform directory expected");         AddInXFormDir(&hset,GetStrArg());         if (NextArg()==STRINGARG)	   xfInfo.inXFormExt = GetStrArg(); 	 if (NextArg() != SWITCHARG)	   HError(2319,"HVite: cannot have -J as the last option");	           break;                    case 'K':         if (NextArg()!=STRINGARG)            HError(2319,"HVite: output transform directory expected");         xfInfo.outXFormDir = GetStrArg(); 	 xfInfo.useOutXForm = TRUE;         if (NextArg()==STRINGARG)	   xfInfo.outXFormExt = GetStrArg(); 	 if (NextArg() != SWITCHARG)	   HError(2319,"HVite: cannot have -K as the last option");	           break;                    default:         HError(3219,"HVite: Unknown switch %s",s);      }   }      if (NextArg()!=STRINGARG)      HError(3219,"HVite: Dictionary file name expected");   dictFn = GetStrArg();   if (NextArg()!=STRINGARG)      HError(3219,"HVite: HMM list  file name expected");   hmmListFn = GetStrArg();   if ((states || models) && nToks>1)      HError(3230,"HVite: Alignment using multiple tokens is not supported");   if (NumArgs()==0 && wdNetFn==NULL)      HError(3230,"HVite: Network must be specified for recognition from audio");   if (loadNetworks && loadLabels)      HError(3230,"HVite: Must choose either alignment from network or labels");   if (nToks>1 && latExt==NULL && nTrans==1)      HError(-3230,"HVite: Performing nbest recognition with no nbest output");   if ((update>0) && (!xfInfo.useOutXForm))      HError(3230,"HVite: Must use -K option with incremental adaptation");   Initialise();   /* Process the data */   if (wdNetFn==NULL)      DoAlignment();   else      DoRecognition();   /* Free up and we are done */   if (trace & T_MEM) {      printf("Memory State on Completion\n");      PrintAllHeapStats();   }   DeleteVRecInfo(vri);   ResetHeap(&netHeap);   FreePSetInfo(psi);   UpdateSpkrStats(&hset,&xfInfo, NULL);    ResetHeap(&regHeap);   ResetHeap(&modelHeap);   Exit(0);   return (0);          /* never reached -- make compiler happy */}/* --------------------------- Initialisation ----------------------- *//* Initialise: set up global data structures */void Initialise(void){   Boolean eSep;   int s;   /* Load hmms, convert to inverse DiagC */   if(MakeHMMSet(&hset,hmmListFn)<SUCCESS)       HError(3228,"Initialise: MakeHMMSet failed");   if(LoadHMMSet(&hset,hmmDir,hmmExt)<SUCCESS)       HError(3228,"Initialise: LoadHMMSet failed");   ConvDiagC(&hset,TRUE);      /* Create observation and storage for input buffer */   SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);   obs=MakeObservation(&gstack,hset.swidth,hset.pkind,                       hset.hsKind==DISCRETEHS,eSep);   /* sort out masks just in case using adaptation */   if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat;    if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat;    if (xfInfo.useOutXForm || (update>0)) {      CreateHeap(&regHeap,   "regClassStore",  MSTAK, 1, 0.5, 1000, 8000 );      /* This initialises things - temporary hack - THINK!! */      CreateAdaptXForm(&hset, "tmp");      /* initialise structures for the f-b frame-state alignment pass */      utt = (UttInfo *) New(&regHeap, sizeof(UttInfo));      fbInfo = (FBInfo *) New(&regHeap, sizeof(FBInfo));      /* initialise a recogniser for frame/state alignment purposes */      alignpsi=InitPSetInfo(&hset);      alignvri=InitVRecInfo(alignpsi,1,TRUE,FALSE);      SetPruningLevels(alignvri,0,genBeam,-LZERO,0.0,tmBeam);      InitUttInfo(utt, FALSE);      InitialiseForBack(fbInfo, &regHeap, &hset,                        (UPDSet) (UPXFORM), genBeam*2.0, genBeam*2.0,                         genBeam*4.0+1.0, 10.0);      utt->twoDataFiles = FALSE;      utt->S = hset.swidth[0];       AttachPreComps(&hset,hset.hmem);   }       /* Create observation and storage for input buffer */   SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);   obs=MakeObservation(&gstack,hset.swidth,hset.pkind,                       hset.hsKind==DISCRETEHS,eSep);   CreateHeap(&bufHeap,"Input Buffer heap",MSTAK,1,0.0,50000,50000);   CreateHeap(&repHeap,"Replay Buffer heap",MSTAK,1,0.0,50000,50000);      maxM = MaxMixInSet(&hset);   for (s=1; s<=hset.swidth[0]; s++)      maxMixInS[s] = MaxMixInSetS(&hset, s);   if (trace&T_TOP) {      printf("Read %d physical / %d logical HMMs\n",             hset.numPhyHMM,hset.numLogHMM);  fflush(stdout);   }      /* Initialise recogniser */   if (nToks>1) nBeam=genBeam;   psi=InitPSetInfo(&hset);   vri=InitVRecInfo(psi,nToks,models,states);   /* Read dictionary and create storage for lattice */   InitVocab(&vocab);      if(ReadDict(dictFn,&vocab)<SUCCESS)       HError(3213, "Main: ReadDict failed");   CreateHeap(&ansHeap,"Lattice heap",MSTAK,1,0.0,4000,4000);   if (trace & T_MEM){      printf("Memory State After Initialisation\n");      PrintAllHeapStats();   }}/* ------------------ Utterance Level Recognition  ----------------------- *//* ReplayAudio:  replay the last audio input */void ReplayAudio(BufferInfo info){   AudioOut ao;   if (info.a != NULL) {      ao = OpenAudioOutput(&repHeap,&(info.srcSampRate));      PlayReplayBuffer(ao, info.a);      while (SamplesToPlay(ao) > 0 );      CloseAudioOutput(ao);   }}/* DoOnlineAdaptation: Perform unsupervised online adaptation   using the recognition hypothesis as the transcription */int DoOnlineAdaptation(Lattice *lat, ParmBuf pbuf, int nFrames){   Transcription *modelTrans, *trans;   BufferInfo pbinfo;   Lattice *alignLat, *wordNet;   Network *alignNet;   int i;   GetBufferInfo(pbuf,&pbinfo);   trans=TranscriptionFromLattice(&netHeap,lat,1);   wordNet=LatticeFromLabels(GetLabelList(trans,1),bndId,                             &vocab,&netHeap);   alignNet=ExpandWordNet(&netHeap,wordNet,&vocab,&hset);   StartRecognition(alignvri,alignNet,0.0,0.0,0.0);        /* do forced alignment */   for (i = 0; i < nFrames; i++) {      ReadAsTable(pbuf, i, &obs);      ProcessObservation(alignvri,&obs,-1,xfInfo.inXForm);   }       alignLat=CompleteRecognition(alignvri,                                pbinfo.tgtSampRate/10000000.0,                                &netHeap);           if (alignvri->noTokenSurvived) {      Dispose(&netHeap, trans);      /* Return value 0 to indicate zero frames process failed */      return 0;   }   modelTrans=TranscriptionFromLattice(&netHeap,alignLat,1);         /* format the transcription so that it contains just the models */   FormatTranscription(modelTrans,pbinfo.tgtSampRate,FALSE,TRUE,                       FALSE,FALSE,TRUE,FALSE,TRUE,TRUE, FALSE);   /* Now do the frame/state alignment accumulating MLLR statistics */   /* set the various values in the utterance storage */   utt->tr = modelTrans;   utt->pbuf = pbuf;   utt->Q = CountLabs(utt->tr->head);   utt->T = nFrames;   utt->ot = obs;     /* do frame state alignment and accumulate statistics */   fbInfo->inXForm = xfInfo.inXForm;   fbInfo->al_inXForm = xfInfo.inXForm;   fbInfo->paXForm = xfInfo.paXForm;   if (!FBFile(fbInfo, utt, NULL))     nFrames = 0;   Dispose(&netHeap, trans);   if (trace&T_TOP) {      printf("Accumulated statistics...\n");       fflush(stdout);   }   return nFrames;} /* ProcessFile: process given file. If fn=NULL then direct audio */Boolean ProcessFile(char *fn, Network *net, int utterNum, LogDouble currGenBeam, Boolean restartable){   FILE *file;   ParmBuf pbuf;   BufferInfo pbinfo;   NetNode *d;   Lattice *lat;   LArc *arc,*cur;   LNode *node;   Transcription *trans;   MLink m;   LogFloat lmlk,aclk;   int s,j,tact,nFrames;   LatFormat form;   char *p,lfn[255],buf1[80],buf2[80],thisFN[MAXSTRLEN];   Boolean enableOutput = TRUE, isPipe;   if (fn!=NULL)      strcpy(thisFN,fn);   else if (fn==NULL && saveAudioOut)      CounterFN(roPrefix,roSuffix,++roCounter,4,thisFN);   else       enableOutput = FALSE;         if((pbuf = OpenBuffer(&bufHeap,fn,50,dfmt,TRI_UNDEF,TRI_UNDEF))==NULL)      HError(3250,"ProcessFile: Config parameters invalid");      /* Check pbuf same as hset */   GetBufferInfo(pbuf,&pbinfo);   if (pbinfo.tgtPK!=hset.pkind)      HError(3231,"ProcessFile: Incompatible sample kind %s vs %s",             ParmKind2Str(pbinfo.tgtPK,buf1),             ParmKind2Str(hset.pkind,buf2));   if (pbinfo.a != NULL && replay)  AttachReplayBuf(pbinfo.a, (int) (3*(1.0E+07/pbinfo.srcSampRate)));   StartRecognition(vri,net,lmScale,wordPen,prScale);   SetPruningLevels(vri,maxActive,currGenBeam,wordBeam,nBeam,tmBeam);    tact=0;nFrames=0;   StartBuffer(pbuf);   while(BufferStatus(pbuf)!=PB_CLEARED) {      ReadAsBuffer(pbuf,&obs);      if (trace&T_OBS) PrintObservation(nFrames,&obs,13);
hvite.c - 源码说明

本页面展示了「Hidden Markov Toolkit (HTK) 3.2.1 HTK is a toolkit for use in research into automatic speech recogn」中的 hvite.c 源码文件，采用 C语言编程语言编写，共 1,016 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与HTK相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?