📄 hvite.c

📁 实现HMM算法
💻 C
📖 第 1 页 / 共 3 页
字号:
            HError(3219,"HVite: HMM file extension expected");         hmmExt = GetStrArg(); break;      case 'y':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Output label file extension expected");         labExt = GetStrArg(); break;      case 'z':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Lattice output file extension expected");         latExt = GetStrArg(); break;      case 'F':         if (NextArg() != STRINGARG)            HError(3219,"HVite: Data File format expected");         if((dfmt = Str2Format(GetStrArg())) == ALIEN)            HError(-3289,"HVite: Warning ALIEN Input file format set");         break;      case 'G':         if (NextArg() != STRINGARG)            HError(3219,"HVite: Source Label File format expected");         if((ifmt = Str2Format(GetStrArg())) == ALIEN)            HError(-3289,"HVite: Warning ALIEN Input file format set");         break;      case 'H':/*已经用*/         if (NextArg() != STRINGARG)            HError(3219,"HVite: MMF File name expected");         AddMMF(&hset,GetStrArg());          break;      case 'I':         if (NextArg() != STRINGARG)            HError(3219,"HVite: MLF file name expected");         LoadMasterFile(GetStrArg()); break;      case 'L':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Label/network file directory expected");         labInDir = GetStrArg(); break;      case 'P':         if (NextArg() != STRINGARG)            HError(3219,"HVite: Target Label File format expected");         if((ofmt = Str2Format(GetStrArg())) == ALIEN)            HError(-3289,"HVite: Warning ALIEN Label output file format set");         break;      case 'B':         saveBinary = TRUE;         break;      case 'T':         trace = GetChkedInt(0,511,s); break;      case 'X':         if (NextArg()!=STRINGARG)            HError(3219,"HVite: Input label/network file extension expected");         labInExt = GetStrArg(); break;      case 'h':	if (NextArg()!=STRINGARG)	  HError(1,"Speaker name pattern expected");	xfInfo.outSpkrPat = GetStrArg();	if (NextArg()==STRINGARG) {	  xfInfo.inSpkrPat = GetStrArg();	  if (NextArg()==STRINGARG)	    xfInfo.paSpkrPat = GetStrArg(); 	}	if (NextArg() != SWITCHARG)	  HError(2319,"HERest: cannot have -h as the last option");	  	break;      case 'E':         if (NextArg()!=STRINGARG)            HError(2319,"HERest: parent transform directory expected");	 xfInfo.usePaXForm = TRUE;         xfInfo.paXFormDir = GetStrArg();          if (NextArg()==STRINGARG)	   xfInfo.paXFormExt = GetStrArg(); 	 if (NextArg() != SWITCHARG)	   HError(2319,"HVite: cannot have -E as the last option");	           break;                    case 'J':         if (NextArg()!=STRINGARG)            HError(2319,"HERest: input transform directory expected");         AddInXFormDir(&hset,GetStrArg());         if (NextArg()==STRINGARG)	   xfInfo.inXFormExt = GetStrArg(); 	 if (NextArg() != SWITCHARG)	   HError(2319,"HVite: cannot have -J as the last option");	           break;                    case 'K':         if (NextArg()!=STRINGARG)            HError(2319,"HVite: output transform directory expected");         xfInfo.outXFormDir = GetStrArg(); 	 xfInfo.useOutXForm = TRUE;         if (NextArg()==STRINGARG)	   xfInfo.outXFormExt = GetStrArg(); 	 if (NextArg() != SWITCHARG)	   HError(2319,"HVite: cannot have -K as the last option");	           break;                    default:         HError(3219,"HVite: Unknown switch %s",s);      }   }      if (NextArg()!=STRINGARG)      HError(3219,"HVite: Dictionary file name expected");   dictFn = GetStrArg();/*获得字典文件名*/   if (NextArg()!=STRINGARG)      HError(3219,"HVite: HMM list  file name expected");   hmmListFn = GetStrArg();/*获得模型列表文件名*/   if ((states || models) && nToks>1)      HError(3230,"HVite: Alignment using multiple tokens is not supported");   if (NumArgs()==0 && wdNetFn==NULL)      HError(3230,"HVite: Network must be specified for recognition from audio");   if (loadNetworks && loadLabels)      HError(3230,"HVite: Must choose either alignment from network or labels");   if (nToks>1 && latExt==NULL && nTrans==1)      HError(-3230,"HVite: Performing nbest recognition with no nbest output");   if ((update>0) && (!xfInfo.useOutXForm))      HError(3230,"HVite: Must use -K option with incremental adaptation");   Initialise();   /* Process the data */   if (wdNetFn==NULL)      DoAlignment();   else      DoRecognition();   /* Free up and we are done */   if (trace & T_MEM) {      printf("Memory State on Completion\n");      PrintAllHeapStats();   }   DeleteVRecInfo(vri);   ResetHeap(&netHeap);   FreePSetInfo(psi);   UpdateSpkrStats(&hset,&xfInfo, NULL);    ResetHeap(&regHeap);   ResetHeap(&modelHeap);   Exit(0);   return (0);          /* never reached -- make compiler happy */}/* --------------------------- Initialisation ----------------------- *//* Initialise: set up global data structures */void Initialise(void){   Boolean eSep;   int s;   /* Load hmms, convert to inverse DiagC */   if(MakeHMMSet(&hset,hmmListFn)<SUCCESS)/*从音素列表hmmListFn建立HMMSet*/       HError(3228,"Initialise: MakeHMMSet failed");   if(LoadHMMSet(&hset,hmmDir,hmmExt)<SUCCESS) /*导入模型数据，当前hmmDir=null,hmmExt=null*/      HError(3228,"Initialise: LoadHMMSet failed");   ConvDiagC(&hset,TRUE);/*将模型的协方差矩阵转换成对角阵*/      /* Create observation and storage for input buffer */   SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);   obs=MakeObservation(&gstack,hset.swidth,hset.pkind,                       hset.hsKind==DISCRETEHS,eSep);   /* sort out masks just in case using adaptation */   if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat; /*当前执行此处*/   if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat; /*当前执行此处*/   if (xfInfo.useOutXForm || (update>0)) {/*当前HVite.c做识别时，此判断条件为假*/      CreateHeap(&regHeap,   "regClassStore",  MSTAK, 1, 0.5, 1000, 8000 );      /* This initialises things - temporary hack - THINK!! */      CreateAdaptXForm(&hset, "tmp");      /* initialise structures for the f-b frame-state alignment pass */      utt = (UttInfo *) New(&regHeap, sizeof(UttInfo));      fbInfo = (FBInfo *) New(&regHeap, sizeof(FBInfo));      /* initialise a recogniser for frame/state alignment purposes */      alignpsi=InitPSetInfo(&hset);      alignvri=InitVRecInfo(alignpsi,1,TRUE,FALSE);      SetPruningLevels(alignvri,0,genBeam,-LZERO,0.0,tmBeam);      InitUttInfo(utt, FALSE);      InitialiseForBack(fbInfo, &regHeap, &hset,                        (UPDSet) (UPXFORM), genBeam*2.0, genBeam*2.0,                         genBeam*4.0+1.0, 10.0);      utt->twoDataFiles = FALSE;      utt->S = hset.swidth[0];       AttachPreComps(&hset,hset.hmem);   }/*当前HVite.c做识别时，此判断条件为假*/       /* Create observation and storage for input buffer */   SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);   obs=MakeObservation(&gstack,hset.swidth,hset.pkind,                       hset.hsKind==DISCRETEHS,eSep);   CreateHeap(&bufHeap,"Input Buffer heap",MSTAK,1,0.0,50000,50000);   CreateHeap(&repHeap,"Replay Buffer heap",MSTAK,1,0.0,50000,50000);      maxM = MaxMixInSet(&hset);/*当前maxM=*/   for (s=1; s<=hset.swidth[0]; s++)/*当前hset.swidth[0]=1*/      maxMixInS[s] = MaxMixInSetS(&hset, s);/*当前MaxMixInSetS(&hset, s)=1*/   if (trace&T_TOP) {      printf("Read %d physical / %d logical HMMs\n",             hset.numPhyHMM,hset.numLogHMM);  fflush(stdout);   }      /* Initialise recogniser */   if (nToks>1) nBeam=genBeam;/*当前HVite.c做识别时，此判断条件为假*//*static int nToks = 0;  Number of tokens for N best */
    /*PSetInfo *psi：    Private data used by HRec ；VRecInfo *vri：  Visible HRec Info */   psi=InitPSetInfo(&hset);/* Prepare HMMSet for recognition.  Allocates seIndex and preComp from *//*  hmmset heap.*/   vri=InitVRecInfo(psi,nToks,models,states);/* 当前nToks=0;states和model真值为假,static Boolean states = FALSE; Keep track of state alignment ,static Boolean models = FALSE;  Keep track of model alignment */   /* Read dictionary and create storage for lattice */   InitVocab(&vocab);      if(ReadDict(dictFn,&vocab)<SUCCESS)       HError(3213, "Main: ReadDict failed");   CreateHeap(&ansHeap,"Lattice heap",MSTAK,1,0.0,4000,4000);   if (trace & T_MEM){      printf("Memory State After Initialisation\n");      PrintAllHeapStats();   }}/* ------------------ Utterance Level Recognition  ----------------------- *//* ReplayAudio:  replay the last audio input */void ReplayAudio(BufferInfo info){   AudioOut ao;   if (info.a != NULL) {      ao = OpenAudioOutput(&repHeap,&(info.srcSampRate));      PlayReplayBuffer(ao, info.a);      while (SamplesToPlay(ao) > 0 );      CloseAudioOutput(ao);   }}/* DoOnlineAdaptation: Perform unsupervised online adaptation   using the recognition hypothesis as the transcription */int DoOnlineAdaptation(Lattice *lat, ParmBuf pbuf, int nFrames){   Transcription *modelTrans, *trans;   BufferInfo pbinfo;   Lattice *alignLat, *wordNet;   Network *alignNet;   int i;   GetBufferInfo(pbuf,&pbinfo);   trans=TranscriptionFromLattice(&netHeap,lat,1);   wordNet=LatticeFromLabels(GetLabelList(trans,1),bndId,                             &vocab,&netHeap);   alignNet=ExpandWordNet(&netHeap,wordNet,&vocab,&hset);   StartRecognition(alignvri,alignNet,0.0,0.0,0.0);        /* do forced alignment */   for (i = 0; i < nFrames; i++) {      ReadAsTable(pbuf, i, &obs);      ProcessObservation(alignvri,&obs,-1,xfInfo.inXForm);   }       alignLat=CompleteRecognition(alignvri,                                pbinfo.tgtSampRate/10000000.0,                                &netHeap);           if (alignvri->noTokenSurvived) {      Dispose(&netHeap, trans);      /* Return value 0 to indicate zero frames process failed */      return 0;   }   modelTrans=TranscriptionFromLattice(&netHeap,alignLat,1);         /* format the transcription so that it contains just the models */   FormatTranscription(modelTrans,pbinfo.tgtSampRate,FALSE,TRUE,                       FALSE,FALSE,TRUE,FALSE,TRUE,TRUE, FALSE);   /* Now do the frame/state alignment accumulating MLLR statistics */   /* set the various values in the utterance storage */   utt->tr = modelTrans;   utt->pbuf = pbuf;   utt->Q = CountLabs(utt->tr->head);   utt->T = nFrames;   utt->ot = obs;     /* do frame state alignment and accumulate statistics */   fbInfo->inXForm = xfInfo.inXForm;   fbInfo->al_inXForm = xfInfo.inXForm;   fbInfo->paXForm = xfInfo.paXForm;   if (!FBFile(fbInfo, utt, NULL))     nFrames = 0;   Dispose(&netHeap, trans);   if (trace&T_TOP) {      printf("Accumulated statistics...\n");       fflush(stdout);   }   return nFrames;} /* ProcessFile: process given file. If fn=NULL then direct audio */Boolean ProcessFile(char *fn, Network *net, int utterNum, LogDouble currGenBeam, Boolean restartable){   FILE *file;   ParmBuf pbuf;   BufferInfo pbinfo;   NetNode *d;   Lattice *lat;   LArc *arc,*cur;   LNode *node;   Transcription *trans;   MLink m;   LogFloat lmlk,aclk;   int s,j,tact,nFrames;   LatFormat form;   char *p,lfn[255],buf1[80],buf2[80],thisFN[MAXSTRLEN];   Boolean enableOutput = TRUE, isPipe;   /*printf("\n当前正在处理文件%s\n",fn);*/   if (fn!=NULL)      strcpy(thisFN,fn);   else if (fn==NULL && saveAudioOut)      CounterFN(roPrefix,roSuffix,++roCounter,4,thisFN);   else       enableOutput = FALSE;       if((pbuf = OpenBuffer(&bufHeap,fn,50,dfmt,TRI_UNDEF,TRI_UNDEF))==NULL)/*当前文件格式参数dfmt=UNDEFF，TRI_UNDEF=-1*/      HError(3250,"ProcessFile: Config parameters invalid");      /* Check pbuf same as hset */   GetBufferInfo(pbuf,&pbinfo);   if (pbinfo.tgtPK!=hset.pkind)      HError(3231,"ProcessFile: Incompatible sample kind %s vs %s",             ParmKind2Str(pbinfo.tgtPK,buf1),             ParmKind2Str(hset.pkind,buf2));   if (pbinfo.a != NULL && replay)  AttachReplayBuf(pbinfo.a, (int) (3*(1.0E+07/pbinfo.srcSampRate)));/*当前此判断条件为假*/   StartRecognition(vri,net,lmScale,wordPen,prScale);/* 根据已有的网络net初始化可见的识别信息vri,当前lmScale=5.000000,wordPen=0.000000,prScale=1.000000，lmScale 是bigram and log(1/NSucc) scale factor，wordPen是 inter model propagation log prob，prScale是 pronunciation scale factor */   SetPruningLevels(vri,maxActive,currGenBeam,wordBeam,nBeam,tmBeam);/*当前maxActive=0,currGenBeam=10000000000.000000,wordBeam=10000000000.000000,nBeam=0.000000,tmBeam=10.000000*/    tact=0;nFrames=0;   StartBuffer(pbuf);   while(BufferStatus(pbuf)!=PB_CLEARED) {      ReadAsBuffer(pbuf,&obs);/*从缓冲中读出一个观察给obs*/
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -