📄 hvite.c
字号:
HError(3219,"HVite: HMM file extension expected"); hmmExt = GetStrArg(); break; case 'y': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output label file extension expected"); labExt = GetStrArg(); break; case 'z': if (NextArg()!=STRINGARG) HError(3219,"HVite: Lattice output file extension expected"); latExt = GetStrArg(); break; case 'F': if (NextArg() != STRINGARG) HError(3219,"HVite: Data File format expected"); if((dfmt = Str2Format(GetStrArg())) == ALIEN) HError(-3289,"HVite: Warning ALIEN Input file format set"); break; case 'G': if (NextArg() != STRINGARG) HError(3219,"HVite: Source Label File format expected"); if((ifmt = Str2Format(GetStrArg())) == ALIEN) HError(-3289,"HVite: Warning ALIEN Input file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(3219,"HVite: MMF File name expected"); AddMMF(&hset,GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(3219,"HVite: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(3219,"HVite: Label/network file directory expected"); labInDir = GetStrArg(); break; case 'P': if (NextArg() != STRINGARG) HError(3219,"HVite: Target Label File format expected"); if((ofmt = Str2Format(GetStrArg())) == ALIEN) HError(-3289,"HVite: Warning ALIEN Label output file format set"); break; case 'B': saveBinary = TRUE; break; case 'T': trace = GetChkedInt(0,511,s); break; case 'X': if (NextArg()!=STRINGARG) HError(3219,"HVite: Input label/network file extension expected"); labInExt = GetStrArg(); break; case 'h': if (NextArg()!=STRINGARG) HError(1,"Speaker name pattern expected"); xfInfo.outSpkrPat = GetStrArg(); if (NextArg()==STRINGARG) { xfInfo.inSpkrPat = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.paSpkrPat = GetStrArg(); } if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -h as the last option"); break; case 'E': if (NextArg()!=STRINGARG) HError(2319,"HERest: parent transform directory expected"); xfInfo.usePaXForm = TRUE; xfInfo.paXFormDir = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.paXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HVite: cannot have -E as the last option"); break; case 'J': if (NextArg()!=STRINGARG) HError(2319,"HERest: input transform directory expected"); AddInXFormDir(&hset,GetStrArg()); if (NextArg()==STRINGARG) xfInfo.inXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HVite: cannot have -J as the last option"); break; case 'K': if (NextArg()!=STRINGARG) HError(2319,"HVite: output transform directory expected"); xfInfo.outXFormDir = GetStrArg(); xfInfo.useOutXForm = TRUE; if (NextArg()==STRINGARG) xfInfo.outXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HVite: cannot have -K as the last option"); break; default: HError(3219,"HVite: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(3219,"HVite: Dictionary file name expected"); dictFn = GetStrArg(); if (NextArg()!=STRINGARG) HError(3219,"HVite: HMM list file name expected"); hmmListFn = GetStrArg(); if ((states || models) && nToks>1) HError(3230,"HVite: Alignment using multiple tokens is not supported"); if (NumArgs()==0 && wdNetFn==NULL) HError(3230,"HVite: Network must be specified for recognition from audio"); if (loadNetworks && loadLabels) HError(3230,"HVite: Must choose either alignment from network or labels"); if (nToks>1 && latExt==NULL && nTrans==1) HError(-3230,"HVite: Performing nbest recognition with no nbest output"); if ((update>0) && (!xfInfo.useOutXForm)) HError(3230,"HVite: Must use -K option with incremental adaptation"); Initialise(); /* Process the data */ if (wdNetFn==NULL) DoAlignment(); else DoRecognition(); /* Free up and we are done */ if (trace & T_MEM) { printf("Memory State on Completion\n"); PrintAllHeapStats(); } DeleteVRecInfo(vri); ResetHeap(&netHeap); FreePSetInfo(psi); UpdateSpkrStats(&hset,&xfInfo, NULL); ResetHeap(®Heap); ResetHeap(&modelHeap); Exit(0); return (0); /* never reached -- make compiler happy */}/* --------------------------- Initialisation ----------------------- *//* Initialise: set up global data structures */void Initialise(void){ Boolean eSep; int s; /* Load hmms, convert to inverse DiagC */ if(MakeHMMSet(&hset,hmmListFn)<SUCCESS) HError(3228,"Initialise: MakeHMMSet failed"); if(LoadHMMSet(&hset,hmmDir,hmmExt)<SUCCESS) HError(3228,"Initialise: LoadHMMSet failed"); ConvDiagC(&hset,TRUE); /* Create observation and storage for input buffer */ SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep); obs=MakeObservation(&gstack,hset.swidth,hset.pkind, hset.hsKind==DISCRETEHS,eSep); /* sort out masks just in case using adaptation */ if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat; if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat; if (xfInfo.useOutXForm || (update>0)) { CreateHeap(®Heap, "regClassStore", MSTAK, 1, 0.5, 1000, 8000 ); /* This initialises things - temporary hack - THINK!! */ CreateAdaptXForm(&hset, "tmp"); /* initialise structures for the f-b frame-state alignment pass */ utt = (UttInfo *) New(®Heap, sizeof(UttInfo)); fbInfo = (FBInfo *) New(®Heap, sizeof(FBInfo)); /* initialise a recogniser for frame/state alignment purposes */ alignpsi=InitPSetInfo(&hset); alignvri=InitVRecInfo(alignpsi,1,TRUE,FALSE); SetPruningLevels(alignvri,0,genBeam,-LZERO,0.0,tmBeam); InitUttInfo(utt, FALSE); InitialiseForBack(fbInfo, ®Heap, &hset, (UPDSet) (UPXFORM), genBeam*2.0, genBeam*2.0, genBeam*4.0+1.0, 10.0); utt->twoDataFiles = FALSE; utt->S = hset.swidth[0]; AttachPreComps(&hset,hset.hmem); } /* Create observation and storage for input buffer */ SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep); obs=MakeObservation(&gstack,hset.swidth,hset.pkind, hset.hsKind==DISCRETEHS,eSep); CreateHeap(&bufHeap,"Input Buffer heap",MSTAK,1,0.0,50000,50000); CreateHeap(&repHeap,"Replay Buffer heap",MSTAK,1,0.0,50000,50000); maxM = MaxMixInSet(&hset); for (s=1; s<=hset.swidth[0]; s++) maxMixInS[s] = MaxMixInSetS(&hset, s); if (trace&T_TOP) { printf("Read %d physical / %d logical HMMs\n", hset.numPhyHMM,hset.numLogHMM); fflush(stdout); } /* Initialise recogniser */ if (nToks>1) nBeam=genBeam; psi=InitPSetInfo(&hset); vri=InitVRecInfo(psi,nToks,models,states); /* Read dictionary and create storage for lattice */ InitVocab(&vocab); if(ReadDict(dictFn,&vocab)<SUCCESS) HError(3213, "Main: ReadDict failed"); CreateHeap(&ansHeap,"Lattice heap",MSTAK,1,0.0,4000,4000); if (trace & T_MEM){ printf("Memory State After Initialisation\n"); PrintAllHeapStats(); }}/* ------------------ Utterance Level Recognition ----------------------- *//* ReplayAudio: replay the last audio input */void ReplayAudio(BufferInfo info){ AudioOut ao; if (info.a != NULL) { ao = OpenAudioOutput(&repHeap,&(info.srcSampRate)); PlayReplayBuffer(ao, info.a); while (SamplesToPlay(ao) > 0 ); CloseAudioOutput(ao); }}/* DoOnlineAdaptation: Perform unsupervised online adaptation using the recognition hypothesis as the transcription */int DoOnlineAdaptation(Lattice *lat, ParmBuf pbuf, int nFrames){ Transcription *modelTrans, *trans; BufferInfo pbinfo; Lattice *alignLat, *wordNet; Network *alignNet; int i; GetBufferInfo(pbuf,&pbinfo); trans=TranscriptionFromLattice(&netHeap,lat,1); wordNet=LatticeFromLabels(GetLabelList(trans,1),bndId, &vocab,&netHeap); alignNet=ExpandWordNet(&netHeap,wordNet,&vocab,&hset); StartRecognition(alignvri,alignNet,0.0,0.0,0.0); /* do forced alignment */ for (i = 0; i < nFrames; i++) { ReadAsTable(pbuf, i, &obs); ProcessObservation(alignvri,&obs,-1,xfInfo.inXForm); } alignLat=CompleteRecognition(alignvri, pbinfo.tgtSampRate/10000000.0, &netHeap); if (alignvri->noTokenSurvived) { Dispose(&netHeap, trans); /* Return value 0 to indicate zero frames process failed */ return 0; } modelTrans=TranscriptionFromLattice(&netHeap,alignLat,1); /* format the transcription so that it contains just the models */ FormatTranscription(modelTrans,pbinfo.tgtSampRate,FALSE,TRUE, FALSE,FALSE,TRUE,FALSE,TRUE,TRUE, FALSE); /* Now do the frame/state alignment accumulating MLLR statistics */ /* set the various values in the utterance storage */ utt->tr = modelTrans; utt->pbuf = pbuf; utt->Q = CountLabs(utt->tr->head); utt->T = nFrames; utt->ot = obs; /* do frame state alignment and accumulate statistics */ fbInfo->inXForm = xfInfo.inXForm; fbInfo->al_inXForm = xfInfo.inXForm; fbInfo->paXForm = xfInfo.paXForm; if (!FBFile(fbInfo, utt, NULL)) nFrames = 0; Dispose(&netHeap, trans); if (trace&T_TOP) { printf("Accumulated statistics...\n"); fflush(stdout); } return nFrames;} /* ProcessFile: process given file. If fn=NULL then direct audio */Boolean ProcessFile(char *fn, Network *net, int utterNum, LogDouble currGenBeam, Boolean restartable){ FILE *file; ParmBuf pbuf; BufferInfo pbinfo; NetNode *d; Lattice *lat; LArc *arc,*cur; LNode *node; Transcription *trans; MLink m; LogFloat lmlk,aclk; int s,j,tact,nFrames; LatFormat form; char *p,lfn[255],buf1[80],buf2[80],thisFN[MAXSTRLEN]; Boolean enableOutput = TRUE, isPipe; if (fn!=NULL) strcpy(thisFN,fn); else if (fn==NULL && saveAudioOut) CounterFN(roPrefix,roSuffix,++roCounter,4,thisFN); else enableOutput = FALSE; if((pbuf = OpenBuffer(&bufHeap,fn,50,dfmt,TRI_UNDEF,TRI_UNDEF))==NULL) HError(3250,"ProcessFile: Config parameters invalid"); /* Check pbuf same as hset */ GetBufferInfo(pbuf,&pbinfo); if (pbinfo.tgtPK!=hset.pkind) HError(3231,"ProcessFile: Incompatible sample kind %s vs %s", ParmKind2Str(pbinfo.tgtPK,buf1), ParmKind2Str(hset.pkind,buf2)); if (pbinfo.a != NULL && replay) AttachReplayBuf(pbinfo.a, (int) (3*(1.0E+07/pbinfo.srcSampRate))); StartRecognition(vri,net,lmScale,wordPen,prScale); SetPruningLevels(vri,maxActive,currGenBeam,wordBeam,nBeam,tmBeam); tact=0;nFrames=0; StartBuffer(pbuf); while(BufferStatus(pbuf)!=PB_CLEARED) { ReadAsBuffer(pbuf,&obs); if (trace&T_OBS) PrintObservation(nFrames,&obs,13);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -