📄 ladapt.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
      HError(16419,"LAdapt: language model file name expected");   outFN = CopyString(&gstack,GetStrArg());   Initialise();   if (processText) {      if (NextArg() != STRINGARG)	 ProcessText(NULL,TRUE);       /* input from stdin */      else	 while (NextArg() == STRINGARG) {	    /* !! copy string argument since it gets overwritten 	       by NextArg() when reading from script file */	    fn = CopyString(&gstack,GetStrArg());	    ProcessText(fn,NextArg() != STRINGARG);	 }      if (NumArgs() != 0)	 HError(-16419,"LAdapt: unused args left on cmd line");      for (i=0; i<stdBuf.ngb->fndx; i++) {	 sprintf(sBuf,"%s.%d",stdBuf.ngb->fn,i);  	 AddInputGFile(&inSet,sBuf,1.0);      }      ResetHeap(&langHeap);   } else {      for (i=0; i<MAX_NGRAM_FILES; i++) {	 sprintf(sBuf,"%s.%d",rootFN,i);	 if (!Exists(sBuf))	    break;	 AddInputGFile(&inSet,sBuf,1.0);      }      if (i==MAX_NGRAM_FILES)      {	HError(-16419, "LAdapt: Only %d n-gram files read (recompile with different setting\nof MAX_NGRAM_FILES");      }   }   if (nLModel==1) {      adpLM = GenerateModel(&langHeap,&binfo);   } else {      if (binfo.ptype==LMP_COUNT) 	 binfo.ptype = LMP_FLOAT;      newLM = GenerateModel(&langHeap,&binfo);      lmInfo[0].lm = newLM;      lmInfo[0].fn = "unknown";      /* combine all models into one */      adpLM = CombineModels(&langHeap,lmInfo,nLModel,nSize,tgtVoc);   }#ifdef HTK_TRANSCRIBER#ifdef HTK_CRYPT   adpLM->encrypt = TRUE;     /* force to write encrypted model */#endif#endif   SaveLangModel(outFN,adpLM);   Exit(EXIT_SUCCESS);   return EXIT_SUCCESS; /* never reached -- make compiler happy */}/* ------------------------ Initialisation ----------------------- *//* Exists:  return true if given file exists */Boolean Exists(char *fn){   FILE *f;      if ((f=fopen(fn,"r")) == NULL) return FALSE;   fclose(f);   return TRUE;}/* Initialise: initialise global data structures */void Initialise(void){   int  i;   char path[256];   CreateHeap(&langHeap,"LModel mem",MSTAK,1,0.5,1000,20000);   if (wlistFN!=NULL) {      tgtVoc = &wlist;      CreateWordList(wlistFN,tgtVoc,10);   }   if (processText) {      /* init empty buffer */      CreateWordMap(NULL,&wmap,newWords);       wmap.hasCnts = TRUE;      wmap.name = defMapName;      wmap.htkEsc = htkEscape;      ++wmap.seqno;      mapUpdated = FALSE;           if (tgtVoc!=NULL) {      /* add words from word list to the map */	 pruneWords = TRUE;	 for (i=0; i<tgtVoc->used; i++) {	    AddWordToMap(&wmap,tgtVoc->id[i]);	 }	 SortWordMap(&wmap);	 unkId = GetLabId(unkStr,FALSE);        }            /* init ngram buffer */      MakeFN(rootFN,dbsDir,NULL,path);      stdBuf.used = 0;      stdBuf.ng[nSize] = 1;  /* count = 1 */      stdBuf.ngb = CreateNGBuffer(&langHeap,nSize,ngbSize,path,&wmap);   } else {      CreateWordMap(omapFN,&wmap,1);   }      CreateInputSet(&gstack,&wmap,&inSet);   binfo.wmap = &wmap;   binfo.inSet = &inSet;   binfo.nSize = nSize;}/* ----------------- NGram Counting Routines -------------------- *//* CompressBuffer: and save if necessary or mustSave is TRUE */void CompressBuffer(NGBuffer *ngb, Boolean mustSave){   float compx;   if (ngb->used == 0) return;   SortNGBuffer(ngb);   compx = 100.0 * (float)ngb->used / (float)ngb->poolsize;   if (trace&T_SAV)      printf(" buffer %s.%d compressed%s to %.1f%%\n",	     ngb->fn, ngb->fndx, mustSave?"[must save]":"",compx);   if (compx > 75.0 || mustSave) {      if (saveFiles && mustSave && mapUpdated) {	 SaveWordMap(omapFN,&wmap,FALSE);         mapUpdated = FALSE;         if (trace&T_TOP)             printf(" word map saved to %s\n",omapFN);      }      if (trace&T_TOP) {         printf(" saving %d ngrams to file %s.%d\n",                 ngb->used, ngb->fn, ngb->fndx);      }      WriteNGBuffer(ngb,txtSrc);   }}/* PutShiftRegister: push word into shift register and extract ngram */void PutShiftRegister(LabId id, ShiftReg *sr){   int i;   MapEntry *me;      if (trace&T_SHR){      printf("   %12s --> %s\n",id->name,sr->ngb->fn);      fflush(stdout);   }   AddWordToMap(&wmap,id);    mapUpdated = TRUE;   me = (MapEntry *)id->aux;   sr->ng[sr->used++] = me->ndx;   if (sr->used == nSize) {      /* record ngram */      StoreNGram(sr->ngb,sr->ng);      /* shift words */      sr->used--;      for (i=0; i<sr->used; i++)         sr->ng[i] = sr->ng[i+1];      /* compress buffer if full */      if (sr->ngb->used == sr->ngb->poolsize)  {         CompressBuffer(sr->ngb,FALSE);      }   }}/* ProcessText: read text files line by line and count ngrams */void ProcessText(char *fn, Boolean lastFile){   FILE *f;   LabId id;   Boolean isPipe;   char word[256];   if (trace&T_TOP)       printf("Reading source text file %s\n",(fn==NULL) ? "<stdin>" : fn);   if ((fn!=NULL) && (strcmp(fn,"-")!=0)) {      if ((f = FOpen(fn,LMTextFilter,&isPipe))==NULL)	 HError(16410,"ProcessText: unable to open text file %s", fn);   } else {      f = stdin;   }   while (fscanf(f,"%255s",word)==1) {      if (pruneWords) {	 if ((id = GetLabId(word,FALSE))==NULL && (id = unkId)==NULL) {	    stdBuf.used=0;	    continue;	 }      } else {	 id = GetLabId(word,TRUE);      }      if (trace&T_INP) printf("[%s]\n",id->name);      PutShiftRegister(id,&stdBuf);   }   if (fn!=NULL) {      FClose(f,isPipe);      if (lastFile)	 CompressBuffer(stdBuf.ngb,TRUE);   } else {      CompressBuffer(stdBuf.ngb,TRUE);   } }/* CombineModels: load models and combine with the one in memory */BackOffLM *CombineModels(MemHeap *heap,LMInfo *lmi,int nLModel,int nSize,WordMap *wl) {   int i,j,nw;   float x;   LMInfo *li;   BackOffLM *tgtLM;   WordMap wordList;   LabId lab;   NameId *na;   /* normalise weights */   for (x=0.0, i=1; i<nLModel; i++)      x += lmInfo[i].weight;   lmInfo[0].weight = 1.0-x;   /* load all models except the first one*/   for (li=lmInfo+1, i=1; i<nLModel; i++, li++) {      if (trace&T_TOP)	 printf("Loading language model from %s\n",li->fn);      li->lm = LoadLangModel(li->fn,wl,1.0,LMP_FLOAT,heap);   }   if (wl==NULL) {      wl = &wordList;      /* derive word list from LMs */      for (li=lmInfo, i=0; i<nLModel; i++, li++) {  	 na = li->lm->binMap;	 for (j=0; j<li->lm->vocSize; j++) {	    lab = GetLabId(na[j+1]->name,TRUE);	    lab->aux=NULL; 	 }      }      for (nw=0,li=lmInfo, i=0; i<nLModel; i++, li++) {  	 na = li->lm->binMap;	 for (j=0; j<li->lm->vocSize; j++) {	    lab = GetLabId(na[j+1]->name,FALSE);	    if (lab->aux==NULL) {	       nw++; lab->aux = (Ptr) wl;	    }	 }      }      CreateWordList(NULL,wl,nw+10);      for (nw=0,li=lmInfo, i=0; i<nLModel; i++, li++) {	 na = li->lm->binMap;	 for (j=0; j<li->lm->vocSize; j++) {	    lab = GetLabId(na[j+1]->name,FALSE);	    if (lab->aux==(Ptr) wl) {	       wl->id[nw++]=lab; lab->aux = NULL;	    }	 }      }      wl->used = nw;   }   if (trace&T_TOP) {      printf("Using language model(s): \n");      for (li=lmInfo,i=0; i<nLModel; i++,li++)	 printf("  %d-gram %s, weight %.2f\n",li->lm->nSize,li->fn,li->weight);   }   if (trace&T_TOP) {      printf("Generating %d-gram model %s\n",nSize,outFN);      fflush(stdout);   }   tgtLM = MergeModels(heap,lmInfo,nLModel,nSize,wl);#ifdef HTK_CRYPT      if (tgtLM->encrypt && binfo.saveFmt==LMF_TEXT)      binfo.saveFmt = LMF_BINARY;#endif   for (i=1; i<=nSize; i++) {      tgtLM->gInfo[i].fmt = (i==1) ? LMF_TEXT : binfo.saveFmt;   }   return tgtLM;}/* ---------------------- End of LAdapt.c ----------------------- */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -