📄 hlm.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
         break;      if (i!=j || k==0)          HError(8150,"ReadBoNGram: %dGram count missing (%s)",i,buf);      switch (ngFmtCh) {      case '=':         ngBin[j] = FALSE;         break;      case '~':         ngBin[j] = TRUE;         break;      default:         HError (9999, "ReadARPALM: unknown ngram format type '%c'", ngFmtCh);      }      counts[j]=k;   }   if (ngBin[1])      HError (8113, "ReadARPALM: unigram must be stored as text");   nglm=CreateBoNGram(lm,counts[1],counts);   for (i=1;i<=nglm->nsize;i++) {      sprintf(syc,"\\%d-grams:",i);      SyncStr(buf,syc);      ReadNGrams(nglm,i,nglm->counts[i], ngBin[i]);   }   SyncStr(buf,"\\end\\");   CloseSource(&source);   if (trace&T_TIO) {      printf("\n NEntry==%d ",nglm->counts[0]);      for(i=1;i<=nglm->nsize;i++)         printf(" %d-Grams==%d",i,nglm->counts[i]);      printf("\n\n");      fflush(stdout);   }}/* WriteBoNGram: write out WSJ/DP format ngram */static void WriteBoNGram(LModel *lm,char *fn,int flags){   int i,k;   FILE *file;   NGramLM *nglm;   Boolean isPipe;   nglm = lm->data.ngram;   file=FOpen(fn,LangModOFilter,&isPipe);   fprintf(file,"\\data\\\n");   for (i=1;i<=nglm->nsize;i++) {      fprintf(file,"ngram %d=%d\n",i,nglm->counts[i]);   }   for (i=1;i<=nglm->nsize;i++) {      k = WriteNGrams(file,nglm,i,1.0/LN10);      if (k!=nglm->counts[i])         HError(-8190,"WriteBoNGram: Counts disagree for %dgram (%d vs %d)",                i, k, nglm->counts[i]);   }   fprintf(file,"\n\\end\\\n");   FClose(file,isPipe);}void ClearBoNGram(LModel *lm){   NGramLM *nglm = lm->data.ngram;   int i;      for(i=1;i<=nglm->vocSize;i++)      if (nglm->wdlist[i]!=NULL) nglm->wdlist[i]->aux=0;}/* -------------- Matrix Bigram Handling Routines ----------- */MatBiLM *CreateMatBigram(LModel *lm,int nw){   MatBiLM *matbi;     matbi = (MatBiLM *) New(lm->heap,sizeof(MatBiLM));   lm->data.matbi = matbi;   matbi->heap = lm->heap;      matbi->numWords = nw;   matbi->wdlist = (LabId *) New(lm->heap,sizeof(LabId)*(nw+1));   matbi->bigMat = CreateMatrix(lm->heap,nw,nw);   ZeroMatrix(matbi->bigMat);   return(matbi);}/* ReadRow: read a row from bigram file f into v */int ReadRow(Vector v){   int i,j,N,cnt,c;   float x;   N = VectorSize(v);   i=0;    while(!source.wasNewline) {      x = GetFloat(FALSE);      c=GetCh(&source);      if (c == '*')         cnt=GetInt();      else {         UnGetCh(c,&source);         cnt=1;      }      SkipWhiteSpace(&source);      for (j=0;j<cnt;j++) {         i++;         if (i<=N) v[i] = x;      }   }   return(i);}/* ReadBigram: load a bigram from given file */static void ReadMatBigram(LModel *lm,char *fn){   Vector vec;   char buf[132];   int P,p,j;   float sum,x;   LabId id;   MatBiLM *matbi;     if (trace&T_TIO)      printf("\nMB "),fflush(stdout);   if(InitSource(fn,&source,LangModFilter)<SUCCESS)      HError(8110,"ReadMatBigram: Can't open file %s", fn);   vec = CreateVector(&gcheap,MAX_LMID);   ReadLMWord(buf);SkipWhiteSpace(&source);   id=GetLabId(buf,TRUE);   P = ReadRow(vec);   if (P<=0 || P >MAX_LMID)      HError(8151,"ReadMatBigram: First row invalid (%d entries)",P);   matbi=CreateMatBigram(lm,P);   matbi->wdlist[1] = id;   for (p=1;p<=P;p++) matbi->bigMat[1][p]=vec[p];   id->aux=(Ptr) 1;   Dispose(&gcheap,vec);   for (sum=0.0, j=1; j<=P; j++) {      x = matbi->bigMat[1][j];      if (x<0)         HError(8151,"ReadMatBigram: In bigram, entry %d for %s is -ve (%e)",                j,buf,x);      sum += x;      matbi->bigMat[1][j]=((x<MINLARG)?LZERO:log(x));   }   if (sum < 0.99 || sum > 1.01)      HError(-8151,"ReadMatBigram: Row %d of bigram %s adds up to %f",1,fn,sum);   for (p=2; ReadLMWord(buf); p++) {      if (trace&T_TIO) {         if ((p%25)==0)            printf(". "),fflush(stdout);         if ((p%800)==0)            printf("\n   "),fflush(stdout);      }      if (p>P)         HError(8150,"ReadMatBigram: More rows than columns in bigram %s",fn);      id=GetLabId(buf,TRUE);      if ((int)id->aux != 0)          HError(8150,"ReadMatBigram: Duplicated name %s in bigram %s",buf,fn);      id->aux = (Ptr) p;      matbi->wdlist[p] = id;      SkipWhiteSpace(&source);      if (ReadRow(matbi->bigMat[p])!=P)         HError(8150,"ReadMatBigram: Wrong number of items in row %d",p);      for (sum=0.0, j=1; j<=P; j++) {         x = matbi->bigMat[p][j];         if (x<0)            HError(8151,"ReadMatBigram: In bigram, entry %d for %s is -ve (%e)",                   j,buf,x);         sum += x;         matbi->bigMat[p][j]=((x<MINLARG)?LZERO:log(x));      }      if (sum < 0.99 || sum > 1.01)         HError(-8151,"ReadMatBigram: Row %d of bigram %s adds up to %f",p,fn,sum);   }   if (P>p)      HError(8150,"ReadMatBigram: More columns than rows in bigram %s",fn);   if (trace&T_TIO)      printf("\n"),fflush(stdout);   CloseSource(&source);}/* WriteMatBigram: write out old HVite format bigram */static void WriteMatBigram(LModel *lm,char *fn,int flags){   const float epsilon = 0.000001;   MatBiLM *matbi;   FILE *file;   Boolean isPipe;   Vector v;   double x,y;   int i,j,rep;   if (trace&T_TIO)      printf("\nMB "),fflush(stdout);   matbi = lm->data.matbi;   file=FOpen(fn,LangModOFilter,&isPipe);   for (i=1;i<=matbi->numWords;i++) {      if (trace&T_TIO) {         if ((i%25)==0)            printf(". "),fflush(stdout);         if ((i%800)==0)            printf("\n   "),fflush(stdout);      }      fprintf(file,"%-8s ",ReWriteString(matbi->wdlist[i]->name,                                         NULL,ESCAPE_CHAR));      v=matbi->bigMat[i];rep=0;x=-1.0;      for (j=1;j<=matbi->numWords;j++){         y = L2F(v[j]);         if (fabs(y - x) <= epsilon) rep++;         else {            if (rep>0) {               fprintf(file,"*%d",rep+1);               rep=0;            }            x = y;            if (x == 0.0)               fprintf(file," 0");            else if (x == 1.0)               fprintf(file," 1");            else               fprintf(file," %e",x);         }      }      if (rep>0)         fprintf(file,"*%d",rep+1);      fprintf(file,"\n");   }   FClose(file,isPipe);   if (trace&T_TIO)      printf("\n"),fflush(stdout);}/*------------------------- User Interface --------------------*//* EXPORT GetLMProb: return probability of word wd_id following pr_id[] */float GetLMProb(LModel *lm, LabId prid[NSIZE], LabId wdid){   LabId cpid[NSIZE];   NEntry *ne;   SEntry *se;   lmId p, q, word, ndx[NSIZE];   LogFloat bowt,prob;   int i, s;     switch (lm->type) {   case boNGram:      word = (int)wdid->aux;      if (word==0 || word>lm->data.ngram->vocSize)         return(LZERO);      for (s=-1,i=0;i<NSIZE;i++)         if (prid[i]!=NULL)             ndx[i]=(int)prid[i]->aux, cpid[i]=prid[i], s=i;         else            ndx[i]=0, cpid[i]=NULL;      /* If no answer back-off to unigram */      if (s<0) {         if (word!=0)            return(lm->data.ngram->unigrams[word]);         else            return(log(1.0/lm->data.ngram->vocSize));      }      cpid[s]=0;      ne = GetNEntry(lm->data.ngram,ndx,FALSE);      if (ne) {         /* Replace with bsearch equivalent */         for (i=0, se=ne->se; i<ne->nse; i++,se++)            if (se->word==word)                return(se->prob); /* Ngram found */         bowt=ne->bowt;      }      else {         bowt=0.0;      }          if (s==0)         return(lm->data.ngram->unigrams[word]+bowt); /* Backoff to unigram */      else         return(bowt+GetLMProb(lm,cpid,wdid)); /* else recurse */      break;   case matBigram:      p=(int) prid[0]->aux;      q=(int) wdid->aux;      return(lm->data.matbi->bigMat[p][q]);   default:      prob=LZERO;   }   return(prob);}/* EXPORT ReadLModel: Determine LM type and then read-in */LModel *ReadLModel(MemHeap *heap,char *fn){   LModel *lm;   LMType type;   char buf[MAXSTRLEN+1];   int i;   lm=(LModel*)New(heap,sizeof(LModel));   lm->heap=heap;   lm->name=CopyString(heap,fn);   if(InitSource(fn,&source,LangModFilter)<SUCCESS)      HError(8110,"ReadLModel: Can't open file %s", fn);   type=boNGram;i=0;   do {      if (i++==1000) {         type=matBigram;         break;      }      GetInLine(buf);   }   while (strcmp(buf, "\\data\\")!=0);   CloseSource(&source);   lm->type=type;   switch(type) {   case boNGram:      ReadBoNGram(lm,fn);      break;   case matBigram:      ReadMatBigram(lm,fn);      break;   }   return(lm);}/* EXPORT WriteLModel: Determine LM type and then write-out */void WriteLModel(LModel *lm,char *fn,int flags){   switch(lm->type) {   case boNGram:      WriteBoNGram(lm,fn,flags);      break;   case matBigram:      WriteMatBigram(lm,fn,flags);      break;   }}void ClearLModel(LModel *lm){   switch(lm->type) {   case boNGram:      ClearBoNGram(lm);      break;   case matBigram:      break;   }}/*----------------------------------------------------------------------*/#ifndef NO_LAT_LM/* FindSEntry     find SEntry for wordId in array using binary search*/static SEntry *FindSEntry (SEntry *se, lmId pronId, int l, int h){   /*#### here l,h,c must be signed */   int c;   while (l <= h) {      c = (l + h) / 2;      if (se[c].word == pronId)          return &se[c];      else if (se[c].word < pronId)         l = c + 1;      else         h = c - 1;   }   return NULL;}/* LMTransProb_ngram     return logprob of transition from src labelled word. Also return dest state.     ngram case*/LogFloat LMTrans (LModel *lm, LMState src, LabId wdid, LMState *dest){   NGramLM *nglm;   LogFloat lmprob;   lmId hist[NSIZE] = {0};      /* initialise whole array to zero! */   int i, l;   NEntry *ne;   SEntry *se;   lmId word;   assert (lm->type == boNGram);   nglm = lm->data.ngram;   word = (int) wdid->aux;   if (word==0 || word>lm->data.ngram->vocSize) {      HError (-9999, "word %d not in LM wordlist", word);      *dest = NULL;      return (LZERO);   }   ne = src;      if (!src) {          /* unigram case */      lmprob = nglm->unigrams[word];   }   else {      /* lookup prob p(word | src) */      /* try to find pronid in SEntry array */      se = FindSEntry (ne->se, word, 0, ne->nse - 1);      assert (!se || (se->word == word));      if (se)        /* found */         lmprob = se->prob;      else {             /* not found */         lmprob = 0.0;         l = 0;         hist[NSIZE-1] = 0;         for (i = 0; i < NSIZE-1; ++i) {            hist[i] = ne->word[i];            if (hist[i] != 0)               l = i;         } /* l is now the index of the last (oldest) non zero element */                  for ( ; l > 0; --l) {            if (ne)               lmprob += ne->bowt;            hist[l] = 0;   /* back-off: discard oldest word */            ne = GetNEntry (nglm, hist, FALSE);            if (ne) {   /* skip over non existing hists. fix for weird LMs */               /* try to find pronid in SEntry array */               se = FindSEntry (ne->se, word, 0, ne->nse - 1);               assert (!se || (se->word == word));               if (se) { /* found it */                  lmprob += se->prob;                  l = -1;                  break;               }            }         }         if (l == 0) {          /* backed-off all the way to unigram */            assert (!se);            lmprob += ne->bowt;            lmprob += nglm->unigrams[word];         }      }   }   /* now determine dest state */   if (src) {      ne = (NEntry *) src;            l = 0;      hist[NSIZE-1] = 0;      for (i = 1; i < NSIZE-1; ++i) {         hist[i] = ne->word[i-1];         if (hist[i] != 0)            l = i;      } /* l is now the index of the last (oldest) non zero element */   }   else {      for (i = 1; i < NSIZE-1; ++i)         hist[i] = 0;      l = 1;   }   hist[0] = word;   ne = (LMState) GetNEntry (nglm, hist, FALSE);   for ( ; !ne && (l > 0); --l) {      hist[l] = 0;              /* back off */      ne = (LMState) GetNEntry (nglm, hist, FALSE);   }   /* if we left the loop because l=0, then ne is still NULL, which is what we want */   *dest = ne;#if 0   printf ("lmprob = %f  dest %p\n", lmprob, *dest);#endif   return (lmprob);}#endif/* ------------------------- End of HLM.c ------------------------- */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -