📄 hbuild.c

📁 隐马尔科夫模型工具箱
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
   Lattice *lat;   Boolean enterFound=FALSE;   Boolean exitFound=FALSE;   if (nLM->nsize > 2)      HError(3030,"ProcessBoBiGram: Not BiGram LM: Order = %d",nLM->nsize);   for (i=1; i <= nLM->counts[1]; i++) {      if (nLM->wdlist[i] == enterId) enterFound = TRUE;      if (nLM->wdlist[i] == exitId) exitFound = TRUE;      if (enterFound && exitFound)         break;   }   if (!enterFound)       HError(3030,"ProcessBoBiGram: Bigram does not contain ENTER symbol %s",             enterId->name);   if (!exitFound)       HError(3030,"ProcessBoBiGram: Bigram does not contain EXIT symbol %s",             exitId->name);   nNode = nLM->counts[1] + 1;   /* this is a maximum size */   nArc =  nLM->counts[2] + 2*nLM->counts[1];   lat = NewLattice(latHeap,nNode,nArc);   lat->voc = voc;   lat->lmscale = 1.0; lat->wdpenalty = 0.0;    /* go through the LM - get wordId from voc and add LM probs */   wd = voc->nullWord;   ln = lat->lnodes;   ln->word = wd; ln->n=0; ln->v=0;   for (i = 0 ; i <= NSIZE; i++) ndx[i] = 0;    for (i=1,j=1,k=0; i <= nLM->counts[1]; i++) {      wd = GetWord(voc,nLM->wdlist[i],FALSE);      if ((nLM->wdlist[i] == unknownId) && zapUnknown)         continue;      if (wd == NULL)         HError(3031,"ProcessBoBiGram: Word %s in LM not in WordList",                nLM->wdlist[i]->name);      ln = lat->lnodes+j;      ln->word = wd; ln->n=0; ln->v=0;      wd->aux = (Ptr) j;      if (nLM->wdlist[i] != enterId) {         la = lat->larcs+k;         la->start = lat->lnodes;         la->end = lat->lnodes+j;         la->lmlike = nLM->unigrams[i];         k++;      }      j++;   }   lat->nn = j;   lat->na = k;   la = lat->larcs+k;   for (i=1; i <= nLM->counts[1]; i++) {      if ((nLM->wdlist[i] == unknownId) && zapUnknown)         continue;      if (nLM->wdlist[i] == exitId)         continue;      ndx[0] = i;      ne = GetNEntry(nLM,ndx,FALSE);      fromWd =  GetWord(voc,nLM->wdlist[i],FALSE);      fromNode =  lat->lnodes+((int) fromWd->aux);      la->start = fromNode;    /* backoff weight */      la->end = lat->lnodes;      if (ne==NULL) la->lmlike = 0.0;      else la->lmlike = ne->bowt;      la++; lat->na++;      if (ne!=NULL)         for (k = 0, se = ne->se; k < ne->nse; k++, se++) {            if ((nLM->wdlist[se->word] == unknownId) && zapUnknown)               continue;            toWd = GetWord(voc,nLM->wdlist[se->word],FALSE);            toNode = lat->lnodes+((int) toWd->aux);            if (nLM->wdlist[se->word] != enterId) {               la->start = fromNode;               la->end = toNode;               la->lmlike = se->prob;               la++; lat->na++;            }         }   }   return lat;}/*ProcessMatBiGram: Convert matrix bigram in bg into lattice */ Lattice *ProcessMatBiGram(MemHeap *latHeap, Vocab *voc, MatBiLM *bg){   int nNode,nArc;   LNode *ln,*fromNode,*toNode;   LArc *la;   Word wd,fromWd,toWd;   int i,j;   int skipWord=0;   Lattice *lat;   Vector row;   if (bg->wdlist[1] != enterId)      HError(3030,"ProcessMatBiGram: Bigram does not contain ENTER symbol %s",             enterId->name);   if (bg->wdlist[bg->numWords] != exitId)      HError(3030,"ProcessMatBiGram: Bigram does not contain EXIT symbol %s",             exitId->name);   nNode = bg->numWords;           /* this is a maximum size */   nArc =  (bg->numWords-2)*bg->numWords;   lat = NewLattice(latHeap,nNode,nArc);   lat->voc = voc;   lat->lmscale = 1.0; lat->wdpenalty = 0.0;    for (i=1,j=0; i <= bg->numWords; i++) {      wd = GetWord(voc,bg->wdlist[i],FALSE);      if ((bg->wdlist[i] == unknownId) && zapUnknown) {         skipWord = i;          continue;      }      if (wd == NULL)         HError(3031,"ProcessMatBiGram: Word %s in LM not in WordList",                bg->wdlist[i]->name);      ln = lat->lnodes+j;      ln->word = wd; ln->n=0; ln->v=0;      wd->aux = (Ptr) j;      j++;   }   lat->nn = j;   lat->na = (j-2)*j;   la = lat->larcs;   for (i=1,j=0; i < bg->numWords; i++) {      row = bg->bigMat[i];      fromWd =  GetWord(voc,bg->wdlist[i],FALSE);      fromNode =  lat->lnodes+((int) fromWd->aux);      if (i == skipWord) continue;      for (j=2; j <= (i==1?bg->numWords-1:bg->numWords); j++) {         if (j == skipWord) continue;         toWd = GetWord(voc,bg->wdlist[j],FALSE);         toNode = lat->lnodes+((int) toWd->aux);         la->start = fromNode;         la->end = toNode;         la->lmlike = row[j];         la++;      }   }   return lat;}/* ProcessBiGram: Convert bigram in biLM into lattice */Lattice *ProcessBiGram(MemHeap *latHeap, Vocab *voc, LModel *biLM){   Lattice *lat;   switch (biLM->type) {   case boNGram:      if (trace & T_TOP)         printf("Converting back-off bigram -> lattice\n");      lat = ProcessBoBiGram(latHeap,voc,biLM->data.ngram);      break;   case matBigram:      if (trace & T_TOP)         printf("Converting matrix bigram -> lattice\n");      lat = ProcessMatBiGram(latHeap,voc,biLM->data.matbi);      break;   default:      HError(3030,"ProcessBiGram: Unknown bigram type");   }   return lat;}         /* --------------- Word-Pair Grammar types and routines ------------- */typedef struct _WordFllr{   /* storage for word followers */   Word wd;   struct _WordFllr *next;}WordFllr;typedef struct _GramEntry{   int wordNum;   Word wd;   int numFllrs;   WordFllr *entry;   struct _GramEntry *next;}GramEntry;typedef struct {   int nwords;   int nfllrs;   GramEntry *glist;      MemHeap entryHeap;   MemHeap fllrHeap;}WPGrammar;/* --------------- Read the WP Grammar ----------------------- *//* SkipHeader: skip comments at top of file *//*             and return true if not eof   */Boolean SkipHeader(FILE *f){   int ch;   Boolean inComment;      ch = getc(f);        /* skip leading space */   while (ch != EOF && isspace(ch))      ch = getc(f);   if (ch == '/') {      ch = getc(f);            inComment = (ch == '*');      if (!inComment)         HError(3040,"SkipHeader: / char illegal if not in comment or delimiter");        else         while (ch != EOF && inComment) {            ch = getc(f);            if (ch == '*') {               ch = getc(f);               inComment = (ch != '/');            }         }   }        ch = getc(f);   while (ch != EOF && isspace(ch))      ch = getc(f);   if (ch == EOF) return FALSE;   ungetc(ch,f);   return TRUE;}/* SkipSpacesEoln: skip white to eoln return true if not eof */Boolean SkipSpacesEoln(FILE *f){   int ch;   ch = getc(f);   while (ch != EOF && isspace(ch) && ch != '\n')      ch = getc(f);   if (ch == EOF) return FALSE;   return TRUE;}/* NumberEntries: number all entries in the wpg */void NumberEntries(WPGrammar *wpg, Word sentEnd){   GramEntry *gid;   int count = 0;   gid = wpg->glist;   while (gid != NULL) {      if (gid->wd !=  sentEnd) {         count++;          gid->wordNum = count;      }      else         gid->wordNum = 0;      gid = gid->next;   }}      void ReadWPGrammar(WPGrammar *wpg, Vocab * voc, char *gramFn){   FILE *gf;   char buf[255];   int ch;   Word newWord;   GramEntry *newGram;   Boolean newEntry;   WordFllr *wdfllr;    Word sentEnd;         sentEnd = GetWord(voc,GetLabId("SENTENCE-END",TRUE),TRUE);    if ( (gf = fopen(gramFn,"r")) == NULL)      HError(3010,"ReadWPGrammar: Cannot open word-pair grammar file %s",gramFn);   if (trace && T_TOP)      printf("Loading word-pair grammar %s\n",gramFn);   if (!SkipHeader(gf))      HError(3040,"ReadWPGrammar: Unexpected eof while reading %s", gramFn);   do {      ch = getc(gf);      newEntry = (ch == '>');      if (wpg->nwords == 0 && !newEntry)         HError(3040,"ReadWPGrammar: > expected while reading %s", gramFn);      if (!ReadLabel(gf,buf)) {         if (newEntry)            HError(3040,"ReadWPGrammar: Word entry expected in %s",gramFn);         else            break;      }      if (newEntry) {         newWord = GetWord(voc,GetLabId(buf, TRUE),FALSE);         if (newWord == NULL)            HError(3040,"ReadWPGrammar: Word %s not in wordlist but in grammar file",buf);         newGram = (GramEntry *) New(&wpg->entryHeap,sizeof(GramEntry));         newWord->aux = (Ptr) newGram;         newGram->wd = newWord;         newGram->next = wpg->glist;         newGram->entry = NULL;         wpg->glist = newGram;         wpg->nwords++;      }       else {         wdfllr = (WordFllr *) New(&wpg->fllrHeap,sizeof(WordFllr));         wdfllr->next = newGram->entry;         wdfllr->wd = GetWord(voc,GetLabId(buf, TRUE),FALSE);         if (wdfllr->wd == NULL)            HError(3040,"ReadWPGrammar: Word %s not in wordlist but in grammar file",buf);         newGram->entry = wdfllr;         newGram->numFllrs++;         wpg->nfllrs++;      }   } while (SkipSpacesEoln(gf));   fclose(gf);   NumberEntries(wpg,sentEnd);   if (trace & T_TOP)      printf("Word-pair grammar %s loaded\n",gramFn);}Lattice *ProcessWordPair(MemHeap *latHeap, Vocab *voc, char *gramFn){   int nNode,nArc;   LNode *ln,*toNode;   LArc *la;   Word wd;   Lattice *lat;   int j;   WPGrammar wpg;   GramEntry *gid;   WordFllr *fid;   wpg.nwords = 0;   wpg.nfllrs = 0;   wpg.glist = NULL;   CreateHeap(&wpg.entryHeap,"GramEntry Heap",MHEAP,sizeof(GramEntry),              1.2,100,1000);   CreateHeap(&wpg.fllrHeap,"WordFllr Heap",MHEAP,sizeof(WordFllr),              1.2,1000,10000);   ReadWPGrammar(&wpg,voc,gramFn);   nNode = wpg.nwords+1;   nArc = wpg.nfllrs;   lat = NewLattice(latHeap,nNode,nArc);   lat->voc = voc;   lat->lmscale = 1.0; lat->wdpenalty = 0.0;    ln = lat->lnodes; ln->n=0; ln->v=0;   ln = lat->lnodes+nNode-1; ln->n=0; ln->v=0;   gid = wpg.glist;   j = 0;   while (gid != NULL) {      ln = lat->lnodes+gid->wordNum;      ln->word = gid->wd; ln->n=0; ln->v=0;      fid = gid->entry;      while (fid != NULL) {         la = lat->larcs+j;         la->start = ln;         toNode = lat->lnodes+((GramEntry *) (fid->wd->aux))->wordNum;         if (toNode == lat->lnodes) toNode = lat->lnodes+nNode-1;         la->end = toNode;         la->lmlike = log(1.0/((float) gid->numFllrs));         j++;         fid = fid->next;      }      gid = gid->next;   }   if (bStartId != NULL) {      wd = GetWord(voc,bStartId,TRUE);      ln = lat->lnodes; ln->word = wd;      wd = GetWord(voc,bEndId,TRUE);      ln = lat->lnodes+nNode-1; ln->word = wd;   }   else {      ln =  lat->lnodes; ln->word = voc->nullWord;      ln = lat->lnodes+nNode-1; ln->word = voc->nullWord;   }   return lat;}/* ------------------- End of HBuild.c --------------------------------- */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -