📄 hbuild.c
字号:
Lattice *lat; Boolean enterFound=FALSE; Boolean exitFound=FALSE; if (nLM->nsize > 2) HError(3030,"ProcessBoBiGram: Not BiGram LM: Order = %d",nLM->nsize); for (i=1; i <= nLM->counts[1]; i++) { if (nLM->wdlist[i] == enterId) enterFound = TRUE; if (nLM->wdlist[i] == exitId) exitFound = TRUE; if (enterFound && exitFound) break; } if (!enterFound) HError(3030,"ProcessBoBiGram: Bigram does not contain ENTER symbol %s", enterId->name); if (!exitFound) HError(3030,"ProcessBoBiGram: Bigram does not contain EXIT symbol %s", exitId->name); nNode = nLM->counts[1] + 1; /* this is a maximum size */ nArc = nLM->counts[2] + 2*nLM->counts[1]; lat = NewLattice(latHeap,nNode,nArc); lat->voc = voc; lat->lmscale = 1.0; lat->wdpenalty = 0.0; /* go through the LM - get wordId from voc and add LM probs */ wd = voc->nullWord; ln = lat->lnodes; ln->word = wd; ln->n=0; ln->v=0; for (i = 0 ; i <= NSIZE; i++) ndx[i] = 0; for (i=1,j=1,k=0; i <= nLM->counts[1]; i++) { wd = GetWord(voc,nLM->wdlist[i],FALSE); if ((nLM->wdlist[i] == unknownId) && zapUnknown) continue; if (wd == NULL) HError(3031,"ProcessBoBiGram: Word %s in LM not in WordList", nLM->wdlist[i]->name); ln = lat->lnodes+j; ln->word = wd; ln->n=0; ln->v=0; wd->aux = (Ptr) j; if (nLM->wdlist[i] != enterId) { la = lat->larcs+k; la->start = lat->lnodes; la->end = lat->lnodes+j; la->lmlike = nLM->unigrams[i]; k++; } j++; } lat->nn = j; lat->na = k; la = lat->larcs+k; for (i=1; i <= nLM->counts[1]; i++) { if ((nLM->wdlist[i] == unknownId) && zapUnknown) continue; if (nLM->wdlist[i] == exitId) continue; ndx[0] = i; ne = GetNEntry(nLM,ndx,FALSE); fromWd = GetWord(voc,nLM->wdlist[i],FALSE); fromNode = lat->lnodes+((int) fromWd->aux); la->start = fromNode; /* backoff weight */ la->end = lat->lnodes; if (ne==NULL) la->lmlike = 0.0; else la->lmlike = ne->bowt; la++; lat->na++; if (ne!=NULL) for (k = 0, se = ne->se; k < ne->nse; k++, se++) { if ((nLM->wdlist[se->word] == unknownId) && zapUnknown) continue; toWd = GetWord(voc,nLM->wdlist[se->word],FALSE); toNode = lat->lnodes+((int) toWd->aux); if (nLM->wdlist[se->word] != enterId) { la->start = fromNode; la->end = toNode; la->lmlike = se->prob; la++; lat->na++; } } } return lat;}/*ProcessMatBiGram: Convert matrix bigram in bg into lattice */ Lattice *ProcessMatBiGram(MemHeap *latHeap, Vocab *voc, MatBiLM *bg){ int nNode,nArc; LNode *ln,*fromNode,*toNode; LArc *la; Word wd,fromWd,toWd; int i,j; int skipWord=0; Lattice *lat; Vector row; if (bg->wdlist[1] != enterId) HError(3030,"ProcessMatBiGram: Bigram does not contain ENTER symbol %s", enterId->name); if (bg->wdlist[bg->numWords] != exitId) HError(3030,"ProcessMatBiGram: Bigram does not contain EXIT symbol %s", exitId->name); nNode = bg->numWords; /* this is a maximum size */ nArc = (bg->numWords-2)*bg->numWords; lat = NewLattice(latHeap,nNode,nArc); lat->voc = voc; lat->lmscale = 1.0; lat->wdpenalty = 0.0; for (i=1,j=0; i <= bg->numWords; i++) { wd = GetWord(voc,bg->wdlist[i],FALSE); if ((bg->wdlist[i] == unknownId) && zapUnknown) { skipWord = i; continue; } if (wd == NULL) HError(3031,"ProcessMatBiGram: Word %s in LM not in WordList", bg->wdlist[i]->name); ln = lat->lnodes+j; ln->word = wd; ln->n=0; ln->v=0; wd->aux = (Ptr) j; j++; } lat->nn = j; lat->na = (j-2)*j; la = lat->larcs; for (i=1,j=0; i < bg->numWords; i++) { row = bg->bigMat[i]; fromWd = GetWord(voc,bg->wdlist[i],FALSE); fromNode = lat->lnodes+((int) fromWd->aux); if (i == skipWord) continue; for (j=2; j <= (i==1?bg->numWords-1:bg->numWords); j++) { if (j == skipWord) continue; toWd = GetWord(voc,bg->wdlist[j],FALSE); toNode = lat->lnodes+((int) toWd->aux); la->start = fromNode; la->end = toNode; la->lmlike = row[j]; la++; } } return lat;}/* ProcessBiGram: Convert bigram in biLM into lattice */Lattice *ProcessBiGram(MemHeap *latHeap, Vocab *voc, LModel *biLM){ Lattice *lat; switch (biLM->type) { case boNGram: if (trace & T_TOP) printf("Converting back-off bigram -> lattice\n"); lat = ProcessBoBiGram(latHeap,voc,biLM->data.ngram); break; case matBigram: if (trace & T_TOP) printf("Converting matrix bigram -> lattice\n"); lat = ProcessMatBiGram(latHeap,voc,biLM->data.matbi); break; default: HError(3030,"ProcessBiGram: Unknown bigram type"); } return lat;} /* --------------- Word-Pair Grammar types and routines ------------- */typedef struct _WordFllr{ /* storage for word followers */ Word wd; struct _WordFllr *next;}WordFllr;typedef struct _GramEntry{ int wordNum; Word wd; int numFllrs; WordFllr *entry; struct _GramEntry *next;}GramEntry;typedef struct { int nwords; int nfllrs; GramEntry *glist; MemHeap entryHeap; MemHeap fllrHeap;}WPGrammar;/* --------------- Read the WP Grammar ----------------------- *//* SkipHeader: skip comments at top of file *//* and return true if not eof */Boolean SkipHeader(FILE *f){ int ch; Boolean inComment; ch = getc(f); /* skip leading space */ while (ch != EOF && isspace(ch)) ch = getc(f); if (ch == '/') { ch = getc(f); inComment = (ch == '*'); if (!inComment) HError(3040,"SkipHeader: / char illegal if not in comment or delimiter"); else while (ch != EOF && inComment) { ch = getc(f); if (ch == '*') { ch = getc(f); inComment = (ch != '/'); } } } ch = getc(f); while (ch != EOF && isspace(ch)) ch = getc(f); if (ch == EOF) return FALSE; ungetc(ch,f); return TRUE;}/* SkipSpacesEoln: skip white to eoln return true if not eof */Boolean SkipSpacesEoln(FILE *f){ int ch; ch = getc(f); while (ch != EOF && isspace(ch) && ch != '\n') ch = getc(f); if (ch == EOF) return FALSE; return TRUE;}/* NumberEntries: number all entries in the wpg */void NumberEntries(WPGrammar *wpg, Word sentEnd){ GramEntry *gid; int count = 0; gid = wpg->glist; while (gid != NULL) { if (gid->wd != sentEnd) { count++; gid->wordNum = count; } else gid->wordNum = 0; gid = gid->next; }} void ReadWPGrammar(WPGrammar *wpg, Vocab * voc, char *gramFn){ FILE *gf; char buf[255]; int ch; Word newWord; GramEntry *newGram; Boolean newEntry; WordFllr *wdfllr; Word sentEnd; sentEnd = GetWord(voc,GetLabId("SENTENCE-END",TRUE),TRUE); if ( (gf = fopen(gramFn,"r")) == NULL) HError(3010,"ReadWPGrammar: Cannot open word-pair grammar file %s",gramFn); if (trace && T_TOP) printf("Loading word-pair grammar %s\n",gramFn); if (!SkipHeader(gf)) HError(3040,"ReadWPGrammar: Unexpected eof while reading %s", gramFn); do { ch = getc(gf); newEntry = (ch == '>'); if (wpg->nwords == 0 && !newEntry) HError(3040,"ReadWPGrammar: > expected while reading %s", gramFn); if (!ReadLabel(gf,buf)) { if (newEntry) HError(3040,"ReadWPGrammar: Word entry expected in %s",gramFn); else break; } if (newEntry) { newWord = GetWord(voc,GetLabId(buf, TRUE),FALSE); if (newWord == NULL) HError(3040,"ReadWPGrammar: Word %s not in wordlist but in grammar file",buf); newGram = (GramEntry *) New(&wpg->entryHeap,sizeof(GramEntry)); newWord->aux = (Ptr) newGram; newGram->wd = newWord; newGram->next = wpg->glist; newGram->entry = NULL; wpg->glist = newGram; wpg->nwords++; } else { wdfllr = (WordFllr *) New(&wpg->fllrHeap,sizeof(WordFllr)); wdfllr->next = newGram->entry; wdfllr->wd = GetWord(voc,GetLabId(buf, TRUE),FALSE); if (wdfllr->wd == NULL) HError(3040,"ReadWPGrammar: Word %s not in wordlist but in grammar file",buf); newGram->entry = wdfllr; newGram->numFllrs++; wpg->nfllrs++; } } while (SkipSpacesEoln(gf)); fclose(gf); NumberEntries(wpg,sentEnd); if (trace & T_TOP) printf("Word-pair grammar %s loaded\n",gramFn);}Lattice *ProcessWordPair(MemHeap *latHeap, Vocab *voc, char *gramFn){ int nNode,nArc; LNode *ln,*toNode; LArc *la; Word wd; Lattice *lat; int j; WPGrammar wpg; GramEntry *gid; WordFllr *fid; wpg.nwords = 0; wpg.nfllrs = 0; wpg.glist = NULL; CreateHeap(&wpg.entryHeap,"GramEntry Heap",MHEAP,sizeof(GramEntry), 1.2,100,1000); CreateHeap(&wpg.fllrHeap,"WordFllr Heap",MHEAP,sizeof(WordFllr), 1.2,1000,10000); ReadWPGrammar(&wpg,voc,gramFn); nNode = wpg.nwords+1; nArc = wpg.nfllrs; lat = NewLattice(latHeap,nNode,nArc); lat->voc = voc; lat->lmscale = 1.0; lat->wdpenalty = 0.0; ln = lat->lnodes; ln->n=0; ln->v=0; ln = lat->lnodes+nNode-1; ln->n=0; ln->v=0; gid = wpg.glist; j = 0; while (gid != NULL) { ln = lat->lnodes+gid->wordNum; ln->word = gid->wd; ln->n=0; ln->v=0; fid = gid->entry; while (fid != NULL) { la = lat->larcs+j; la->start = ln; toNode = lat->lnodes+((GramEntry *) (fid->wd->aux))->wordNum; if (toNode == lat->lnodes) toNode = lat->lnodes+nNode-1; la->end = toNode; la->lmlike = log(1.0/((float) gid->numFllrs)); j++; fid = fid->next; } gid = gid->next; } if (bStartId != NULL) { wd = GetWord(voc,bStartId,TRUE); ln = lat->lnodes; ln->word = wd; wd = GetWord(voc,bEndId,TRUE); ln = lat->lnodes+nNode-1; ln->word = wd; } else { ln = lat->lnodes; ln->word = voc->nullWord; ln = lat->lnodes+nNode-1; ln->word = voc->nullWord; } return lat;}/* ------------------- End of HBuild.c --------------------------------- */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -