📄 hlat.c
字号:
#endif if (trace & T_PRUN) printf ("lattice pruned from %d/%d to %d/%d\n", lat->nn, lat->na, nn, na); /* build new lattice from remaining nodes/arcs */ newlat = NewILattice (heap, nn, na, lat); /* copy all the nodes we want to keep */ newln = newlat->lnodes; for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { if (ln->n != -1) { *newln = *ln; /* init linnked lists */ newln->foll = newln->pred = NULL; ++newln; } } assert (newln == newlat->lnodes + nn); newla = newlat->larcs; for (i = 0, la = lat->larcs; i < lat->na; ++i, ++la) { if (la->score >= 0.0) { /* keep */ *newla = *la; newla->start = newlat->lnodes + ((int) la->start->n); newla->end = newlat->lnodes + ((int) la->end->n); /* insert newla into foll list */ newla->farc = newla->start->foll; newla->start->foll = newla; /* ...and into pred list */ newla->parc = newla->end->pred; newla->end->pred = newla; ++newla; } } assert (newla == newlat->larcs + na); LatDetachInfo (&gcheap, lat); return newlat;}/* StatsInfo structure attached to all LNodes */typedef struct _StatsInfo { LogDouble nPaths; /* number of paths from start node */} StatsInfo;#define LNodeStats(ln) (((StatsInfo *) (ln)->hook))/* CalcStats calculate and output some global statistics for a lattice*/void CalcStats (Lattice *lat){ LNode **topOrder; LNode *ln; LArc *la; int i, d, max_inDegree, max_outDegree, nWords; LogDouble nPaths; Boolean isDAG; LNode *lnStart, *lnEnd; Word word; /* attach StatsInfo structre to nodes */ LatAttachInfo (&gcheap, sizeof (StatsInfo), lat); for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) LNodeStats(ln)->nPaths = 0; /* find topological order of nodes */ topOrder = (LNode **) New (&gcheap, lat->nn * sizeof(LNode *)); isDAG = LatTopSort (lat, topOrder); lnStart = isDAG ? topOrder[0] : LatStartNode (lat); lnEnd = isDAG ? topOrder[lat->nn-1] : LatEndNode (lat); max_inDegree = max_outDegree = 0; LNodeStats(lnStart)->nPaths = 1; /* reset word counters */ for (i = 0; i< VHASHSIZE; i++) for (word = lat->voc->wtab[i]; word != NULL; word = word->next) word->aux = (Ptr) 0; /* iterate over all nodes */ for (i = 0; i < lat->nn; ++i) { ln = topOrder[i]; /* count words */ ln->word->aux = (Ptr) (((int)ln->word->aux) + 1); /* count incoming and outgoing arcs */ d = 0; for (la = ln->pred; la; la = la->parc) ++d; if (d > max_inDegree) max_inDegree = d; d = 0; for (la = ln->foll; la; la = la->farc) ++d; if (d > max_outDegree) max_outDegree = d; if (isDAG) { /* propagate nPaths forward */ nPaths = LNodeStats(ln)->nPaths; for (la = ln->foll; la; la = la->farc) LNodeStats(la->end)->nPaths += nPaths; } } /* find number of words seen in lattice */ nWords = 0; for (i = 0; i < VHASHSIZE; i++) for (word = lat->voc->wtab[i]; word != NULL; word = word->next) { if (word->aux) ++nWords; word->aux = (Ptr) 0; } nPaths = LNodeStats(topOrder[lat->nn-1])->nPaths; printf("length[s] %.2f\n", lnEnd->time); printf("ArcsPerSec %.2f\n", lat->na/lnEnd->time); printf("nodes %d\n", lat->nn); printf("arcs: %d\n", lat->na); printf("max_inDegree %d\n", max_inDegree); printf("max_outDegree %d\n", max_outDegree); printf("nWords %d\n", nWords); if (isDAG) printf("nPaths %e.0\n", nPaths); else printf("nPaths inf\n"); Dispose (&gcheap, topOrder);}/* LatSetBoundaryWords set start and end words for use in lattices and LM*/void LatSetBoundaryWords (char *start, char *end, char *startLM, char *endLM){ startWord = GetLabId (start ? start : "!SENT_START", TRUE); startLMWord = GetLabId (startLM ? startLM : "<s>", TRUE); endWord = GetLabId (end ? end : "!SENT_END", TRUE); endLMWord = GetLabId (endLM ? endLM : "</s>", TRUE); nullWord = GetLabId ("!NULL", TRUE);}/* FixBadLat if final word is !NULL replace it with endWord. This happens in lattices produced with some older decoders.*/void FixBadLat (Lattice *lat){ LNode *ln; LArc *la; Boolean seenSE, seenOther; Word end; LatCheck (lat); end = GetWord (lat->voc, endWord, FALSE); if (!end) HError (8623, "HLRescore: SentEnd word (%d) not in vocabulary", endWord->name); ln = LatEndNode (lat); if (ln->word == end) return; if (ln->word && ln->word != lat->voc->nullWord) HError (8624, "HLRescore: final word in lattice (%s) is not !NULL! or sentend", ln->word->wordName->name); /* now we know that we have !NULL at the end */ seenSE = seenOther = FALSE; for (la = ln->pred; la; la = la->parc) { if (la->start->word == end) seenSE = TRUE; else seenOther = TRUE; } if (seenSE && seenOther) HError (8624, "HLRescore: lattice contains both SentEnd and other words in final position"); if (!seenOther) return; ln->word = end; ln->v = 1;}#ifndef NO_LAT_LM/* LatLMTrans wrapper around HLM:LMTrans() to take start and end words into account*/static LogFloat LatLMTrans (LModel *lm, LMState src, LabId wordId, LMState *dest){ LogFloat prob; if (wordId == nullWord) { dest = NULL; return 0.0; } else if (wordId == startWord && src == NULL) { /* always return P(<s>) = 1.0 at start of sentence */ wordId = startLMWord; prob = LMTrans (lm, src, wordId, dest); return 0.0; } else if (wordId == endWord) { /* destination state of </s> transition is always NULL */ wordId = endLMWord; prob = LMTrans (lm, src, wordId, dest); *dest = NULL; return prob; } else { prob = LMTrans (lm, src, wordId, dest); } return prob;}/* FindAddSubLNode Search for SubLNode in chain and add it if necessary*/static SubLNode *FindAddSubLNode (MemHeap *heap, LNode *ln, LMState lmstate, int *nsln){ SubLNode *subln; for (subln = (SubLNode *) ln->hook; subln; subln = subln->next) { if (subln->data.lmstate == lmstate) return subln; } if (!subln) { ++*nsln; subln = New (heap, sizeof (SubLNode)); subln->data.lmstate = lmstate; subln->foll = NULL; subln->next = (SubLNode *) ln->hook; ln->hook = (Ptr) subln; } return subln;}/* EXPORT->LatExpand expand lattice using new (typically higher-order) language Model*/Lattice *LatExpand (MemHeap *heap, Lattice *lat, LModel *lm){ int i, nsln, nsla; LNode *ln, *newln; LNode **topOrder; LArc *la, *newla; SubLNode *startSLN, *endSLN, *sln; SubLArc *sla; LogFloat lmprob; LMState dest; Lattice *newlat; nsln = nsla = 0; /* The idea of this algorithm is that we will split each node and arc in the lattice into multiple sub-nodes and sub-arcs as required by the new LM. N.B.We never join existing nodes, i.e. Calling LatExpand() with a bigram on a fourgram lattice will not reduce the size of the lattice. */ /* for each node in the lattice we keep a linked list (hung of ln->hook) of sub-nodes (corresponding to LMStates in the new LM). */ /* init sub-node linked lists */ for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { ln->hook = NULL; } /* create one sub-node for lattice start node with LMState = NULL */ FindAddSubLNode (&slnHeap, LatStartNode (lat), NULL, &nsln); /* find topological order of nodes */ topOrder = (LNode **) New (&gcheap, lat->nn * sizeof(LNode *)); LatTopSort (lat, topOrder); /* create lists of sub-nodes and sub-arcs and count them as we go along */ for (i = 0; i < lat->nn; ++i) { ln = topOrder[i]; for (startSLN = (SubLNode *) ln->hook; startSLN; startSLN = startSLN->next) { /* for each outgoing arc from current subLNode */ for (la = ln->foll; la; la = la->farc) { assert (la->start == ln); lmprob = LatLMTrans (lm, startSLN->data.lmstate, la->end->word->wordName, &dest); endSLN = FindAddSubLNode (&slnHeap, la->end, dest, &nsln); /* add new subLArc */ ++nsla; sla = New (&slaHeap, sizeof (SubLArc)); sla->lmprob = lmprob; sla->end = endSLN; sla->la = la; /* add to list of arcs leaving startSLN */ sla->next = startSLN->foll; startSLN->foll = sla; } } } if (trace & T_EXP) printf ("expanded lattice from %d/%d to %d/%d\n", lat->nn, lat->na, nsln, nsla); /* build new lattice from sub-node/-arc lists */ newlat = NewILattice (heap, nsln, nsla, lat); newlat->net = CopyString (heap, lm->name); /* create one node in new lattice for each sub node in old lattice */ newln = newlat->lnodes; for (i = 0; i < lat->nn; ++i) { ln = topOrder[i]; for (sln = (SubLNode *) ln->hook; sln; sln = sln->next) { *newln = *ln; newln->foll = newln->pred = NULL; newln->n = 0; newln->hook = NULL; sln->data.newln = newln; ++newln; } } assert (newln = newlat->lnodes + newlat->nn); /* create arcs in new lattice */ newla = newlat->larcs; for (i = 0; i < lat->nn; ++i) { ln = topOrder[i]; for (sln = (SubLNode *) ln->hook; sln; sln = sln->next) { newln = sln->data.newln; for (sla = sln->foll; sla; sla = sla->next) { *newla = *sla->la; newla->start = newln; newla->end = sla->end->data.newln; newla->lmlike = sla->lmprob; /* add to start node foll list */ newla->farc = newla->start->foll; newla->start->foll = newla; /* add to end node pred list */ newla->parc = newla->end->pred; newla->end->pred = newla; ++newla; } } } assert (newla == newlat->larcs + newlat->na); if (trace & T_MEM) { printf("Memory State after expanding\n"); PrintAllHeapStats(); } Dispose (&gcheap, topOrder); ResetHeap (&slaHeap); ResetHeap (&slnHeap); return newlat;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -