📄 hlat.c
字号:
sort lattice nodes in topological order returns array of LNode pointers. uses depth first traversal (in reverse (pred) direction) based on [CLR:1990], p. 485 */Boolean LatTopSort (Lattice *lat, LNode **topOrder){ int time = -1; /* new numbering will start at 0 */ int i; LNode *ln; LArc *la; Boolean isDAG = TRUE; for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) ln->n = -1; /* mark node as unseen (WHITE in CLR) */ LatTopSortVisit (LatEndNode (lat), &time); /* we should have seen all nodes */ assert (time+1 == lat->nn); /* check topological order */ for (i = 0, la = lat->larcs; i < lat->na; ++i, ++la) if (!(la->start->n < la->end->n)) { isDAG = FALSE; HError (-8622, "LatTopSort: Lattice contains cycles"); break; } for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) topOrder[ln->n] = ln; /*#### GE: reset ln->n values? */ for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) ln->n = i; return isDAG;}/* LatAttachInfo allocate & attach an Info structre for each node*/void LatAttachInfo (MemHeap *heap, size_t size, Lattice *lat){ int i; LNode *ln; for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) ln->hook = (Ptr) New (heap, size);}/* LatDetachInfo free Info structre for each node*/void LatDetachInfo (MemHeap *heap, Lattice *lat){ int i; LNode *ln; for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) Dispose (heap, ln->hook);}/* LatForwBackw perform forward-backward algorithm on lattice and store scores in FBInfo structre choice of using sum (LATFB_SUM) or max (LATFB_MAX) of scores*/LogDouble LatForwBackw (Lattice *lat, LatFBType type){ int i; LNode *ln; LArc *la; LNode **topOrder; LogDouble score; /* We assume that the FBinfo structures are already allocated. */ /* init */ for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { LNodeFw (ln) = LNodeBw (ln) = LZERO; } LNodeFw (LatStartNode (lat)) = 0.0; LNodeBw (LatEndNode (lat)) = 0.0; /* find topological order of nodes */ topOrder = (LNode **) New (&gcheap, lat->nn * sizeof(LNode *)); if (!LatTopSort (lat, topOrder)) HError (8622, "LatForwBackw: cannot calculate forw/backw score on Lattice with cycles"); /* the processing in the forward and backward directions are really almost the same. if the data structures had been done _slightly_ nicer this could be done in one loop. The only readable way now would be defining a couple of macros... */ /* forward direction */ for (i = 0; i < lat->nn; ++i) { ln = topOrder[i]; for (la = ln->foll; la; la = la->farc) { assert (la->start == ln); score = LNodeFw (ln) + LArcTotLike (lat, la); switch (type) { case LATFB_SUM: LNodeFw (la->end) = LAdd (LNodeFw (la->end), score); break; case LATFB_MAX: if (score > LNodeFw (la->end)) LNodeFw (la->end) = score; break; default: abort (); } } } /* backward direction */ for (i = lat->nn - 1; i >= 0; --i) { ln = topOrder[i]; for (la = ln->pred; la; la = la->parc) { assert (la->end == ln); score = LNodeBw (ln) + LArcTotLike (lat, la); switch (type) { case LATFB_SUM: LNodeBw (la->start) = LAdd (LNodeBw (la->start), score); break; case LATFB_MAX: if (score > LNodeBw (la->start)) LNodeBw (la->start) = score; break; default: abort (); } } } if (trace & T_FB) { printf ("forward prob: %f\n", LNodeFw (topOrder[lat->nn - 1])); printf ("backward prob: %f\n", LNodeBw (topOrder[0])); } score = LNodeBw (topOrder[0]); Dispose (&gcheap, topOrder); return score;}/* EXPORT->LatFindBest find the N-best paths (i.e. lowest sum of LArcTotLike()s) and generate Transcription. ####GE: currently only N=1 is supported*/Transcription *LatFindBest (MemHeap *heap, Lattice *lat, int N){ int i; LNode *ln; LNode **topOrder; LArc *la; LogDouble score, ac, lm, pr, tot; Word nullWord; Pron pron; Transcription *trans; LabList *ll; LLink lab; if (N != 1) HError (8690, "FindBest: only 1-best supported, yet."); /* during the search ln->score will hold the score of the best path to ln (i.e. lowest score). ln->hook will point to the arc leading to the preceeding node in this path */ /* init fields */ for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { ln->score = LZERO; ln->hook = NULL; } LatStartNode (lat)->score = 0.0; /* find topological order of nodes */ topOrder = (LNode **) New (&gcheap, lat->nn * sizeof(LNode *)); if (!LatTopSort (lat, topOrder)) HError (8690, "LatFindBest: cannot find best path in Lattice with cycles"); assert (topOrder[0] == LatStartNode (lat)); /* traverse nodes in top order */ for (i = 0; i < lat->nn; ++i) { ln = topOrder[i]; /* for all outgoing arcs: propagate scores forward */ for (la = ln->foll; la; la = la->farc) { assert (la->start == ln); score = la->start->score + LArcTotLike (lat, la); if (score > la->end->score) { la->end->score = score; la->end->hook = (Ptr) la; } } } /* create traqnscription */ trans = CreateTranscription (heap); ll = CreateLabelList (heap, 0); nullWord = lat->voc->nullWord; ac = lm = pr = tot = 0; if (trace & T_TRAN) printf ("1best trans (wordPron t ac lm pr tot): "); /* backtrack from end node along best path and generate transcription */ ln = LatEndNode (lat); la = (LArc *) ln->hook; while (la) { LabId outlab; if (ln->word != nullWord) { for (pron = ln->word->pron; pron; pron = pron->next) if (pron->pnum == ln->v) break; if (pron) outlab = pron->outSym; else /* if we can't find pronvar (e.g. wlist), fall back to word */ outlab = ln->word->wordName; if (outlab) { lab = CreateLabel (heap, ll->maxAuxLab); lab->labid = outlab; lab->start = la->start->time * 1.0e7; lab->end = ln->time * 1.0e7; lab->score = LArcTotLike (lat, la); lab->succ = ll->head->succ; lab->pred = ll->head; lab->succ->pred = lab->pred->succ = lab; } } ac += la->aclike; lm += la->lmlike; pr += la->prlike; tot += LArcTotLike (lat, la); if (trace & T_TRAN) printf ("(%s%d %.2f %.3f %.3f %.3f %.3f) ", ln->word->wordName->name, ln->v, ln->time, la->aclike, la->lmlike, la->prlike, LArcTotLike (lat, la)); ln = la->start; la = (LArc *) ln->hook; } AddLabelList (ll, trans); if (trace & T_TRAN) { printf ("\n"); printf ("ac lm pr tot: %.3f %.3f %.3f %.3f\n", ac, lm, pr, tot); } Dispose (&gcheap, topOrder); return trans;}/* EXPORT->LatSetScores set ln->score values to node posterior to make WriteLattice() deterministic*/void LatSetScores (Lattice *lat){ LogDouble best; LNode *ln; int i; LatAttachInfo (&gcheap, sizeof (FBinfo), lat); best = LatForwBackw (lat, LATFB_MAX); for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { ln->score = LNodeFw (ln) + LNodeBw (ln); } LatDetachInfo (&gcheap, lat);}/* EXPORT->LatPrune prune lattice by removing all paths with score more than thresh lower than the best path.*/Lattice *LatPrune (MemHeap *heap, Lattice *lat, LogDouble thresh, float arcsPerSec){ LogDouble best, limit, score; LNode *ln, *newln; LArc *la, *newla; int i, nn, na; Lattice *newlat; LatAttachInfo (&gcheap, sizeof (FBinfo), lat); best = LatForwBackw (lat, LATFB_MAX); limit = best - thresh; /* modify thresh according to arcPerSec limit */ if (arcsPerSec > 0) {#define NBIN 1000 HTime length; int nArc, nArcLimit, bin; int hist[NBIN]; float binWidth; length = LatEndNode (lat)->time; nArcLimit = length * arcsPerSec; binWidth = thresh / NBIN; for (i = 0; i < NBIN; ++i) hist[i] = 0; nArc = 0; for (i = 0, la = lat->larcs; i < lat->na; ++i, ++la) { score = LNodeFw (la->start) + LArcTotLike (lat, la) + LNodeBw (la->end); bin = (best - score) / binWidth; assert (bin >= 0); if (bin < NBIN) { /* keep */ ++hist[bin]; ++nArc; } } if (nArc > nArcLimit) { nArc = 0; for (i = 0; i < NBIN; ++i) { nArc += hist[i]; if (nArc > nArcLimit) break; } thresh = i * binWidth; limit = best - thresh; if (trace &T_PRUN) printf ("length %.2f nArcLimit %d beam adjusted to %f\n", length, nArcLimit, thresh); } } nn = na = 0; /* scan nodes, count survivors and verify consistency */ for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { score = LNodeFw (ln) + LNodeBw (ln); if (score >= limit) { /* keep */ ln->n = nn; ++nn; } else { ln->n = -1; } } /* scan arcs and count survivors */ for (i = 0, la = lat->larcs; i < lat->na; ++i, ++la) { if (beamPruneArcs) { score = LNodeFw (la->start) + LArcTotLike (lat, la) + LNodeBw (la->end); if (score >= limit) { /* keep */ la->score = (float) na; ++na; } else /* remove */ la->score = -1.0; } else { /* only prune arc if either node is dead */ if (la->start->n != -1 && la->end->n != -1) { /* both nodes are alive => keep arc */ la->score = (float) na; ++na; } else /* remove */ la->score = -1.0; } }#if 0 /* SANITY check */ for (i = 0, ln = lat->lnodes; i < lat->nn; ++i, ++ln) { if (ln->n == -1) { /* check that there are no live arcs attached to dead nodes */ for (la = ln->foll; la; la = la->farc) { if (la->score >= 0.0) HError (8691, "LatPrune: arc score (%f) better than node score (%f)\n", LNodeFw (la->start) + LArcTotLike (lat, la) + LNodeBw (la->end), score); } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -