flat_fwd.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 2,212 行 · 第 1/5 页
C
2,212 行
h->score[to] = scr; if (bestfrom >= 0) { h->history[to] = h->history[bestfrom]; h->pid[to] = h->pid[bestfrom]; } if (bestscr < scr) bestscr = scr; } h->bestscore = bestscr;}#endifstatic int32 whmm_eval (int32 *senscr){ int32 best, cf; s3wid_t w; whmm_t *h, *nexth, *prevh; int32 n_mpx, n_nonmpx; best = S3_LOGPROB_ZERO; n_mpx = n_nonmpx = 0; cf = n_frm; for (w = 0; w < dict->n_word; w++) { prevh = NULL; for (h = whmm[w]; h; h = nexth) { nexth = h->next; if (h->active == cf) { if (h->pos == 0) { eval_mpx_whmm (w, h, senscr); n_mpx++; } else { eval_nonmpx_whmm (w, h, senscr); n_nonmpx++; } if (best < h->bestscore) best = h->bestscore; prevh = h; } else { if (prevh) prevh->next = nexth; else whmm[w] = nexth; whmm_free (h); } } } pctr_increment (ctr_mpx_whmm, n_mpx); pctr_increment (ctr_nonmpx_whmm, n_nonmpx); return best;}/** * Record a word exit in word lattice. * NOTE: All exits from a single word in a given frame (for different right context * ciphones) must occur contiguously. */static void lattice_entry (s3wid_t w, int32 f, whmm_t *h){ s3cipid_t rc, npid; if ((n_lat_entry <= 0) || (lattice[n_lat_entry-1].wid != w) || (lattice[n_lat_entry-1].frm != f)) { /* New lattice entry */ if (n_lat_entry >= lat_alloc) { printf ("\n"); E_INFO("Lattice size(%d) exceeded; increasing to %d\n", lat_alloc, lat_alloc+LAT_ALLOC_INCR); lat_alloc += LAT_ALLOC_INCR; lattice = ckd_realloc (lattice, lat_alloc * sizeof(lattice_t)); } lattice[n_lat_entry].wid = w; lattice[n_lat_entry].frm = (s3frmid_t) f; lattice[n_lat_entry].score = h->score[final_state]; lattice[n_lat_entry].history = h->history[final_state]; /* Allocate space for different right context scores */ npid = get_rc_npid (w); assert (npid > 0); lattice[n_lat_entry].rcscore = (int32 *) ckd_calloc (npid, sizeof(int32)); for (rc = 0; rc < npid; rc++) lattice[n_lat_entry].rcscore[rc] = S3_LOGPROB_ZERO; n_lat_entry++; } /* Slight BUG here: each rc can have its own history, but only the best retained!! */ if (lattice[n_lat_entry-1].score < h->score[final_state]) { lattice[n_lat_entry-1].score = h->score[final_state]; lattice[n_lat_entry-1].history = h->history[final_state]; } lattice[n_lat_entry-1].rcscore[h->rc] = h->score[final_state];}/** * Transition from hmm h into the next appropriate one for word w. * Threshold check for incoming score already completed. * The next HMM may be the last triphone for the word w, in which case, instantiate * multiple instances corresponding cross-word triphone modelling for all right context * ciphones. */static void whmm_transition (int32 w, whmm_t *h){ int32 lastpos, npid, nf; whmm_t *nexth, *prevh; s3cipid_t rc; s3pid_t *pid; lastpos = dict->word[w].pronlen - 1; nf = n_frm+1; if (h->pos < lastpos-1) { /* * Transition to word HMM that's not the final one in word. First, allocate * the HMM if not already present. */ if ((! h->next) || (h->next->pos != h->pos+1)) { nexth = whmm_alloc (h->pos+1); nexth->pid = &(wwpid[w][nexth->pos]); nexth->next = h->next; h->next = nexth; } /* Transition to next HMM */ nexth = h->next; if (h->score[final_state] > nexth->score[0]) { nexth->score[0] = h->score[final_state]; nexth->history[0] = h->history[final_state]; nexth->active = nf; /* Ensure it doesn't get pruned */ } } else { /* * Transition to final HMMs in word, with full cross-word rc modelling. Allocate * all final triphone HMM instances first. */ prevh = h; get_rcpid (w, &pid, &npid); for (rc = 0; rc < npid; rc++) { if ((! prevh->next) || (prevh->next->rc != rc)) { nexth = whmm_alloc (h->pos+1); nexth->rc = rc; nexth->pid = &(pid[rc]); nexth->next = prevh->next; prevh->next = nexth; } prevh = prevh->next; } /* Transition to next HMMs */ for (rc = 0, nexth = h->next; rc < npid; rc++, nexth = nexth->next) { if (h->score[final_state] > nexth->score[0]) { nexth->score[0] = h->score[final_state]; nexth->history[0] = h->history[final_state]; nexth->active = nf; /* Ensure it doesn't get pruned */ } } }}static void whmm_exit (int32 thresh, int32 wordthresh){ s3wid_t w; whmm_t *h; int32 pronlen, nf; nf = n_frm+1; for (w = 0; w < dict->n_word; w++) { pronlen = dict->word[w].pronlen; for (h = whmm[w]; h; h = h->next) { if (h->bestscore >= thresh) { if (h->pos == pronlen-1) { if (h->score[final_state] >= wordthresh) lattice_entry (w, n_frm, h); } else { if (h->score[final_state]+phone_penalty >= thresh) whmm_transition (w, h); } h->active = nf; } } }}/** * Get the last two non-filler, non-silence lattice words w0 and w1 (base word-ids), * starting from l. w1 is later than w0. At least w1 must exist; w0 may not. */static void two_word_history (s3latid_t l, s3wid_t *w0, s3wid_t *w1){ s3latid_t l0, l1; l0=0; for (l1 = l; filler_word(lattice[l1].wid); l1 = lattice[l1].history);/* BHIKSHA HACK - PERMIT MULTIPLE PRONS FOR <s> */if (l1 != -1) for (l0 = lattice[l1].history; (IS_S3LATID(l0)) && (filler_word(lattice[l0].wid)); l0 = lattice[l0].history);/* BHIKSHA HACK - PERMIT MULTIPLE PRONS FOR <s> */if (l1 == -1) *w1 = 0; else *w1 = dict_basewid(dict, lattice[l1].wid);if (l1 == -1) *w0 = BAD_S3WID; else *w0 = (NOT_S3LATID(l0)) ? BAD_S3WID : dict_basewid(dict,lattice[l0].wid);}/** * Transition into a word w. Since we transition into the first phone position, the * triphone model must be derived from the incoming left context ciphone. The first * state of the whmm instance inherits this triphone model and propagates it along with * the score. * If the first phone is also the last (single-phone word), we must also model all * possible right context ciphones, by instantiating separate whmm models for each rc. */static void word_enter (s3wid_t w, int32 score, s3latid_t l, s3cipid_t lc){ whmm_t *h, *prevh; s3cipid_t b, rc; s3pid_t pid, *rpid; int32 s, npid, nf; nf = n_frm+1; b = dict->word[w].ciphone[0]; if (dict->word[w].pronlen > 1) { /* Multi-phone word; no right context problem */ rc = dict->word[w].ciphone[1]; pid = lcpid[b][rc].pid[lcpid[b][rc].cimap[lc]]; if ((! whmm[w]) || (whmm[w]->pos != 0)) { h = whmm_alloc (0); for (s = 0; s < n_state; s++) h->pid[s] = pid; h->next = whmm[w]; whmm[w] = h; } h = whmm[w]; if (score > h->score[0]) { h->score[0] = score; h->history[0] = l; h->pid[0] = pid; h->active = nf; } } else { /* Do all right contexts; first make sure all are allocated */ prevh = NULL; h = whmm[w]; npid = get_rc_npid (w); rpid = lrcpid[b][lc].pid; for (rc = 0; rc < npid; rc++) { if ((! h) || (h->rc != rc)) { h = whmm_alloc (0); for (s = 0; s < n_state; s++) h->pid[s] = rpid[rc]; h->rc = rc; if (prevh) { h->next = prevh->next; prevh->next = h; } else { h->next = whmm[w]; whmm[w] = h; } } prevh = h; h = h->next; } assert (! h); /* Transition to the allocated HMMs */ b = dict->word[w].ciphone[0]; for (rc = 0, h = whmm[w]; rc < npid; rc++, h = h->next) { if (score > h->score[0]) { h->score[0] = score; h->history[0] = l; h->pid[0] = rpid[rc]; h->active = nf; } } }}/** * Backoff node when backing off all the way to unigrams. Since each word exits with * #ciphones different scores (for so many different right contexts), a separate node * exists for each context. */typedef struct { s3latid_t latid; /** History entry */ int32 score; /** Acoustic + backed off LM score */ s3cipid_t lc; /** Last ciphone of history entry, to be used as left context upon entering a new word. */} backoff_t;static backoff_t *ug_backoff, *filler_backoff;static uint8 *tg_trans_done; /** If tg_trans_done[w] TRUE, trigram transition to w occurred for a given history, and backoff bigram transition from same history should be avoided */static int32 *rcscore = NULL; /** rc scores uncompacted; one entry/rc-ciphone */static s3wid_t *word_cand_cf; /** BAD_S3WID terminated array of candidate words for word transition in current frame (if using input word lattices to restrict search). *//** * Unigrams re-organized for faster unigram word transitions. Words partitioned by * their first CI phone and ordered in descending unigram probability within each * partition. */typedef struct word_ugprob_s { s3wid_t wid; int32 ugprob; struct word_ugprob_s *next;} word_ugprob_t;static word_ugprob_t **word_ugprob;/** * Build array of candidate words that start around the current frame (cf). * Note: filler words are not in this list since they are always searched (see * word_trans). */static void build_word_cand_cf (int32 cf){ int32 f, sf, ef; s3wid_t w, n; word_cand_t *candp; for (w = 0; w < dict->n_word; w++) word_cand_cf[w] = 0; if ((sf = cf - word_cand_win) < 0) sf = 0; if ((ef = cf + word_cand_win) >= S3_MAX_FRAMES) ef = S3_MAX_FRAMES-1; for (f = sf; f <= ef; f++) { for (candp = word_cand[f]; candp; candp = candp->next) word_cand_cf[candp->wid] = 1; } word_cand_cf[startwid] = 0; /* Never hypothesized (except at beginning) */ for (w = dict->filler_start; w <= dict->filler_end; w++) word_cand_cf[w] = 0; /* Handled separately */ word_cand_cf[finishwid] = 1; /* Always a candidate */ n = 0; for (w = 0; w < dict->n_word; w++) if (word_cand_cf[w]) word_cand_cf[n++] = w; word_cand_cf[n] = BAD_S3WID;}/** Transition for one word. */static void word_trans (int32 thresh){ s3latid_t l; /* lattice entry index */ s3cipid_t *rcmap, rc, lc; s3wid_t w, bw0, bw1, nextwid; tg_t *tgptr; bg_t *bgptr; int32 bowt, acc_bowt, newscore; int32 n_tg, n_bg; int32 cand, lscr; int32 lat_start; /* int32 tempi, temp_j;*/ lat_start = frm_latstart[n_frm]; for (rc = 0; rc < mdef->n_ciphone; rc++) { ug_backoff[rc].score = S3_LOGPROB_ZERO; filler_backoff[rc].score = S3_LOGPROB_ZERO; } if (n_word_cand > 0) build_word_cand_cf (n_frm); /* Trigram/Bigram word transitions from words just exited */ for (l = lat_start; l < n_lat_entry; l++) { w = lattice[l].wid; if (w == finishwid) continue; /* Cross-word left context for new words to which we may transition */ lc = dict->word[w].ciphone[dict->word[w].pronlen-1]; /* Uncompact path scores for all right context ciphones for word just finished */ rcmap = get_rc_cimap (w); for (rc = 0; rc < mdef->n_ciphone; rc++) rcscore[rc] = lattice[l].rcscore[rcmap[rc]]; /* Find the last real (non-filler, non-silence) two-word history */ two_word_history (l, &bw0, &bw1); if (n_word_cand <= 0) { /* Transition to all words in vocab */ /* Clear trigram transition flag for each word for this history */ memset (tg_trans_done, 0, dict->n_word*sizeof(uint8)); /* First, transition to trigram followers of bw0, bw1 */ acc_bowt = 0; if ((IS_S3WID(bw0)) && ((n_tg = lm_tglist (lm, dict2lmwid[dict_basewid(dict,bw0)], dict2lmwid[dict_basewid(dict,bw1)], &tgptr, &bowt)) > 0)) { /* Transition to trigram followers of bw0, bw1, if any */ for (; n_tg > 0; --n_tg, tgptr++) { /* Transition to all alternative pronunciations for trigram follower */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?