flat_fwd.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 2,212 行 · 第 1/5 页
C
2,212 行
nextwid = LM_DICTWID(lm, tgptr->wid); if (IS_S3WID(nextwid) && (nextwid != startwid)) { for (w = nextwid; IS_S3WID(w); w = dict->word[w].alt) { newscore = rcscore[dict->word[w].ciphone[0]] + LM_TGPROB (lm, tgptr) + phone_penalty; if (newscore >= thresh) { word_enter (w, newscore, l, lc); tg_trans_done[w] = 1; } } } } acc_bowt = bowt; } /* Transition to bigram followers of bw1 */ if ((n_bg = lm_bglist (lm, dict2lmwid[dict_basewid(dict,bw1)], &bgptr, &bowt)) > 0) { /* Transition to bigram followers of bw1, if any */ for (; n_bg > 0; --n_bg, bgptr++) { /* Transition to all alternative pronunciations for bigram follower */ nextwid = LM_DICTWID (lm, bgptr->wid); if (IS_S3WID(nextwid) && (! tg_trans_done[nextwid]) && /* TG transition already done */ (nextwid != startwid)) { /* No transition to <s> */ for (w = nextwid; IS_S3WID(w); w = dict->word[w].alt) { newscore = rcscore[dict->word[w].ciphone[0]] + LM_BGPROB (lm, bgptr) + acc_bowt + phone_penalty; if (newscore >= thresh) word_enter (w, newscore, l, lc); } } } acc_bowt += bowt; } /* Update unigram backoff node */ for (rc = 0; rc < mdef->n_ciphone; rc++) { if (rcscore[rc] <= S3_LOGPROB_ZERO) continue; if (rcscore[rc]+acc_bowt+phone_penalty > ug_backoff[rc].score) { ug_backoff[rc].score = rcscore[rc]+acc_bowt+phone_penalty; ug_backoff[rc].latid = l; ug_backoff[rc].lc = lc; } } } else { /* Transition to words in word_cand_cf */ for (cand = 0; IS_S3WID(word_cand_cf[cand]); cand++) { nextwid = word_cand_cf[cand]; lscr = lm_tg_score (lm, dict2lmwid[dict_basewid(dict,bw0)], dict2lmwid[dict_basewid(dict,bw1)], dict2lmwid[nextwid], nextwid); for (w = nextwid; IS_S3WID(w); w = dict->word[w].alt) { newscore = rcscore[dict->word[w].ciphone[0]] + lscr + phone_penalty; if (newscore >= thresh) word_enter (w, newscore, l, lc); } } } /* Update filler backoff node */ for (rc = 0; rc < mdef->n_ciphone; rc++) { if (rcscore[rc] <= S3_LOGPROB_ZERO) continue; if (rcscore[rc]+phone_penalty > filler_backoff[rc].score) { filler_backoff[rc].score = rcscore[rc]+phone_penalty; filler_backoff[rc].latid = l; filler_backoff[rc].lc = lc; } } } /* * We have finished transitions to all tg/bg followers of all words just ended. * Or, if working from a lattice, transitioned to all words that may start at this * point as indicated by the lattice. */ /* Transition to unigrams from backoff nodes (if not working from a lattice) */ if (n_word_cand <= 0) {#if 0 n_ug = lm_uglist (&ugptr); for (; n_ug > 0; --n_ug, ugptr++) { for (w = ugptr->dictwid; IS_S3WID(w); w = dict->word[w].alt) { if (w == startwid) continue; rc = dict->word[w].ciphone[0]; if (ug_backoff[rc].score >= thresh) { newscore = ug_backoff[rc].score + LM_UGPROB (lm, ugptr); if (newscore >= thresh) word_enter (w, newscore, ug_backoff[rc].latid, ug_backoff[rc].lc); } } }#else word_ugprob_t *wp; int32 rcscr; for (rc = 0; rc < mdef->n_ciphone; rc++) { rcscr = ug_backoff[rc].score; l = ug_backoff[rc].latid; lc = ug_backoff[rc].lc; for (wp = word_ugprob[rc]; wp; wp = wp->next) { newscore = rcscr + wp->ugprob; if (newscore < thresh) break; word_enter (wp->wid, newscore, l, lc); } }#endif } /* * Transition to silence and filler words. Assume alternative pronunciations * are all within filler_start..filler_end */ for (w = dict->filler_start; w <= dict->filler_end; w++) { if ((w == startwid) || (w == finishwid)) continue; rc = dict->word[w].ciphone[0]; if (filler_backoff[rc].score > S3_LOGPROB_ZERO) { newscore = filler_backoff[rc].score + fillpen(fpen,dict_basewid(dict,w)); if (newscore >= thresh) word_enter (w, newscore, filler_backoff[rc].latid, filler_backoff[rc].lc); } } /* Free rcscore here, if necessary to conserve memory space */}/** Initialize the forward search. */void fwd_init (mdef_t* _mdef, tmat_t* _tmat, dict_t* _dict,lm_t *_lm){ float64 *f64arg; float32 *f32arg; char *tmpstr; E_INFO ("Forward Viterbi Initialization\n"); mdef = _mdef; tmat = _tmat; dict = _dict; lm = _lm; assert (mdef && tmat && dict && lm); /* HMM states information */ n_state = mdef->n_emit_state + 1; final_state = n_state - 1; /* Variables for speeding up whmm evaluation */ st_sen_scr = (int32 *) ckd_calloc (n_state-1, sizeof(int32)); /* Some key word ids */ silwid = dict_wordid (dict,S3_SILENCE_WORD); startwid = dict_wordid (dict,S3_START_WORD); finishwid = dict_wordid (dict,S3_FINISH_WORD); if ((NOT_S3WID(silwid)) || (NOT_S3WID(startwid)) || (NOT_S3WID(finishwid))) E_FATAL("%s, %s, or %s missing from dictionary\n", S3_SILENCE_WORD, S3_START_WORD, S3_FINISH_WORD); /* Beam widths and penalties */ f64arg = (float64 *) cmd_ln_access ("-beam"); beam = logs3 (*f64arg); f64arg = (float64 *) cmd_ln_access ("-nwbeam"); wordbeam = logs3 (*f64arg); f32arg = (float32 *) cmd_ln_access ("-phonepen"); phone_penalty = logs3 (*f32arg); E_INFO ("logs3(beam)= %d, logs3(nwbeam)= %d\n", beam, wordbeam); /* Allocate whmm structure */ whmm = (whmm_t **) ckd_calloc (dict->n_word, sizeof(whmm_t *)); /* Allocate output word lattice structure */ lat_alloc = *((int32 *) cmd_ln_access ("-bptblsize")); lattice = (lattice_t *) ckd_calloc (lat_alloc, sizeof(lattice_t)); n_lat_entry = 0; /* Build cross-word triphone models */ tmp_xwdpid = (s3pid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3pid_t)); n_backoff_ci = 0; build_wwpid (); E_INFO("%d within-word triphone instances mapped to CI-phones\n", n_backoff_ci); n_backoff_ci = 0; build_xwdpid_map (); E_INFO("%d cross-word triphones mapped to CI-phones\n", n_backoff_ci); ckd_free (tmp_xwdpid); /* Data structures needed during word transition */ rcscore = (int32 *) ckd_calloc (mdef->n_ciphone, sizeof(int32)); ug_backoff = (backoff_t *) ckd_calloc (mdef->n_ciphone, sizeof(backoff_t)); filler_backoff = (backoff_t *) ckd_calloc (mdef->n_ciphone, sizeof(backoff_t)); tg_trans_done = (uint8 *) ckd_calloc (dict->n_word, sizeof(uint8)); /* Check transition matrices for upper-triangularity */ chk_tp_uppertri (); /* Input candidate-word lattices information to restrict search; if any */ word_cand_dir = (char *) cmd_ln_access ("-inlatdir"); latfile_ext = (char *) cmd_ln_access ("-latext"); word_cand_win = *((int32 *) cmd_ln_access ("-inlatwin")); if (word_cand_win < 0) { E_ERROR("Invalid -inlatwin argument: %d; set to 50\n", word_cand_win); word_cand_win = 50; } /* Allocate pointers to lists of word candidates in each frame */ if (word_cand_dir) { word_cand = (word_cand_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(word_cand_t *)); word_cand_cf = (s3wid_t *) ckd_calloc (dict->n_word+1, sizeof(s3wid_t)); } /* Space for first lattice entry in each frame (+ terminating sentinel) */ frm_latstart = (s3latid_t *) ckd_calloc (S3_MAX_FRAMES+1, sizeof(s3latid_t)); /* Allocate timers and counters for statistics gathering */ pctr_new(ctr_mpx_whmm,"mpx"); pctr_new(ctr_nonmpx_whmm,"~mpx"); pctr_new(ctr_latentry,"lat"); /* Word to be traced in detail */ if ((tmpstr = (char *) cmd_ln_access ("-tracewhmm")) != NULL) { trace_wid = dict_wordid (dict,tmpstr); if (NOT_S3WID(trace_wid)) E_ERROR("%s not in dictionary; cannot be traced\n", tmpstr); } else trace_wid = BAD_S3WID; /* Active words to be dumped for debugging after the given frame no, if any */ tmpstr = (char *) cmd_ln_access ("-worddumpsf"); word_dump_sf = tmpstr ? *((int32 *) tmpstr) : (int32) 0x7ffffff0; /* Active HMMs to be dumped for debugging after the given frame no, if any */ tmpstr = (char *) cmd_ln_access ("-hmmdumpsf"); hmm_dump_sf = tmpstr ? *((int32 *) tmpstr) : (int32) 0x7ffffff0; /* Initialize word_ugprob (assuming there is only one LM) */ { s3wid_t w; s3cipid_t ci; int32 n_ug, ugprob; ug_t *ugptr; word_ugprob_t *wp, *prevwp; word_ugprob = (word_ugprob_t **) ckd_calloc (mdef->n_ciphone, sizeof(word_ugprob_t *)); n_ug = lm_uglist (lm,&ugptr); for (; n_ug > 0; --n_ug, ugptr++) { if ((w = ugptr->dictwid) == startwid) continue; ugprob = LM_UGPROB(lm, ugptr); for (; IS_S3WID(w); w = dict->word[w].alt) { ci = dict->word[w].ciphone[0]; prevwp = NULL; for (wp = word_ugprob[ci]; wp && (wp->ugprob >= ugprob); wp = wp->next) prevwp = wp; wp = (word_ugprob_t *) listelem_alloc (sizeof(word_ugprob_t)); wp->wid = w; wp->ugprob = ugprob; if (prevwp) { wp->next = prevwp->next; prevwp->next = wp; } else { wp->next = word_ugprob[ci]; word_ugprob[ci] = wp; } } } } /* Initialize bestpath search related */ dag.list = NULL;}static int32 word_cand_load (FILE *fp){ char line[1024], word[1024]; int32 i, k, n, nn, sf, seqno, lineno; s3wid_t w; word_cand_t *candp; /* Skip past Nodes parameter */ lineno = 0; nn = 0; word[0] = '\0'; while (fgets (line, sizeof(line), fp) != NULL) { lineno++; if ((sscanf (line, "%s %d", word, &nn) == 2) && (strcmp (word, "Nodes") == 0)) break; } if ((strcmp (word, "Nodes") != 0) || (nn <= 0)) { E_ERROR("%s: Nodes parameter missing from input lattice\n", uttid); return -1; } n = 0; for (i = 0; i < nn; i++) { if (fgets (line, 1024, fp) == NULL) { E_ERROR("%s: Incomplete input lattice\n", uttid); return -1; } lineno++; if ((k = sscanf (line, "%d %s %d", &seqno, word, &sf)) != 3) { E_ERROR("%s: Error in lattice, line %d: %s\n", uttid, lineno, line); return -1; } if (seqno != i) { E_ERROR("%s: Seq# error in lattice, line %d: %s\n", uttid, lineno, line); return -1; } if ((sf < 0) || (sf >= S3_MAX_FRAMES)) { E_ERROR("%s: Startframe error in lattice, line %d: %s\n", uttid, lineno, line); return -1; } w = dict_wordid (dict,word); if (NOT_S3WID(w)) { E_ERROR("%s: Unknown word in lattice: %s; ignored\n", uttid, word); continue; } w = dict_basewid(dict,w); /* Check node not already present; avoid duplicates */ for (candp = word_cand[sf]; candp && (candp->wid != w); candp = candp->next); if (candp) continue; candp = (word_cand_t *) listelem_alloc (sizeof(word_cand_t)); candp->wid = w; candp->next = word_cand[sf]; word_cand[sf] = candp; n++; } return n;}static void word_cand_free ( void ){ word_cand_t *candp, *next; int32 f; for (f = 0; f < S3_MAX_FRAMES; f++) { for (candp = word_cand[f]; candp; candp = next) { next = candp->next; listelem_free ((char *)candp, sizeof(word_cand_t)); } word_cand[f] = NULL; } n_word_cand = 0;}/* * Begin forward Viterbi search of one utterance */void fwd_start_utt (char *id){ int32 w, l, ispipe; char str[1024]; FILE *fp; uttid = ckd_salloc (id); ptmr_reset (&tm_hmmeval); ptmr_reset (&tm_hmmtrans); ptmr_reset (&tm_wdtrans); if (uttid) ckd_free (uttid); uttid = ckd_salloc (id); /* Free rcscores for each lattice entry */ for (l = 0; l < n_lat_entry; l++) { if (lattice[l].rcscore) { ckd_free (lattice[l].rcscore); lattice[l].rcscore = NULL; } } n_lat_entry = 0; /* If input lattice file containing word candidates to be searched specified; use it */ if (word_cand_dir) { sprintf (str, "%s/%s.%s", word_cand_dir, id, latfile_ext); E_INFO("Reading input lattice: %s\n", str); if ((fp = fopen_compchk (str, &ispipe)) == NULL) E_ERROR("fopen_compchk(%s) failed; running full search\n", str); else { if ((n_word_cand = word_cand_load (fp)) <= 0) { E_ERROR("Bad or empty lattice file: %s; ignored\n", str); word_cand_free (); } else E_INFO("%d lattice entries read\n", n_word_cand); fclose_comp (fp, ispipe); } } /* Enter all pronunciations of startwid (begin silence) */ n_frm = -1; /* Since word_enter transitions to "NEXT" frame */ for (w = startwid; IS_S3WID(w); w = dict->word[w].alt) word_enter (w, 0, BAD_S3LATID, dict->word[silwid].c
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?