s3_allphone.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 844 行 · 第 1/2 页
C
844 行
}static void phmm_eval (phmm_t *p, int32 *senscr){ int32 **tp; int32 nst, from, to, bestfrom, newscr, bestscr; history_t *besthist = (history_t *)0; nst = mdef->n_emit_state; tp = tmat->tp[p->tmat]; bestscr = S3_LOGPROB_ZERO; /* Update state scores from last to first (assuming no backward transitions) */ for (to = nst-1; to >= 0; --to) { /* Find best incoming score to the "to" state from predecessor states */ bestfrom = S3_LOGPROB_ZERO; for (from = to; from >= 0; from--) { if ((tp[from][to] > S3_LOGPROB_ZERO) && (p->score[from] > S3_LOGPROB_ZERO)) { newscr = p->score[from] + tp[from][to]; if (newscr > bestfrom) { bestfrom = newscr; besthist = p->hist[from]; } } } /* If looking at initial state, also consider incoming score */ if ((to == 0) && (p->inscore > bestfrom)) { bestfrom = p->inscore; besthist = p->inhist; } /* Update state score */ if (bestfrom > S3_LOGPROB_ZERO) { p->score[to] = bestfrom + senscr[p->sen[to]]; p->hist[to] = besthist; if (p->score[to] > bestscr) bestscr = p->score[to]; } } /* Update non-emitting exit state score */ bestfrom = S3_LOGPROB_ZERO; to = nst; for (from = nst-1; from >= 0; from--) { if ((tp[from][to] > S3_LOGPROB_ZERO) && (p->score[from] > S3_LOGPROB_ZERO)) { newscr = p->score[from] + tp[from][to]; if (newscr > bestfrom) { bestfrom = newscr; besthist = p->hist[from]; } } } p->score[to] = bestfrom; p->hist[to] = besthist; if (p->score[to] > bestscr) bestscr = p->score[to]; p->bestscore = bestscr;}/** Evaluate active PHMMs */static int32 phmm_eval_all (int32 *senscr){ s3cipid_t ci; phmm_t *p; int32 best; best = S3_LOGPROB_ZERO; for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { if (p->active == curfrm) { phmm_eval (p, senscr); if (p->bestscore > best) best = p->bestscore; } } } return best;}static void phmm_exit (int32 best){ s3cipid_t ci; phmm_t *p; int32 th, nf, nst, s; history_t *h; th = best + beam; frm_hist[curfrm] = NULL; nf = curfrm+1; nst = mdef->n_emit_state; for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { if (p->active == curfrm) { if (p->bestscore >= th) { /* Scale state scores to prevent underflow */ for (s = 0; s <= nst; s++) if (p->score[s] > S3_LOGPROB_ZERO) p->score[s] -= best; /* Create lattice entry if exiting */ if (p->score[nst] >= beam) { /* beam, not th because scores scaled */ h = (history_t *) listelem_alloc (sizeof(history_t)); h->score = p->score[nst]; h->ef = curfrm; h->phmm = p; h->hist = p->hist[nst]; h->next = frm_hist[curfrm]; frm_hist[curfrm] = h; n_histnode++; } /* Mark PHMM active in next frame */ p->active = nf; } else { /* Reset state scores */ for (s = 0; s <= nst; s++) { p->score[s] = S3_LOGPROB_ZERO; p->hist[s] = NULL; } } } /* Reset incoming score in preparation for cross-PHMM transition */ p->inscore = S3_LOGPROB_ZERO; } }}static void phmm_trans ( void ){ history_t *h; phmm_t *from, *to; plink_t *l; int32 newscore, nf; nf = curfrm+1; /* Transition from exited nodes to initial states of HMMs */ for (h = frm_hist[curfrm]; h; h = h->next) { from = h->phmm; for (l = from->succlist; l; l = l->next) { to = l->phmm; newscore = h->score + tp[(unsigned)from->ci][(unsigned)to->ci]; if ((newscore > beam) && (newscore > to->inscore)) { to->inscore = newscore; to->inhist = h; to->active = nf; } } }}int32 allphone_frame (int32 *senscr){ int32 bestscr; bestscr = phmm_eval_all (senscr); score_scale[curfrm] = bestscr; phmm_exit (bestscr); phmm_trans (); curfrm++; return 0;}/** Return accumulated score scale in frame range [sf..ef] */static int32 seg_score_scale (int32 sf, int32 ef){ int32 scale, s; for (s = sf, scale = 0; s <= ef; s++, scale += score_scale[s]); return scale;}/* Phone lattice node */typedef struct phlatnode_s { s3cipid_t ci; uint16 fef, lef; /* First and last end frame for this node */ struct phlatnode_s *next;} phlatnode_t;static void allphone_latdump (char *uttid, char *latdir){ int32 f, sf, latbeam, best, thresh, nnode; history_t *h; char filename[4096]; FILE *fp; float64 *f64arg; phlatnode_t **phlatnode, *p; f64arg = (float64 *) cmd_ln_access ("-phlatbeam"); latbeam = logs3 (*f64arg); sprintf (filename, "%s/%s.phlat", latdir, uttid); if ((fp = fopen(filename, "w")) == NULL) { E_ERROR("fopen(%s,w) failed\n", filename); return; } phlatnode = (phlatnode_t **) ckd_calloc (curfrm+1, sizeof(phlatnode_t)); for (f = 0; f < curfrm; f++) { /* Find best score for this frame and set pruning threshold */ best = (int32)0x80000000; for (h = frm_hist[f]; h; h = h->next) if (h->score > best) best = h->score; thresh = best + latbeam; for (h = frm_hist[f]; h; h = h->next) { /* Skip this node if below threshold */ if (h->score < thresh) continue; sf = h->hist ? h->hist->ef + 1 : 0; assert (h->ef == f); /* Find phlatnode for this <ci,sf> pair */ for (p = phlatnode[sf]; p && (p->ci != h->phmm->ci); p = p->next); if (! p) { p = (phlatnode_t *) listelem_alloc (sizeof(phlatnode_t)); p->next = phlatnode[sf]; phlatnode[sf] = p; p->ci = h->phmm->ci; p->fef = p->lef = h->ef; } assert (p->lef <= h->ef); p->lef = h->ef;#if 0 score = h->score; if (h->hist) score -= h->hist->score; score += seg_score_scale (sf, h->ef); fprintf (fp, "%4d %3d %12d %s\n", /* startfrm endfrm ciphone */ sf, h->ef - sf + 1, score, mdef_ciphone_str (mdef, h->phmm->ci));#endif } } /* Write phone lattice; startframe, first end frame, last end frame, ciphone */ nnode = 0; for (f = 0; f <= curfrm; f++) { for (p = phlatnode[f]; p; p = p->next) { fprintf (fp, "%4d %4d %4d %s\n", f, p->fef, p->lef, mdef_ciphone_str (mdef, p->ci)); nnode++; } } E_INFO("%d phone lattice nodes written to %s\n", nnode, filename); /* Free phone lattice */ for (f = 0; f <= curfrm; f++) { for (p = phlatnode[f]; p; p = phlatnode[f]) { phlatnode[f] = p->next; listelem_free ((char *)p, sizeof(phlatnode_t)); } } ckd_free (phlatnode); fclose (fp);}phseg_t *allphone_end_utt (char *uttid){ history_t *h, *nexth, *besth = (history_t *)0; int32 f, best; phseg_t *s, *nexts; char *phlatdir; /* Free old phseg, if any */ for (s = phseg; s; s = nexts) { nexts = s->next; listelem_free ((char *)s, sizeof(phseg_t)); } phseg = NULL; /* Write phone lattice if specified */ if ((phlatdir = (char *) cmd_ln_access ("-phlatdir")) != NULL) allphone_latdump (uttid, phlatdir); /* Find most recent history nodes list */ for (f = curfrm-1; (f >= 0) && (frm_hist[f] == NULL); --f); if (f >= 0) { /* Find best of the most recent history nodes */ best = (int32) 0x80000000; for (h = frm_hist[f]; h; h = h->next) { if (h->score > best) { best = h->score; besth = h; } } /* Backtrace */ for (h = besth; h; h = h->hist) { s = (phseg_t *) listelem_alloc (sizeof(phseg_t)); s->ci = h->phmm->ci; s->sf = (h->hist) ? h->hist->ef + 1 : 0; s->ef = h->ef; s->score = h->score; if (h->hist) s->score -= h->hist->score; s->score += seg_score_scale (s->sf, s->ef); s->next = phseg; phseg = s; } } E_INFO("%10d history nodes created\n", n_histnode); /* Free history nodes */ for (f = 0; f < curfrm; f++) { for (h = frm_hist[f]; h; h = nexth) { nexth = h->next; listelem_free ((char *) h, sizeof(history_t)); } frm_hist[f] = NULL; } return phseg;}static void phone_tp_init (char *file, float64 floor, float64 wt, float64 ip){ int32 i, j, ct, tot, inspen; FILE *fp; char p1[128], p2[128]; s3cipid_t pid1, pid2; float64 p; tp = (int32 **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(int32)); inspen = logs3 (ip); if (! file) { for (i = 0; i < mdef->n_ciphone; i++) for (j = 0; j < mdef->n_ciphone; j++) tp[i][j] = inspen; return; } for (i = 0; i < mdef->n_ciphone; i++) for (j = 0; j < mdef->n_ciphone; j++) tp[i][j] = S3_LOGPROB_ZERO; if ((fp = fopen(file, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", file); while (fscanf (fp, "%s %s %d %d", p1, p2, &ct, &tot) == 4) { pid1 = mdef_ciphone_id (mdef, p1); if (NOT_S3CIPID(pid1)) E_FATAL("Bad phone: %s\n", p1); pid2 = mdef_ciphone_id (mdef, p2); if (NOT_S3CIPID(pid2)) E_FATAL("Bad phone: %s\n", p2); if (tot > 0) p = ((float64)ct)/((float64)tot); else p = 0.0; if (p < floor) p = floor; tp[(unsigned)pid1][(unsigned)pid2] = (int32)(logs3(p) * wt) + inspen; } fclose (fp);}int32 allphone_init ( mdef_t *mdef, tmat_t *tmat ){ float64 *f64arg; char *file; float64 tpfloor, ip, wt; chk_tp_uppertri (); phmm_build (); file = (char *)cmd_ln_access("-phonetp"); if (! file) E_ERROR("-phonetpfn argument missing; assuming uniform transition probs\n"); tpfloor = *((float32 *) cmd_ln_access ("-phonetpfloor")); ip = *((float32 *) cmd_ln_access ("-inspen")); wt = *((float32 *) cmd_ln_access ("-phonetpwt")); phone_tp_init (file, tpfloor, wt, ip); f64arg = (float64 *) cmd_ln_access ("-beam"); beam = logs3 (*f64arg); E_INFO ("logs3(beam)= %d\n", beam); frm_hist = (history_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(history_t *)); score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); phseg = NULL; return 0;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?