📄 s3_align.c
字号:
#if _DEBUG_ALIGN_ dump_sent_hmm (); /* For debugging */#endif k = n_pnode * mdef->n_emit_state; if (k > active_list_size) { /* Need to grow active list arrays */ if (active_list_size > 0) { ckd_free (cur_active); ckd_free (next_active); } for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR); cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); } return 0;}int32 align_destroy_sent_hmm ( void ){ pnode_t *p; destroy_state_dag (); for (p = pnode_list; p; p = p->alloc_next) { plinks_free (p->succlist); plinks_free (p->predlist); } pnodes_free (); plinks_free (phead.succlist); plinks_free (ptail.predlist); return 0;}static history_t *lat_entry (snode_t *s){ history_t *h; h = (history_t *) listelem_alloc (sizeof(history_t)); h->snode = s; h->score = s->newscore; h->pred = s->newhist; h->alloc_next = hist_head; hist_head = h; return h;}static void activate (snode_t *s, int32 frm){ if (s->active_frm != frm) { assert (s->active_frm < frm); s->active_frm = frm; next_active[n_active++] = s; }}/** * Flag the active senones. */void align_sen_active (s3senid_t *senlist, int32 n_sen){ int32 i, sen; for (sen = 0; sen < n_sen; sen++) senlist[sen] = 0; for (i = 0; cur_active[i]; i++) { assert (IS_S3SENID(cur_active[i]->sen)); senlist[cur_active[i]->sen] = 1; }}/** * Start Viterbi alignment using the sentence HMM previously built. * Assumes that each utterance will only be aligned once; state member variables * initialized during sentence HMM building. */int32 align_start_utt (char *uttid){ slink_t *l; curfrm = 0; shead.score = 0; shead.hist = NULL; hist_head = NULL; n_active = 0; for (l = shead.succlist; l; l = l->next) { assert (l->node->active_frm < 0); l->node->active_frm = 0; cur_active[n_active++] = l->node; } cur_active[n_active++] = NULL; return 0;}/** * One frame of Viterbi time alignment. 
*/int32 align_frame (int32 *senscr){ int32 i, scr, tmpbest, bestscore, nf, thresh; snode_t *s, *ps; slink_t *l; history_t *tmphist = NULL; snode_t **tmpswap; nf = curfrm + 1; n_active = 0; /* For each active state update state score and history */ bestscore = (int32) 0x80000000; for (i = 0; cur_active[i]; i++) { s = cur_active[i]; assert (IS_S3SENID(s->sen)); tmpbest = (int32) 0x80000000; for (l = s->predlist; l; l = l->next) { ps = l->node; if (ps->active_frm == curfrm) { scr = ps->score + l->prob; if (scr > tmpbest) { tmpbest = scr; tmphist = ps->hist; } } } assert (tmpbest > (int32) 0x80000000); s->newscore = tmpbest + senscr[s->sen]; s->newhist = tmphist; if (s->newscore > bestscore) bestscore = s->newscore; } if (bestscore <= S3_LOGPROB_ZERO) E_ERROR("Bestscore= %d in frame %d\n", bestscore, curfrm); score_scale[curfrm] = bestscore; thresh = bestscore + beam; /* Update history lattice for each active state */ for (i = 0; cur_active[i]; i++) { s = cur_active[i]; if (s->newscore >= thresh) { s->newscore -= bestscore; /* Scale, to avoid underflow */ s->score = s->newscore; s->hist = lat_entry (s); activate (s, nf); /* Also activate successor nodes of s as they are reachable next frame */ for (l = s->succlist; l; l = l->next) { if (IS_S3SENID(l->node->sen)) activate (l->node, nf); } } else { s->score = S3_LOGPROB_ZERO; s->hist = NULL; } } /* Update active state list */ next_active[n_active] = NULL; tmpswap = cur_active; cur_active = next_active; next_active = tmpswap; curfrm = nf; return 0;}static void build_stseg (history_t *rooth){ history_t *h, *prevh; align_stseg_t *stseg, *tail = NULL; int32 f, prevscr; assert (align_stseg == NULL); prevscr = 0; prevh = NULL; for (f = 0, h = rooth; h; h = h->pred, f++) { stseg = (align_stseg_t *) listelem_alloc (sizeof(align_stseg_t)); if (! align_stseg) align_stseg = stseg; else tail->next = stseg; tail = stseg; stseg->next = NULL; stseg->pid = h->snode->pnode->pid; stseg->state = h->snode->state; stseg->start = ((! 
prevh) || (prevh->snode->pnode->id != h->snode->pnode->id)); stseg->score = h->score - prevscr + score_scale[f]; stseg->bsdiff = h->score; prevscr = h->score; prevh = h; }}static void build_phseg (history_t *rooth){ history_t *h, *nh; align_phseg_t *phseg, *tail = NULL; int32 f, prevf, prevscr, scale, bsdiff; assert (align_phseg == NULL); prevscr = 0; bsdiff = 0; scale = 0; prevf = -1; for (f = 0, h = rooth; h; h = h->pred, f++) { bsdiff += h->score; scale += score_scale[f]; nh = h->pred; if ((! nh) || (nh->snode->pnode->id != h->snode->pnode->id)) { phseg = (align_phseg_t *) listelem_alloc (sizeof(align_phseg_t)); if (! align_phseg) align_phseg = phseg; else tail->next = phseg; tail = phseg; phseg->next = NULL; phseg->pid = h->snode->pnode->pid; phseg->sf = prevf+1; phseg->ef = f; phseg->score = h->score - prevscr + scale, phseg->bsdiff = bsdiff; bsdiff = 0; scale = 0; prevscr = h->score; prevf = f; } }}static void build_wdseg (history_t *rooth){ history_t *h, *nh; align_wdseg_t *wdseg, *tail = NULL; int32 f, prevf, prevscr, scale, bsdiff; assert (align_wdseg == NULL); prevscr = 0; bsdiff = 0; scale = 0; prevf = -1; for (f = 0, h = rooth; h; h = h->pred, f++) { bsdiff += h->score; scale += score_scale[f]; nh = h->pred; if ((! nh) || ((nh->snode->pnode->id != h->snode->pnode->id) && (nh->snode->pnode->pos == 0))) { /* End of current word */ wdseg = (align_wdseg_t *) listelem_alloc (sizeof(align_wdseg_t)); if (! align_wdseg) align_wdseg = wdseg; else tail->next = wdseg; tail = wdseg; wdseg->next = NULL; wdseg->wid = h->snode->pnode->wid; wdseg->sf = prevf+1; wdseg->ef = f; wdseg->score = h->score - prevscr + scale, wdseg->bsdiff = bsdiff; bsdiff = 0; scale = 0; prevscr = h->score; prevf = f; } }}/** * All frames consumed. Trace back best Viterbi state sequence and dump it out. 
*/
/*
 * align_end_utt in detail:
 *   - frees the state/phone/word segmentations of any previous call;
 *   - picks, among the predecessors of stail that are active in curfrm, the
 *     one maximising score + transition prob, and adopts its history;
 *   - if such a history exists, reverses its ->pred chain in place so that it
 *     runs forward in time and builds the three segmentations from it;
 *   - hands the (possibly NULL) segmentation lists back through the three
 *     output pointers and frees every history entry on hist_head.
 *
 * Returns 0 if a final history was found, -1 otherwise.
 */
int32 align_end_utt (align_stseg_t **stseg_out,
                     align_phseg_t **phseg_out,
                     align_wdseg_t **wdseg_out)
{
    slink_t *link;
    snode_t *cand;
    history_t *cur, *older, *forward, *next_hist;
    align_stseg_t *st_next;
    align_phseg_t *ph_next;
    align_wdseg_t *wd_next;
    int32 status;

    /* Free up previous result, if any */
    for (; align_stseg != NULL; align_stseg = st_next) {
        st_next = align_stseg->next;
        listelem_free ((char *) align_stseg, sizeof(align_stseg_t));
    }
    for (; align_phseg != NULL; align_phseg = ph_next) {
        ph_next = align_phseg->next;
        listelem_free ((char *) align_phseg, sizeof(align_phseg_t));
    }
    for (; align_wdseg != NULL; align_wdseg = wd_next) {
        wd_next = align_wdseg->next;
        listelem_free ((char *) align_wdseg, sizeof(align_wdseg_t));
    }

    /* First find best ending history and link to stail */
    stail.score = (int32)0x80000000;
    stail.hist = NULL;
    for (link = stail.predlist; link != NULL; link = link->next) {
        cand = link->node;
        if (cand->active_frm != curfrm)
            continue;	/* Not alive in the final frame */
        if (cand->score + link->prob > stail.score) {
            stail.score = cand->score + link->prob;
            stail.hist = cand->hist;
        }
    }

    if (stail.hist != NULL) {
        /* Reverse the best Viterbi path (back trace) so it is forward in time */
        forward = NULL;
        cur = stail.hist;
        while (cur != NULL) {
            older = cur->pred;
            cur->pred = forward;
            forward = cur;
            cur = older;
        }

        /* Trace state, phone, and word segmentations */
        build_stseg (forward);
        build_phseg (forward);
        build_wdseg (forward);
    }
    status = (stail.hist != NULL) ? 0 : -1;

    *stseg_out = align_stseg;
    *phseg_out = align_phseg;
    *wdseg_out = align_wdseg;

    /* delete history list */
    for (; hist_head != NULL; hist_head = next_hist) {
        next_hist = hist_head->alloc_next;
        listelem_free ((char *) hist_head, sizeof(history_t));
    }

    return status;
}

/*
 * One-time initialisation of the aligner.
 *
 * Stores the model definition, transition matrices and dictionary in the
 * file-level variables, looks up the start, finish and silence words, builds
 * the BAD_S3WID-terminated fillwid list of optional filler words, converts
 * the "-beam" argument with logs3() and allocates score_scale with
 * S3_MAX_FRAMES entries.  The segmentation and history lists start out empty.
 *
 * All three arguments must be non-NULL (asserted).  A missing start or finish
 * word is reported with E_FATAL; a missing silence word only with E_ERROR.
 *
 * Returns 0.
 */
int32 align_init (mdef_t *_mdef, tmat_t *_tmat, dict_t *_dict)
{
    int32 n_fill;
    s3wid_t wid;
    float64 *beam_arg;

    mdef = _mdef;
    tmat = _tmat;
    dict = _dict;
    assert (mdef && tmat && dict);

    startwid = dict_wordid (dict, S3_START_WORD);
    finishwid = dict_wordid (dict, S3_FINISH_WORD);
    silwid = dict_wordid (dict, S3_SILENCE_WORD);

    if ((NOT_S3WID(startwid)) || (NOT_S3WID(finishwid))) {
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    }
    if (NOT_S3WID(silwid)) {
        E_ERROR("%s not in dictionary; no optional silence inserted between words\n", S3_SILENCE_WORD);
    }

    /* Create list of optional filler words to be inserted between transcript words */
    fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3), sizeof(s3wid_t));

    n_fill = 0;
    if (IS_S3WID(silwid)) {
        /* Silence, when present, always comes first */
        fillwid[n_fill] = silwid;
        n_fill++;
    }
    for (wid = dict->filler_start; wid <= dict->filler_end; wid++) {
        if (dict_basewid (dict, wid) != wid)
            continue;	/* Keep only words that are their own base word */
        if ((wid == silwid) || (wid == startwid) || (wid == finishwid))
            continue;	/* Handled separately */
        fillwid[n_fill] = wid;
        n_fill++;
    }
    fillwid[n_fill] = BAD_S3WID;	/* List terminator */

    beam_arg = (float64 *) cmd_ln_access ("-beam");
    beam = logs3 (*beam_arg);
    E_INFO ("logs3(beam)= %d\n", beam);

    score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));

    hist_head = NULL;
    align_stseg = NULL;
    align_phseg = NULL;
    align_wdseg = NULL;

    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -