flat_fwd.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 2,212 行 · 第 1/5 页

C
2,212
字号
	h->score[to] = scr;	if (bestfrom >= 0) {	    h->history[to] = h->history[bestfrom];	    h->pid[to] = h->pid[bestfrom];	}		if (bestscr < scr)	    bestscr = scr;    }    h->bestscore = bestscr;}#endifstatic int32 whmm_eval (int32 *senscr){    int32 best, cf;    s3wid_t w;    whmm_t *h, *nexth, *prevh;    int32 n_mpx, n_nonmpx;        best = S3_LOGPROB_ZERO;    n_mpx = n_nonmpx = 0;    cf = n_frm;        for (w = 0; w < dict->n_word; w++) {	prevh = NULL;	for (h = whmm[w]; h; h = nexth) {	    nexth = h->next;	    if (h->active == cf) {		if (h->pos == 0) {		    eval_mpx_whmm (w, h, senscr);		    n_mpx++;		} else {		    eval_nonmpx_whmm (w, h, senscr);		    n_nonmpx++;		}				if (best < h->bestscore)		    best = h->bestscore;		prevh = h;	    } else {		if (prevh)		    prevh->next = nexth;		else		    whmm[w] = nexth;				whmm_free (h);	    }	}    }    pctr_increment (ctr_mpx_whmm, n_mpx);    pctr_increment (ctr_nonmpx_whmm, n_nonmpx);        return best;}/** * Record a word exit in word lattice. * NOTE: All exits from a single word in a given frame (for different right context * ciphones) must occur contiguously. 
*/static void lattice_entry (s3wid_t w, int32 f, whmm_t *h){    s3cipid_t rc, npid;        if ((n_lat_entry <= 0) ||	(lattice[n_lat_entry-1].wid != w) || (lattice[n_lat_entry-1].frm != f)) {	/* New lattice entry */	if (n_lat_entry >= lat_alloc) {	    printf ("\n");	    E_INFO("Lattice size(%d) exceeded; increasing to %d\n",		   lat_alloc, lat_alloc+LAT_ALLOC_INCR);	    lat_alloc += LAT_ALLOC_INCR;	    lattice = ckd_realloc (lattice, lat_alloc * sizeof(lattice_t));	}		lattice[n_lat_entry].wid = w;	lattice[n_lat_entry].frm = (s3frmid_t) f;	lattice[n_lat_entry].score = h->score[final_state];	lattice[n_lat_entry].history = h->history[final_state];	/* Allocate space for different right context scores */	npid = get_rc_npid (w);	assert (npid > 0);	lattice[n_lat_entry].rcscore = (int32 *) ckd_calloc (npid, sizeof(int32));	for (rc = 0; rc < npid; rc++)	    lattice[n_lat_entry].rcscore[rc] = S3_LOGPROB_ZERO;	n_lat_entry++;    }    /* Slight BUG here: each rc can have its own history, but only the best retained!! */    if (lattice[n_lat_entry-1].score < h->score[final_state]) {	lattice[n_lat_entry-1].score = h->score[final_state];	lattice[n_lat_entry-1].history = h->history[final_state];    }    lattice[n_lat_entry-1].rcscore[h->rc] = h->score[final_state];}/** * Transition from hmm h into the next appropriate one for word w. * Threshold check for incoming score already completed. * The next HMM may be the last triphone for the word w, in which case, instantiate * multiple instances corresponding cross-word triphone modelling for all right context * ciphones. */static void whmm_transition (int32 w, whmm_t *h){    int32 lastpos, npid, nf;    whmm_t *nexth, *prevh;    s3cipid_t rc;    s3pid_t *pid;    lastpos = dict->word[w].pronlen - 1;    nf = n_frm+1;        if (h->pos < lastpos-1) {	/*	 * Transition to word HMM that's not the final one in word.  First, allocate	 * the HMM if not already present.	 */	if ((! 
h->next) || (h->next->pos != h->pos+1)) {	    nexth = whmm_alloc (h->pos+1);	    nexth->pid = &(wwpid[w][nexth->pos]);	    	    nexth->next = h->next;	    h->next = nexth;	}	/* Transition to next HMM */	nexth = h->next;	if (h->score[final_state] > nexth->score[0]) {	    nexth->score[0] = h->score[final_state];	    nexth->history[0] = h->history[final_state];	    nexth->active = nf;		/* Ensure it doesn't get pruned */	}    } else {	/*	 * Transition to final HMMs in word, with full cross-word rc modelling.  Allocate	 * all final triphone HMM instances first.	 */	prevh = h;	get_rcpid (w, &pid, &npid);		for (rc = 0; rc < npid; rc++) {	    if ((! prevh->next) || (prevh->next->rc != rc)) {		nexth = whmm_alloc (h->pos+1);		nexth->rc = rc;		nexth->pid = &(pid[rc]);				nexth->next = prevh->next;		prevh->next = nexth;	    }	    prevh = prevh->next;	}	/* Transition to next HMMs */	for (rc = 0, nexth = h->next; rc < npid; rc++, nexth = nexth->next) {	    if (h->score[final_state] > nexth->score[0]) {		nexth->score[0] = h->score[final_state];		nexth->history[0] = h->history[final_state];		nexth->active = nf;	/* Ensure it doesn't get pruned */	    }	}    }}static void whmm_exit (int32 thresh, int32 wordthresh){    s3wid_t w;    whmm_t *h;    int32 pronlen, nf;        nf = n_frm+1;    for (w = 0; w < dict->n_word; w++) {	pronlen = dict->word[w].pronlen;	for (h = whmm[w]; h; h = h->next) {	    if (h->bestscore >= thresh) {		if (h->pos == pronlen-1) {		    if (h->score[final_state] >= wordthresh)			lattice_entry (w, n_frm, h);		} else {		    if (h->score[final_state]+phone_penalty >= thresh)			whmm_transition (w, h);		}		h->active = nf;	    }	}    }}/** * Get the last two non-filler, non-silence lattice words w0 and w1 (base word-ids), * starting from l.  w1 is later than w0.  At least w1 must exist; w0 may not. 
*/static void two_word_history (s3latid_t l, s3wid_t *w0, s3wid_t *w1){    s3latid_t l0, l1;    l0=0;        for (l1 = l; filler_word(lattice[l1].wid); l1 = lattice[l1].history);/* BHIKSHA HACK - PERMIT MULTIPLE PRONS FOR <s> */if (l1 != -1)     for (l0 = lattice[l1].history;          (IS_S3LATID(l0)) && (filler_word(lattice[l0].wid));	 l0 = lattice[l0].history);/* BHIKSHA HACK - PERMIT MULTIPLE PRONS FOR <s> */if (l1 == -1) *w1 = 0; else    *w1 = dict_basewid(dict, lattice[l1].wid);if (l1 == -1) *w0 = BAD_S3WID; else    *w0 = (NOT_S3LATID(l0)) ? BAD_S3WID : dict_basewid(dict,lattice[l0].wid);}/** * Transition into a word w.  Since we transition into the first phone position, the * triphone model must be derived from the incoming left context ciphone.  The first * state of the whmm instance inherits this triphone model and propagates it along with * the score. * If the first phone is also the last (single-phone word), we must also model all * possible right context ciphones, by instantiating separate whmm models for each rc. */static void word_enter (s3wid_t w, int32 score, s3latid_t l, s3cipid_t lc){    whmm_t *h, *prevh;    s3cipid_t b, rc;    s3pid_t pid, *rpid;    int32 s, npid, nf;        nf = n_frm+1;        b = dict->word[w].ciphone[0];    if (dict->word[w].pronlen > 1) {	/* Multi-phone word; no right context problem */	rc = dict->word[w].ciphone[1];	pid = lcpid[b][rc].pid[lcpid[b][rc].cimap[lc]];	if ((! whmm[w]) || (whmm[w]->pos != 0)) {	    h = whmm_alloc (0);	    for (s = 0; s < n_state; s++)		h->pid[s] = pid;	    	    h->next = whmm[w];	    whmm[w] = h;	}	h = whmm[w];	if (score > h->score[0]) {	    h->score[0] = score;	    h->history[0] = l;	    h->pid[0] = pid;	    h->active = nf;	}    } else {	/* Do all right contexts; first make sure all are allocated */	prevh = NULL;	h = whmm[w];	npid = get_rc_npid (w);	rpid = lrcpid[b][lc].pid;		for (rc = 0; rc < npid; rc++) {	    if ((! 
h) || (h->rc != rc)) {		h = whmm_alloc (0);		for (s = 0; s < n_state; s++)		    h->pid[s] = rpid[rc];		h->rc = rc;		if (prevh) {		    h->next = prevh->next;		    prevh->next = h;		} else {		    h->next = whmm[w];		    whmm[w] = h;		}	    }	    prevh = h;	    h = h->next;	}	assert (! h);		/* Transition to the allocated HMMs */	b = dict->word[w].ciphone[0];	for (rc = 0, h = whmm[w]; rc < npid; rc++, h = h->next) {	    if (score > h->score[0]) {		h->score[0] = score;		h->history[0] = l;		h->pid[0] = rpid[rc];		h->active = nf;	    }	}    }}/** * Backoff node when backing off all the way to unigrams.  Since each word exits with * #ciphones different scores (for so many different right contexts), a separate node * exists for each context. */typedef struct {    s3latid_t latid;	/** History entry */    int32 score;	/** Acoustic + backed off LM score */    s3cipid_t lc;	/** Last ciphone of history entry, to be used as left context upon			   entering a new word. */} backoff_t;static backoff_t *ug_backoff, *filler_backoff;static uint8 *tg_trans_done;	/** If tg_trans_done[w] TRUE, trigram transition to w				   occurred for a given history, and backoff bigram				   transition from same history should be avoided */static int32 *rcscore = NULL;	/** rc scores uncompacted; one entry/rc-ciphone */static s3wid_t *word_cand_cf;	/** BAD_S3WID terminated array of candidate words for word				   transition in current frame (if using input word				   lattices to restrict search). *//** * Unigrams re-organized for faster unigram word transitions.  Words partitioned by * their first CI phone and ordered in descending unigram probability within each * partition. */typedef struct word_ugprob_s {    s3wid_t wid;    int32 ugprob;    struct word_ugprob_s *next;} word_ugprob_t;static word_ugprob_t **word_ugprob;/** * Build array of candidate words that start around the current frame (cf). * Note: filler words are not in this list since they are always searched (see * word_trans). 
*/
static void build_word_cand_cf (int32 cf)
{
    int32 frm, first, last;
    s3wid_t wid, n_cand;
    word_cand_t *cp;

    /* Pass 1: use word_cand_cf[] as a per-word flag array, initially all clear */
    for (wid = 0; wid < dict->n_word; wid++) {
        word_cand_cf[wid] = 0;
    }

    /* Frame window [cf - word_cand_win, cf + word_cand_win], clipped to valid frames */
    first = cf - word_cand_win;
    if (first < 0) {
        first = 0;
    }
    last = cf + word_cand_win;
    if (last >= S3_MAX_FRAMES) {
        last = S3_MAX_FRAMES-1;
    }

    /* Flag every word listed as a candidate in any frame of the window */
    for (frm = first; frm <= last; frm++) {
        cp = word_cand[frm];
        while (cp) {
            word_cand_cf[cp->wid] = 1;
            cp = cp->next;
        }
    }

    word_cand_cf[startwid] = 0;	/* Never hypothesized (except at beginning) */
    for (wid = dict->filler_start; wid <= dict->filler_end; wid++) {
        word_cand_cf[wid] = 0;	/* Handled separately */
    }
    word_cand_cf[finishwid] = 1;	/* Always a candidate */

    /*
     * Pass 2: compact the flags in place into a list of word ids.  The write index
     * never passes the read index, so no unread flag is overwritten.
     */
    n_cand = 0;
    for (wid = 0; wid < dict->n_word; wid++) {
        if (word_cand_cf[wid]) {
            word_cand_cf[n_cand] = wid;
            n_cand++;
        }
    }
    word_cand_cf[n_cand] = BAD_S3WID;
}

/**
 * Transition for one word.
 *
 * Processes the lattice entries from frm_latstart[n_frm] up to n_lat_entry and
 * transitions from each of them into following words.
 */
static void word_trans (int32 thresh)
{
    s3latid_t l;	/* lattice entry index */
    s3cipid_t *rcmap, rc, lc;
    s3wid_t w, bw0, bw1, nextwid;
    tg_t *tgptr;
    bg_t *bgptr;
    int32 bowt, acc_bowt, newscore;
    int32 n_tg, n_bg;
    int32 cand, lscr;
    int32 lat_start;

    /*    int32 tempi, temp_j;*/
    lat_start = frm_latstart[n_frm];

    /* Reset the per-right-context backoff scores */
    for (rc = 0; rc < mdef->n_ciphone; rc++) {
        ug_backoff[rc].score = S3_LOGPROB_ZERO;
        filler_backoff[rc].score = S3_LOGPROB_ZERO;
    }

    /* Candidate list is rebuilt only when candidate words are in use */
    if (n_word_cand > 0)
        build_word_cand_cf (n_frm);

    /* Trigram/Bigram word transitions from words just exited */
    for (l = lat_start; l < n_lat_entry; l++) {
        w = lattice[l].wid;

        if (w == finishwid)
            continue;

        /* Cross-word left context for new words to which we may transition */
        lc = dict->word[w].ciphone[dict->word[w].pronlen-1];

        /* Uncompact path scores for all right context ciphones for word just finished */
        rcmap = get_rc_cimap (w);
        for (rc = 0; rc < mdef->n_ciphone; rc++)
            rcscore[rc] = lattice[l].rcscore[rcmap[rc]];

        /* Find the last real (non-filler, non-silence) two-word history */
        two_word_history (l, &bw0, &bw1);

        if (n_word_cand <= 0) {
            /* Transition to all words in vocab */

            /* Clear trigram transition flag for each word for this history */
            memset (tg_trans_done, 0, dict->n_word*sizeof(uint8));

            /* First, transition to trigram followers of bw0, bw1 */
            acc_bowt = 0;
            if ((IS_S3WID(bw0)) && ((n_tg = lm_tglist (lm,
                                                       dict2lmwid[dict_basewid(dict,bw0)],
                                                       dict2lmwid[dict_basewid(dict,bw1)], &tgptr, &bowt)) > 0)) {
                /* Transition to trigram followers of bw0, bw1, if any */
                for (; n_tg > 0; --n_tg, tgptr++) {
                    /* Transition to all alternative pronunciations for trigram follower */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?