flat_fwd.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 2,212 行 · 第 1/5 页

C
2,212
字号
		    /*
		     * NOTE: this excerpt begins inside a function whose header (and the
		     * loop over trigram entries that supplies tgptr) lies above the
		     * visible text.
		     */
		    /* Map the trigram follower's LM word id to a dictionary word id. */
		    nextwid = LM_DICTWID(lm, tgptr->wid);

		    if (IS_S3WID(nextwid) && (nextwid != startwid)) {
			/* Try every alternative pronunciation of the follower. */
			for (w = nextwid; IS_S3WID(w); w = dict->word[w].alt) {
			    newscore = rcscore[dict->word[w].ciphone[0]] +
				LM_TGPROB (lm, tgptr) + phone_penalty;

			    if (newscore >= thresh) {
				word_enter (w, newscore, l, lc);
				/*
				 * Remember the trigram transition; the bigram pass
				 * below tests tg_trans_done[nextwid] and skips words
				 * already entered here.
				 */
				tg_trans_done[w] = 1;
			    }
			}
		    }
		}

		/* Backoff weight accumulated so far (used by the bigram/unigram steps below) */
		acc_bowt = bowt;
	    }

	    /* Transition to bigram followers of bw1 */
	    if ((n_bg = lm_bglist (lm,
				   dict2lmwid[dict_basewid(dict,bw1)], 
				   &bgptr, 
				   &bowt)) > 0) {
		/* Transition to bigram followers of bw1, if any */
		for (; n_bg > 0; --n_bg, bgptr++) {
		    /* Transition to all alternative pronunciations for bigram follower */
		    nextwid = LM_DICTWID (lm, bgptr->wid);

		    if (IS_S3WID(nextwid) &&
			(! tg_trans_done[nextwid]) &&	/* TG transition already done */
			(nextwid != startwid)) {	/* No transition to <s> */
			for (w = nextwid; IS_S3WID(w); w = dict->word[w].alt) {
			    /* Bigram score also carries the accumulated backoff weight. */
			    newscore = rcscore[dict->word[w].ciphone[0]] +
				LM_BGPROB (lm, bgptr) + acc_bowt + phone_penalty;

			    if (newscore >= thresh)
				word_enter (w, newscore, l, lc);
			}
		    }
		}

		/* Add the bigram backoff weight for the unigram backoff step below. */
		acc_bowt += bowt;
	    }

	    /* Update unigram backoff node */
	    for (rc = 0; rc < mdef->n_ciphone; rc++) {
		/* Skip right contexts whose score is at or below S3_LOGPROB_ZERO. */
		if (rcscore[rc] <= S3_LOGPROB_ZERO)
		    continue;

		/* Keep only the best-scoring (score, lattice id, lc) per right context. */
		if (rcscore[rc]+acc_bowt+phone_penalty > ug_backoff[rc].score) {
		    ug_backoff[rc].score = rcscore[rc]+acc_bowt+phone_penalty;
		    ug_backoff[rc].latid = l;
		    ug_backoff[rc].lc = lc;
		}
	    }
	} else {
	    /* Transition to words in word_cand_cf */
	    /* The candidate list is terminated by an entry that is not a valid word id. */
	    for (cand = 0; IS_S3WID(word_cand_cf[cand]); cand++) {
		nextwid = word_cand_cf[cand];

		/* One LM score per candidate, shared by all of its pronunciations. */
		lscr = lm_tg_score (lm,
				    dict2lmwid[dict_basewid(dict,bw0)],
				    dict2lmwid[dict_basewid(dict,bw1)],
				    dict2lmwid[nextwid],
				    nextwid);
		for (w = nextwid; IS_S3WID(w); w = dict->word[w].alt) {
		    newscore = rcscore[dict->word[w].ciphone[0]] + lscr + phone_penalty;

		    if (newscore >= thresh)
			word_enter (w, newscore, l, lc);
		}
	    }
	}

	/* Update filler backoff node */
	/* Same best-per-right-context bookkeeping as ug_backoff, without any LM backoff weight. */
	for (rc = 0; rc < mdef->n_ciphone; rc++) {
	    if (rcscore[rc] <= S3_LOGPROB_ZERO)
		continue;
	    if (rcscore[rc]+phone_penalty > filler_backoff[rc].score) {
		filler_backoff[rc].score = rcscore[rc]+phone_penalty;
		filler_backoff[rc].latid = l;
		filler_backoff[rc].lc = lc;
	    }
	}
    }

    /*
     * We have finished transitions to all tg/bg followers of all words just ended.
     * Or, if working from a lattice, transitioned to all words that may start at this
     * point as indicated by the lattice.
     */

    /* Transition to unigrams from backoff nodes (if not working from a lattice) */
    if (n_word_cand <= 0) {
#if 0
	/*
	 * Disabled older implementation that walks the whole unigram list.
	 * Note it calls lm_uglist with a single argument, whereas fwd_init
	 * below calls it as lm_uglist (lm, &ugptr).
	 */
	n_ug = lm_uglist (&ugptr);
	for (; n_ug > 0; --n_ug, ugptr++) {
	    for (w = ugptr->dictwid; IS_S3WID(w); w = dict->word[w].alt) {
		if (w == startwid)
		    continue;

		rc = dict->word[w].ciphone[0];
		if (ug_backoff[rc].score >= thresh) {
		    newscore = ug_backoff[rc].score + LM_UGPROB (lm, ugptr);
		    if (newscore >= thresh)
			word_enter (w, newscore, ug_backoff[rc].latid, ug_backoff[rc].lc);
		}
	    }
	}
#else
	word_ugprob_t *wp;
	int32 rcscr;

	/* For each first CI phone, walk the per-phone unigram list built by fwd_init. */
	for (rc = 0; rc < mdef->n_ciphone; rc++) {
	    rcscr = ug_backoff[rc].score;
	    l = ug_backoff[rc].latid;
	    lc = ug_backoff[rc].lc;

	    for (wp = word_ugprob[rc]; wp; wp = wp->next) {
		newscore = rcscr + wp->ugprob;
		/*
		 * fwd_init inserts entries in non-increasing ugprob order, so
		 * once one entry falls below thresh the remaining ones do too.
		 */
		if (newscore < thresh)
		    break;
		word_enter (wp->wid, newscore, l, lc);
	    }
	}
#endif
    }

    /*
     * Transition to silence and filler words.  Assume alternative pronunciations
     * are all within filler_start..filler_end
     */
    for (w = dict->filler_start; w <= dict->filler_end; w++) {
	/* The start and finish words are never entered from here. */
	if ((w == startwid) || (w == finishwid))
	    continue;

	rc = dict->word[w].ciphone[0];
	if (filler_backoff[rc].score > S3_LOGPROB_ZERO) {
	    /* Filler words are scored with fillpen() on the base word id. */
	    newscore = filler_backoff[rc].score + fillpen(fpen,dict_basewid(dict,w));
	    if (newscore >= thresh)
		word_enter (w, newscore, filler_backoff[rc].latid, filler_backoff[rc].lc);
	}
    }

    /* Free rcscore here, if necessary to conserve memory space */
}

/**
 * Initialize the forward search.
 *
 * Stores the four model pointers in the module's mdef/tmat/dict/lm variables,
 * reads search parameters through cmd_ln_access ("-beam", "-nwbeam",
 * "-phonepen", "-bptblsize", "-inlatdir", "-latext", "-inlatwin",
 * "-tracewhmm", "-worddumpsf", "-hmmdumpsf"), allocates the search data
 * structures, and builds the per-CI-phone unigram lists in word_ugprob.
 *
 * @param _mdef  model definition; must be non-NULL (asserted)
 * @param _tmat  transition matrices; must be non-NULL (asserted)
 * @param _dict  pronunciation dictionary; must be non-NULL (asserted)
 * @param _lm    language model; must be non-NULL (asserted)
 *
 * Calls E_FATAL if the silence, start or finish word is missing from the
 * dictionary.
 */
void fwd_init (mdef_t* _mdef, tmat_t* _tmat, dict_t* _dict,lm_t *_lm)
{
    float64 *f64arg;
    float32 *f32arg;
    char *tmpstr;

    E_INFO ("Forward Viterbi Initialization\n");

    mdef = _mdef;
    tmat = _tmat;
    dict = _dict;
    lm   = _lm;

    assert (mdef && tmat && dict && lm);

    /* HMM states information */
    /* One state more than the emitting states; the last index is final_state. */
    n_state = mdef->n_emit_state + 1;
    final_state = n_state - 1;

    /* Variables for speeding up whmm evaluation */
    st_sen_scr = (int32 *) ckd_calloc (n_state-1, sizeof(int32));

    /* Some key word ids */
    silwid = dict_wordid (dict,S3_SILENCE_WORD);
    startwid = dict_wordid (dict,S3_START_WORD);
    finishwid = dict_wordid (dict,S3_FINISH_WORD);
    if ((NOT_S3WID(silwid)) || (NOT_S3WID(startwid)) || (NOT_S3WID(finishwid)))
	E_FATAL("%s, %s, or %s missing from dictionary\n",
		S3_SILENCE_WORD, S3_START_WORD, S3_FINISH_WORD);

    /* Beam widths and penalties */
    /* Each argument value is passed through logs3() before being stored. */
    f64arg = (float64 *) cmd_ln_access ("-beam");
    beam = logs3 (*f64arg);
    f64arg = (float64 *) cmd_ln_access ("-nwbeam");
    wordbeam = logs3 (*f64arg);
    f32arg = (float32 *) cmd_ln_access ("-phonepen");
    phone_penalty = logs3 (*f32arg);
    E_INFO ("logs3(beam)= %d, logs3(nwbeam)= %d\n", beam, wordbeam);

    /* Allocate whmm structure */
    /* One (initially NULL) list head per dictionary word. */
    whmm = (whmm_t **) ckd_calloc (dict->n_word, sizeof(whmm_t *));

    /* Allocate output word lattice structure */
    lat_alloc = *((int32 *) cmd_ln_access ("-bptblsize"));
    lattice = (lattice_t *) ckd_calloc (lat_alloc, sizeof(lattice_t));
    n_lat_entry = 0;

    /* Build cross-word triphone models */
    /* tmp_xwdpid is scratch space: allocated here and freed right after the two builders run. */
    tmp_xwdpid = (s3pid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3pid_t));
    n_backoff_ci = 0;
    build_wwpid ();
    E_INFO("%d within-word triphone instances mapped to CI-phones\n", n_backoff_ci);
    /* Counter is reset so the second message reports only the cross-word count. */
    n_backoff_ci = 0;
    build_xwdpid_map ();
    E_INFO("%d cross-word triphones mapped to CI-phones\n", n_backoff_ci);
    ckd_free (tmp_xwdpid);

    /* Data structures needed during word transition */
    rcscore = (int32 *) ckd_calloc (mdef->n_ciphone, sizeof(int32));
    ug_backoff = (backoff_t *) ckd_calloc (mdef->n_ciphone, sizeof(backoff_t));
    filler_backoff = (backoff_t *) ckd_calloc (mdef->n_ciphone, sizeof(backoff_t));
    tg_trans_done = (uint8 *) ckd_calloc (dict->n_word, sizeof(uint8));

    /* Check transition matrices for upper-triangularity */
    chk_tp_uppertri ();

    /* Input candidate-word lattices information to restrict search; if any */
    word_cand_dir = (char *) cmd_ln_access ("-inlatdir");
    latfile_ext = (char *) cmd_ln_access ("-latext");
    word_cand_win = *((int32 *) cmd_ln_access ("-inlatwin"));
    if (word_cand_win < 0) {
	/* A negative window is reported and replaced by 50. */
	E_ERROR("Invalid -inlatwin argument: %d; set to 50\n", word_cand_win);
	word_cand_win = 50;
    }

    /* Allocate pointers to lists of word candidates in each frame */
    /* Only allocated when an input lattice directory was given. */
    if (word_cand_dir) {
	word_cand = (word_cand_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(word_cand_t *));
	/* n_word+1 entries: room for every word plus one terminating entry. */
	word_cand_cf = (s3wid_t *) ckd_calloc (dict->n_word+1, sizeof(s3wid_t));
    }

    /* Space for first lattice entry in each frame (+ terminating sentinel) */
    frm_latstart = (s3latid_t *) ckd_calloc (S3_MAX_FRAMES+1, sizeof(s3latid_t));

    /* Allocate timers and counters for statistics gathering */
    pctr_new(ctr_mpx_whmm,"mpx");
    pctr_new(ctr_nonmpx_whmm,"~mpx");
    pctr_new(ctr_latentry,"lat");

    /* Word to be traced in detail */
    if ((tmpstr = (char *) cmd_ln_access ("-tracewhmm")) != NULL) {
	trace_wid = dict_wordid (dict,tmpstr);
	/* An unknown word is only reported; trace_wid keeps the not-a-word id. */
	if (NOT_S3WID(trace_wid))
	    E_ERROR("%s not in dictionary; cannot be traced\n", tmpstr);
    } else
	trace_wid = BAD_S3WID;

    /* Active words to be dumped for debugging after the given frame no, if any */
    /*
     * The cmd_ln_access result is held in a char * but read as an int32;
     * when the option is absent (NULL) the frame number defaults to 0x7ffffff0.
     */
    tmpstr = (char *) cmd_ln_access ("-worddumpsf");
    word_dump_sf = tmpstr ? *((int32 *) tmpstr) : (int32) 0x7ffffff0;

    /* Active HMMs to be dumped for debugging after the given frame no, if any */
    tmpstr = (char *) cmd_ln_access ("-hmmdumpsf");
    hmm_dump_sf = tmpstr ? *((int32 *) tmpstr) : (int32) 0x7ffffff0;

    /* Initialize word_ugprob (assuming there is only one LM) */
    {
	s3wid_t w;
	s3cipid_t ci;
	int32 n_ug, ugprob;
	ug_t *ugptr;
	word_ugprob_t *wp, *prevwp;

	/* One list head per CI phone, indexed by a word's first CI phone. */
	word_ugprob = (word_ugprob_t **) ckd_calloc (mdef->n_ciphone,
						     sizeof(word_ugprob_t *));
	n_ug = lm_uglist (lm,&ugptr);
	for (; n_ug > 0; --n_ug, ugptr++) {
	    /* The start word is never a unigram transition target. */
	    if ((w = ugptr->dictwid) == startwid)
		continue;

	    ugprob = LM_UGPROB(lm, ugptr);

	    /* Every alternative pronunciation gets its own entry with the same ugprob. */
	    for (; IS_S3WID(w); w = dict->word[w].alt) {
		ci = dict->word[w].ciphone[0];

		/*
		 * Find the insertion point that keeps the list in non-increasing
		 * ugprob order: advance past all entries with ugprob >= the new one.
		 */
		prevwp = NULL;
		for (wp = word_ugprob[ci]; wp && (wp->ugprob >= ugprob); wp = wp->next)
		    prevwp = wp;

		wp = (word_ugprob_t *) listelem_alloc (sizeof(word_ugprob_t));
		wp->wid = w;
		wp->ugprob = ugprob;
		if (prevwp) {
		    /* Insert after prevwp. */
		    wp->next = prevwp->next;
		    prevwp->next = wp;
		} else {
		    /* Insert at the head of the list. */
		    wp->next = word_ugprob[ci];
		    word_ugprob[ci] = wp;
		}
	    }
	}
    }

    /* Initialize bestpath search related */
    dag.list = NULL;
}

/*
 * Load word candidates from an already opened input lattice file into the
 * per-start-frame lists word_cand[].
 *
 * Expected input, as parsed below: any number of lines up to one of the form
 * "Nodes <count>", followed by <count> lines "<seqno> <word> <startframe>"
 * whose seqno values run 0, 1, 2, ...
 *
 * Returns the number of candidate entries added, or -1 on a missing Nodes
 * line, a truncated file, an unparsable line, a sequence-number mismatch or
 * an out-of-range start frame.  Entries added before an error are left in
 * word_cand[]; the caller visible in this file (fwd_start_utt) calls
 * word_cand_free() whenever the result is <= 0.
 */
static int32 word_cand_load (FILE *fp)
{
    char line[1024], word[1024];
    int32 i, k, n, nn, sf, seqno, lineno;
    s3wid_t w;
    word_cand_t *candp;

    /* Skip past Nodes parameter */
    lineno = 0;
    nn = 0;
    word[0] = '\0';
    while (fgets (line, sizeof(line), fp) != NULL) {
	lineno++;
	if ((sscanf (line, "%s %d", word, &nn) == 2) && (strcmp (word, "Nodes") == 0))
	    break;
    }
    /* Reached on break or on end of file; reject if no usable Nodes count was seen. */
    if ((strcmp (word, "Nodes") != 0) || (nn <= 0)) {
	E_ERROR("%s: Nodes parameter missing from input lattice\n", uttid);
	return -1;
    }

    n = 0;
    for (i = 0; i < nn; i++) {
	if (fgets (line, 1024, fp) == NULL) {
	    E_ERROR("%s: Incomplete input lattice\n", uttid);
	    return -1;
	}
	lineno++;

	if ((k = sscanf (line, "%d %s %d", &seqno, word, &sf)) != 3) {
	    E_ERROR("%s: Error in lattice, line %d: %s\n", uttid, lineno, line);
	    return -1;
	}
	/* Node lines must be numbered consecutively from 0. */
	if (seqno != i) {
	    E_ERROR("%s: Seq# error in lattice, line %d: %s\n", uttid, lineno, line);
	    return -1;
	}
	/* sf indexes word_cand[], so it must lie in [0, S3_MAX_FRAMES). */
	if ((sf < 0) || (sf >= S3_MAX_FRAMES)) {
	    E_ERROR("%s: Startframe error in lattice, line %d: %s\n", uttid, lineno, line);
	    return -1;
	}

	w = dict_wordid (dict,word);
	if (NOT_S3WID(w)) {
	    /* Unknown words are reported and skipped; loading continues. */
	    E_ERROR("%s: Unknown word in lattice: %s; ignored\n", uttid, word);
	    continue;
	}
	/* Candidates are stored by base word id. */
	w = dict_basewid(dict,w);

	/* Check node not already present; avoid duplicates */
	for (candp = word_cand[sf]; candp && (candp->wid != w); candp = candp->next);
	if (candp)
	    continue;

	/* Push the new candidate onto the front of this frame's list. */
	candp = (word_cand_t *) listelem_alloc (sizeof(word_cand_t));
	candp->wid = w;
	candp->next = word_cand[sf];
	word_cand[sf] = candp;

	n++;
    }

    return n;
}

/*
 * Release every candidate entry in word_cand[0 .. S3_MAX_FRAMES-1], reset
 * each list head to NULL and set n_word_cand to 0.
 */
static void word_cand_free ( void )
{
    word_cand_t *candp, *next;
    int32 f;

    for (f = 0; f < S3_MAX_FRAMES; f++) {
	for (candp = word_cand[f]; candp; candp = next) {
	    /* Save the successor before the node is released. */
	    next = candp->next;
	    listelem_free ((char *)candp, sizeof(word_cand_t));
	}

	word_cand[f] = NULL;
    }

    n_word_cand = 0;
}

/*
 * Begin forward Viterbi search of one utterance
 *
 * NOTE: the end of this function lies beyond the visible excerpt.
 */
void fwd_start_utt (char *id)
{
    int32 w, l, ispipe;
    char str[1024];
    FILE *fp;

    /*
     * NOTE(review): uttid is assigned here without releasing its previous
     * value, and a few lines below this fresh copy is freed and allocated
     * again.  Looks like the previous utterance's id is leaked and this
     * first assignment is redundant -- confirm and remove.
     */
    uttid = ckd_salloc (id);

    ptmr_reset (&tm_hmmeval);
    ptmr_reset (&tm_hmmtrans);
    ptmr_reset (&tm_wdtrans);

    if (uttid)
	ckd_free (uttid);
    uttid = ckd_salloc (id);

    /* Free rcscores for each lattice entry */
    for (l = 0; l < n_lat_entry; l++) {
	if (lattice[l].rcscore) {
	    ckd_free (lattice[l].rcscore);
	    lattice[l].rcscore = NULL;
	}
    }
    n_lat_entry = 0;

    /* If input lattice file containing word candidates to be searched specified; use it */
    if (word_cand_dir) {
	/*
	 * NOTE(review): unbounded sprintf into str[1024]; the lengths of
	 * word_cand_dir, id and latfile_ext are not checked here.
	 */
	sprintf (str, "%s/%s.%s", word_cand_dir, id, latfile_ext);
	E_INFO("Reading input lattice: %s\n", str);

	if ((fp = fopen_compchk (str, &ispipe)) == NULL)
	    E_ERROR("fopen_compchk(%s) failed; running full search\n", str);
	else {
	    if ((n_word_cand = word_cand_load (fp)) <= 0) {
		E_ERROR("Bad or empty lattice file: %s; ignored\n", str);
		/* Drops any partially loaded entries and resets n_word_cand to 0. */
		word_cand_free ();
	    } else
		E_INFO("%d lattice entries read\n", n_word_cand);

	    fclose_comp (fp, ispipe);
	}
    }

    /* Enter all pronunciations of startwid (begin silence) */
    n_frm = -1;	/* Since word_enter transitions to "NEXT" frame */
    for (w = startwid; IS_S3WID(w); w = dict->word[w].alt)
	word_enter (w, 0, BAD_S3LATID,
		    dict->word[silwid].c

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?