s3_allphone.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 844 行 · 第 1/2 页

C
844
字号
}static void phmm_eval (phmm_t *p, int32 *senscr){    int32 **tp;    int32 nst, from, to, bestfrom, newscr, bestscr;    history_t *besthist = (history_t *)0;        nst = mdef->n_emit_state;    tp = tmat->tp[p->tmat];        bestscr = S3_LOGPROB_ZERO;        /* Update state scores from last to first (assuming no backward transitions) */    for (to = nst-1; to >= 0; --to) {	/* Find best incoming score to the "to" state from predecessor states */	bestfrom = S3_LOGPROB_ZERO;	for (from = to; from >= 0; from--) {	    if ((tp[from][to] > S3_LOGPROB_ZERO) && (p->score[from] > S3_LOGPROB_ZERO)) {		newscr = p->score[from] + tp[from][to];		if (newscr > bestfrom) {		    bestfrom = newscr;		    besthist = p->hist[from];		}	    }	}	/* If looking at initial state, also consider incoming score */	if ((to == 0) && (p->inscore > bestfrom)) {	    bestfrom = p->inscore;	    besthist = p->inhist;	}		/* Update state score */	if (bestfrom > S3_LOGPROB_ZERO) {	    p->score[to] = bestfrom + senscr[p->sen[to]];	    p->hist[to] = besthist;	    if (p->score[to] > bestscr)		bestscr = p->score[to];	}    }    /* Update non-emitting exit state score */    bestfrom = S3_LOGPROB_ZERO;    to = nst;    for (from = nst-1; from >= 0; from--) {	if ((tp[from][to] > S3_LOGPROB_ZERO) && (p->score[from] > S3_LOGPROB_ZERO)) {	    newscr = p->score[from] + tp[from][to];	    if (newscr > bestfrom) {		bestfrom = newscr;		besthist = p->hist[from];	    }	}    }    p->score[to] = bestfrom;    p->hist[to] = besthist;    if (p->score[to] > bestscr)	bestscr = p->score[to];        p->bestscore = bestscr;}/** Evaluate active PHMMs */static int32 phmm_eval_all (int32 *senscr){    s3cipid_t ci;    phmm_t *p;    int32 best;        best = S3_LOGPROB_ZERO;    for (ci = 0; ci < mdef->n_ciphone; ci++) {	for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {	    if (p->active == curfrm) {		phmm_eval (p, senscr);		if (p->bestscore > best)		    best = p->bestscore;	    }	}    }    return best;}static void phmm_exit (int32 best){    s3cipid_t ci;    phmm_t *p;    int32 th, nf, nst, s;    history_t *h;        th = best + beam;        frm_hist[curfrm] = NULL;    nf = curfrm+1;    nst = mdef->n_emit_state;        for (ci = 0; ci < mdef->n_ciphone; ci++) {	for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {	    if (p->active == curfrm) {		if (p->bestscore >= th) {		    /* Scale state scores to prevent underflow */		    for (s = 0; s <= nst; s++)			if (p->score[s] > S3_LOGPROB_ZERO)			    p->score[s] -= best;		    		    /* Create lattice entry if exiting */		    if (p->score[nst] >= beam) { /* beam, not th because scores scaled */			h = (history_t *) listelem_alloc (sizeof(history_t));			h->score = p->score[nst];			h->ef = curfrm;			h->phmm = p;			h->hist = p->hist[nst];						h->next = frm_hist[curfrm];			frm_hist[curfrm] = h;						n_histnode++;		    }		    		    /* Mark PHMM active in next frame */		    p->active = nf;		} else {		    /* Reset state scores */		    for (s = 0; s <= nst; s++) {			p->score[s] = S3_LOGPROB_ZERO;			p->hist[s] = NULL;		    }		}	    }	    	    /* Reset incoming score in preparation for cross-PHMM transition */	    p->inscore = S3_LOGPROB_ZERO;	}    }}static void phmm_trans ( void ){    history_t *h;    phmm_t *from, *to;    plink_t *l;    int32 newscore, nf;        nf = curfrm+1;        /* Transition from exited nodes to initial states of HMMs */    for (h = frm_hist[curfrm]; h; h = h->next) {	from = h->phmm;	for (l = from->succlist; l; l = l->next) {	    to = l->phmm;	    newscore = h->score + tp[(unsigned)from->ci][(unsigned)to->ci];	    if ((newscore > beam) && (newscore > to->inscore)) {		to->inscore = newscore;		to->inhist = h;		to->active = nf;	    }	}    }}int32 allphone_frame (int32 *senscr){    int32 bestscr;        bestscr = phmm_eval_all (senscr);    score_scale[curfrm] = bestscr;    phmm_exit (bestscr);    phmm_trans ();        curfrm++;        return 0;}/** Return accumulated score scale in frame range [sf..ef] */static int32 seg_score_scale (int32 sf, int32 ef){    int32 scale, s;        for (s = sf, scale = 0; s <= ef; s++, scale += score_scale[s]);    return scale;}/* Phone lattice node */typedef struct phlatnode_s {    s3cipid_t ci;    uint16 fef, lef;	/* First and last end frame for this node */    struct phlatnode_s *next;} phlatnode_t;static void allphone_latdump (char *uttid, char *latdir){    int32 f, sf, latbeam, best, thresh, nnode;    history_t *h;    char filename[4096];    FILE *fp;    float64 *f64arg;    phlatnode_t **phlatnode, *p;        f64arg = (float64 *) cmd_ln_access ("-phlatbeam");    latbeam = logs3 (*f64arg);    sprintf (filename, "%s/%s.phlat", latdir, uttid);    if ((fp = fopen(filename, "w")) == NULL) {	E_ERROR("fopen(%s,w) failed\n", filename);	return;    }    phlatnode = (phlatnode_t **) ckd_calloc (curfrm+1, sizeof(phlatnode_t));        for (f = 0; f < curfrm; f++) {	/* Find best score for this frame and set pruning threshold */	best = (int32)0x80000000;	for (h = frm_hist[f]; h; h = h->next)	    if (h->score > best)		best = h->score;	thresh = best + latbeam;		for (h = frm_hist[f]; h; h = h->next) {	    /* Skip this node if below threshold */	    if (h->score < thresh)		continue;	    	    sf = h->hist ? h->hist->ef + 1 : 0;	    assert (h->ef == f);	    	    /* Find phlatnode for this <ci,sf> pair */	    for (p = phlatnode[sf]; p && (p->ci != h->phmm->ci); p = p->next);	    if (! p) {		p = (phlatnode_t *) listelem_alloc (sizeof(phlatnode_t));		p->next = phlatnode[sf];		phlatnode[sf] = p;		p->ci = h->phmm->ci;		p->fef = p->lef = h->ef;	    }	    assert (p->lef <= h->ef);	    p->lef = h->ef;#if 0	    score = h->score;	    if (h->hist)		score -= h->hist->score;	    score += seg_score_scale (sf, h->ef);	    fprintf (fp, "%4d %3d %12d %s\n",	/* startfrm endfrm ciphone */		     sf, h->ef - sf + 1, score, mdef_ciphone_str (mdef, h->phmm->ci));#endif	}    }    /* Write phone lattice; startframe, first end frame, last end frame, ciphone */    nnode = 0;    for (f = 0; f <= curfrm; f++) {	for (p = phlatnode[f]; p; p = p->next) {	    fprintf (fp, "%4d %4d %4d %s\n", f, p->fef, p->lef,		     mdef_ciphone_str (mdef, p->ci));	    nnode++;	}    }    E_INFO("%d phone lattice nodes written to %s\n", nnode, filename);    /* Free phone lattice */    for (f = 0; f <= curfrm; f++) {	for (p = phlatnode[f]; p; p = phlatnode[f]) {	    phlatnode[f] = p->next;	    listelem_free ((char *)p, sizeof(phlatnode_t));	}    }    ckd_free (phlatnode);        fclose (fp);}phseg_t *allphone_end_utt (char *uttid){    history_t *h, *nexth, *besth = (history_t *)0;    int32 f, best;    phseg_t *s, *nexts;    char *phlatdir;        /* Free old phseg, if any */    for (s = phseg; s; s = nexts) {	nexts = s->next;	listelem_free ((char *)s, sizeof(phseg_t));    }    phseg = NULL;        /* Write phone lattice if specified */    if ((phlatdir = (char *) cmd_ln_access ("-phlatdir")) != NULL)	allphone_latdump (uttid, phlatdir);        /* Find most recent history nodes list */    for (f = curfrm-1; (f >= 0) && (frm_hist[f] == NULL); --f);    if (f >= 0) {	/* Find best of the most recent history nodes */	best = (int32) 0x80000000;	for (h = frm_hist[f]; h; h = h->next) {	    if (h->score > best) {		best = h->score;		besth = h;	    }	}		/* Backtrace */	for (h = besth; h; h = h->hist) {	    s = (phseg_t *) listelem_alloc (sizeof(phseg_t));	    s->ci = h->phmm->ci;	    s->sf = (h->hist) ? h->hist->ef + 1 : 0;	    s->ef = h->ef;	    s->score = h->score;	    if (h->hist)		s->score -= h->hist->score;	    s->score += seg_score_scale (s->sf, s->ef);	    	    s->next = phseg;	    phseg = s;	}    }        E_INFO("%10d history nodes created\n", n_histnode);        /* Free history nodes */    for (f = 0; f < curfrm; f++) {	for (h = frm_hist[f]; h; h = nexth) {	    nexth = h->next;	    listelem_free ((char *) h, sizeof(history_t));	}		frm_hist[f] = NULL;    }        return phseg;}static void phone_tp_init (char *file, float64 floor, float64 wt, float64 ip){    int32 i, j, ct, tot, inspen;    FILE *fp;    char p1[128], p2[128];    s3cipid_t pid1, pid2;    float64 p;        tp = (int32 **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(int32));    inspen = logs3 (ip);        if (! file) {	for (i = 0; i < mdef->n_ciphone; i++)	    for (j = 0; j < mdef->n_ciphone; j++)		tp[i][j] = inspen;	return;    }    for (i = 0; i < mdef->n_ciphone; i++)	for (j = 0; j < mdef->n_ciphone; j++)	    tp[i][j] = S3_LOGPROB_ZERO;        if ((fp = fopen(file, "r")) == NULL)	E_FATAL("fopen(%s,r) failed\n", file);    while (fscanf (fp, "%s %s %d %d", p1, p2, &ct, &tot) == 4) {	pid1 = mdef_ciphone_id (mdef, p1);	if (NOT_S3CIPID(pid1))	    E_FATAL("Bad phone: %s\n", p1);	pid2 = mdef_ciphone_id (mdef, p2);	if (NOT_S3CIPID(pid2))	    E_FATAL("Bad phone: %s\n", p2);		if (tot > 0)	    p = ((float64)ct)/((float64)tot);	else	    p = 0.0;	if (p < floor)	    p = floor;		tp[(unsigned)pid1][(unsigned)pid2] = (int32)(logs3(p) * wt) + inspen;    }        fclose (fp);}int32 allphone_init ( mdef_t *mdef, tmat_t *tmat ){    float64 *f64arg;    char *file;    float64 tpfloor, ip, wt;        chk_tp_uppertri ();        phmm_build ();    file = (char *)cmd_ln_access("-phonetp");    if (! file)	E_ERROR("-phonetpfn argument missing; assuming uniform transition probs\n");    tpfloor = *((float32 *) cmd_ln_access ("-phonetpfloor"));    ip = *((float32 *) cmd_ln_access ("-inspen"));    wt = *((float32 *) cmd_ln_access ("-phonetpwt"));    phone_tp_init (file, tpfloor, wt, ip);        f64arg = (float64 *) cmd_ln_access ("-beam");    beam = logs3 (*f64arg);    E_INFO ("logs3(beam)= %d\n", beam);    frm_hist = (history_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(history_t *));    score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));    phseg = NULL;        return 0;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?