main_decode_anytopo.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,466 行 · 第 1/4 页

C
1,466
字号
{    char filename[1024];    FILE *fp;    int32 k;        sprintf (filename, "%s/%s.bscr", dir, uttid);    E_INFO("Writing bestscore file: %s\n", filename);    if ((fp = fopen (filename, "wb")) == NULL) {	E_ERROR("fopen(%s,wb) failed\n", filename);	return;    }        /* Write version no. */    if (fwrite ("0.1\n", sizeof(char), 4, fp) != 4)	goto write_error;    /* Write binary comment string */    if (fwrite ("*end_comment*\n", sizeof(char), 14, fp) != 14)	goto write_error;    /* Write byte-ordering magic number */    k = BYTE_ORDER_MAGIC;    if (fwrite (&k, sizeof(int32), 1, fp) != 1)	goto write_error;        /* Write #frames */    k = nfr;    if (fwrite (&k, sizeof(int32), 1, fp) != 1)	goto write_error;        /* Write bestscore/frame */    if (fwrite (score, sizeof(int32), nfr, fp) != nfr)	goto write_error;    fclose (fp);    return;    write_error:    E_ERROR("fwrite(%s) failed\n", filename);    fclose (fp);}#define GAUDEN_EVAL_WINDOW	8/* Lists of senones sharing each mixture Gaussian codebook */typedef struct mgau2sen_s {    s3senid_t sen;		/* Senone shared by this mixture Gaussian */    struct mgau2sen_s *next;	/* Next entry in list for this mixture Gaussian */} mgau2sen_t;/* * Forward Viterbi decode. * Return value: recognition hypothesis with detailed segmentation and score info. 
*/
/*
 * fwdvit: run the forward Viterbi pass over one utterance.
 *
 * Parameters:
 *   nfr   - In: #frames of input.
 *   uttid - In: Utterance id, for logging and other use.
 *
 * Returns the hypothesis produced by fwd_end_utt(), or NULL if the utterance
 * has no more than 2*w frames (w = feat_window_size(fcb)).
 *
 * Reads the file-level feature array feat[] (one entry per frame) and fills the
 * file-level arrays senscale[] and bestscr[] for frames [0..nfr-1].
 *
 * The working buffers below are static: they are allocated on the first call
 * (detected by senscr == NULL) and reused on every later call.
 */
static srch_hyp_t *fwdvit (int32 nfr, char *uttid)
{
    static int32 w;
    static int32 topn;
    static int32 **senscr;		/* Senone scores for window of frames */
    static gauden_dist_t **dist;	/* Density values for one mgau in one frame */
    static int8 *sen_active;		/* [s] TRUE iff s active in current frame */
    static int8 *mgau_active;		/* [m] TRUE iff m active in current frame */
    static mgau2sen_t **mgau2sen;	/* Senones sharing mixture Gaussian codebooks */

    int32 i, j, k, s, gid, n_sen_active, best;
    srch_hyp_t *hyp;
    mgau2sen_t *m2s;
    float32 **fv;

    /* One-time allocation of the static working buffers */
    if (! senscr) {
        w = feat_window_size (fcb);	/* #MFC vectors needed on either side of current
                                           frame to compute one feature vector */

        topn = *((int32 *) cmd_ln_access("-topn"));
        E_INFO("The value of topn: %d\n",topn);
        if (topn > g->n_density) {
            E_WARN("-topn argument (%d) > #density codewords (%d); set to latter\n",
                   topn, g->n_density);
            topn = g->n_density;
        }
        dist = (gauden_dist_t **) ckd_calloc_2d (g->n_feat, topn, sizeof(gauden_dist_t));

        /*
         * If search limited to given word lattice, only active senones are
         * computed in each frame.  Allocate space for the active senone and
         * active codebook flags.
         */
        if (inlatdir) {
            E_INFO("Computing only active codebooks and senones each frame\n");
            sen_active = (int8 *) ckd_calloc (sen->n_sen, sizeof(int8));
            mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8));

            /* Space for senone scores (one frame) */
            senscr = (int32 **) ckd_calloc_2d (1, sen->n_sen, sizeof(int32));
        } else {
            E_INFO("Computing all codebooks and senones each frame\n");
            sen_active = NULL;
            mgau_active = NULL;

            /* Space for senone scores (window of frames) */
            senscr = (int32 **) ckd_calloc_2d (GAUDEN_EVAL_WINDOW, sen->n_sen,
                                               sizeof(int32));
        }

        /* Initialize mapping from mixture Gaussian to senones */
        mgau2sen = (mgau2sen_t **) ckd_calloc (g->n_mgau, sizeof(mgau2sen_t *));
        for (s = 0; s < sen->n_sen; s++) {
            m2s = (mgau2sen_t *) listelem_alloc (sizeof(mgau2sen_t));
            m2s->sen = s;
            m2s->next = mgau2sen[sen->mgau[s]];
            mgau2sen[sen->mgau[s]] = m2s;
        }
    }

    /* Reject utterances with no more than 2*w frames */
    if (nfr <= (w<<1)) {
        E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr);
        return NULL;
    }

    ptmr_reset (&tmr_gausen);
    ptmr_reset (&tmr_fwdsrch);

    fwd_start_utt (uttid);

    /*
     * NOTE(review): feature vectors are not computed in this function; each
     * frame's features are read from feat[i], which is presumably filled in by
     * the caller before this function runs -- confirm.
     */
    if (sen_active) {
        /* Active-senone mode: score only what the search needs, frame by frame */
        for (i = 0; i < nfr; i++) {
            ptmr_start (&tmr_gausen);

            fv = feat[i];

            /* Obtain list of active senones */
            fwd_sen_active (sen_active, sen->n_sen);

            /* Flag all active mixture-gaussian codebooks */
            for (gid = 0; gid < g->n_mgau; gid++)
                mgau_active[gid] = 0;
            n_sen_active = 0;
            for (s = 0; s < sen->n_sen; s++) {
                if (sen_active[s]) {
                    mgau_active[sen->mgau[s]] = 1;
                    n_sen_active++;
                }
            }

            /* Add in CI senones and codebooks if interpolating with CI */
            if (interp) {
                for (s = 0; s < mdef->n_ci_sen; s++) {
                    /*
                     * mgau_active has g->n_mgau entries and is indexed by
                     * codebook id, so map the senone to its codebook first.
                     * Indexing it by senone id directly would write out of
                     * bounds whenever g->n_mgau < mdef->n_ci_sen.
                     */
                    mgau_active[sen->mgau[s]] = 1;
                    if (! sen_active[s]) {
                        sen_active[s] = 1;
                        n_sen_active++;
                    }
                }
            }
            pctr_increment (ctr_nsen, n_sen_active);

            /* Compute topn gaussian density and senones values (for active codebooks) */
            best = (int32) 0x80000000;
            for (gid = 0; gid < g->n_mgau; gid++) {
                if (! mgau_active[gid])
                    continue;

                gauden_dist (g, gid, topn, fv, dist);
                for (m2s = mgau2sen[gid]; m2s; m2s = m2s->next) {
                    s = m2s->sen;
                    if (sen_active[s]) {
                        senscr[0][s] = senone_eval (sen, s, dist, topn);
                        if (best < senscr[0][s])
                            best = senscr[0][s];
                    }
                }
            }

            /* Interpolate CI and CD senones if indicated */
            if (interp) {
                for (s = mdef->n_ci_sen; s < sen->n_sen; s++) {
                    if (sen_active[s])
                        interp_cd_ci (interp, senscr[0], s, mdef->cd2cisen[s]);
                }
            }

            /* Normalize senone scores (interpolation above can only lower best score) */
            for (s = 0; s < sen->n_sen; s++) {
                if (sen_active[s])
                    senscr[0][s] -= best;
                /*
                 * NOTE(review): this logs one line per senone per frame,
                 * including inactive senones; it looks like leftover debug
                 * output.  Kept to preserve existing log output -- consider
                 * removing.
                 */
                E_INFO("The senone scores %d\n",senscr[0][s]);
            }
            senscale[i] = best;

            ptmr_stop (&tmr_gausen);

            /* Step HMMs one frame forward */
            ptmr_start (&tmr_fwdsrch);
            bestscr[i] = fwd_frame (senscr[0]);
            ptmr_stop (&tmr_fwdsrch);

            /* Progress indicator: one dot every 10 frames */
            if ((i%10) == 9) {
                printf ("."); fflush (stdout);
            }
        }
    } else {
        /* Work in groups of GAUDEN_EVAL_WINDOW frames (blocking to improve cache perf) */
        assert(feat);

        for (j = 0; j < nfr; j += GAUDEN_EVAL_WINDOW) {
            /* Compute Gaussian densities and senone scores for window of frames */
            ptmr_start (&tmr_gausen);
            for (gid = 0; gid < g->n_mgau; gid++) {
                for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {
                    assert(feat[i]);
                    fv = feat[i];

                    /* Evaluate mixture Gaussian densities */
                    gauden_dist (g, gid, topn, fv, dist);

                    /* Compute senone scores */
                    if (g->n_mgau > 1) {
                        for (m2s = mgau2sen[gid]; m2s; m2s = m2s->next) {
                            s = m2s->sen;
                            senscr[k][s] = senone_eval (sen, s, dist, topn);
                        }
                    } else {
                        /* Semi-continuous special case; single shared codebook */
                        senone_eval_all (sen, dist, topn, senscr[k]);
                    }
                }
            }

            /* Interpolate senones and normalize */
            for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {
                pctr_increment (ctr_nsen, sen->n_sen);

                if (interp)
                    interp_all (interp, senscr[k], mdef->cd2cisen, mdef->n_ci_sen);

                /* Normalize senone scores */
                best = (int32)0x80000000;
                for (s = 0; s < sen->n_sen; s++) {
                    if (best < senscr[k][s])
                        best = senscr[k][s];
                }
                for (s = 0; s < sen->n_sen; s++)
                    senscr[k][s] -= best;
                senscale[i] = best;
            }
            ptmr_stop (&tmr_gausen);

            /* Step HMMs one frame forward */
            ptmr_start (&tmr_fwdsrch);
            for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {
                bestscr[i] = fwd_frame (senscr[k]);
                /* Progress indicator: one dot every 10 frames */
                if ((i%10) == 9) {
                    printf ("."); fflush (stdout);
                }
            }
            ptmr_stop (&tmr_fwdsrch);
        }
    }
    printf ("\n");

    hyp = fwd_end_utt ();

    /* Add in senscale into bestscr, turning them into absolute scores */
    k = 0;
    for (i = 0; i < nfr; i++) {
        k += senscale[i];
        bestscr[i] += k;
    }

    pctr_increment (ctr_nfrm, nfr);

    return hyp;
}

/* Decode the given mfc file and write result to matchfp and matchsegfp
*/static void decode_utt (int32 nfr, char *uttid){    char *bscrdir;    srch_hyp_t *hyp, *h;    int32 i, bp, ascr, lscr, scl;    float32 *f32arg;    float64 lwf;    ptmr_reset (&tmr_utt);    ptmr_reset (&tmr_fwdvit);    ptmr_reset (&tmr_bstpth);    ptmr_start (&tmr_utt);    ptmr_start (&tmr_fwdvit);    pctr_reset(ctr_nfrm);    pctr_reset(ctr_nsen);    hyp = fwdvit (nfr, uttid);    ptmr_stop (&tmr_fwdvit);    bp = *((int32 *) cmd_ln_access("-bestpath"));    scl = 0;    lwf = 1.0;    if (hyp != NULL) {	if ( *((int32 *) cmd_ln_access("-backtrace")) )	    log_hyp_detailed (stdout, hyp, uttid, "FV", "fv");	/* Total acoustic score scaling */	for (i = 0; i < nfr; i++)	    scl += senscale[i];	/* Total scaled acoustic score and LM score */	ascr = lscr = 0;	for (h = hyp; h; h = h->next) {	    ascr += h->ascr;	    lscr += h->lscr;	}	/* Print sanitized recognition */	printf ("FWDVIT: ");	log_hypstr (stdout, hyp, uttid, matchexact, ascr + lscr);	printf ("FWDXCT: ");	log_hypseg (uttid, stdout, hyp, nfr, scl, lwf);	lm_cache_stats_dump (lm);	/* Check if need to dump bestscore/frame */	if ((bscrdir = (char *) cmd_ln_access ("-bestscoredir")) != NULL)	    write_bestscore (bscrdir, uttid, bestscr, nfr);	/* Check if need to dump or search DAG */	if ((outlatdir || bp) && (dag_build () == 0)) {	    if (outlatdir)		dag_dump (outlatdir, outlat_onlynodes, uttid);	    	    /* Perform bestpath DAG search if specified */	    if (bp) {		ptmr_start (&tmr_bstpth);		h = s3flat_fwd_dag_search (uttid);		ptmr_stop (&tmr_bstpth);				if (h) {		    hyp = h;		    f32arg = (float32 *) cmd_ln_access ("-bestpathlw");		    lwf = f32arg ?			((*f32arg) / *((float32 *) cmd_ln_access ("-lw"))) :			1.0;		} else		    E_ERROR("%s: Bestpath search failed; using Viterbi result\n", uttid);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?