main_decode_anytopo.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,466 行 · 第 1/4 页
C
1,466 行
{ char filename[1024]; FILE *fp; int32 k; sprintf (filename, "%s/%s.bscr", dir, uttid); E_INFO("Writing bestscore file: %s\n", filename); if ((fp = fopen (filename, "wb")) == NULL) { E_ERROR("fopen(%s,wb) failed\n", filename); return; } /* Write version no. */ if (fwrite ("0.1\n", sizeof(char), 4, fp) != 4) goto write_error; /* Write binary comment string */ if (fwrite ("*end_comment*\n", sizeof(char), 14, fp) != 14) goto write_error; /* Write byte-ordering magic number */ k = BYTE_ORDER_MAGIC; if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; /* Write #frames */ k = nfr; if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; /* Write bestscore/frame */ if (fwrite (score, sizeof(int32), nfr, fp) != nfr) goto write_error; fclose (fp); return; write_error: E_ERROR("fwrite(%s) failed\n", filename); fclose (fp);}#define GAUDEN_EVAL_WINDOW 8/* Lists of senones sharing each mixture Gaussian codebook */typedef struct mgau2sen_s { s3senid_t sen; /* Senone shared by this mixture Gaussian */ struct mgau2sen_s *next; /* Next entry in list for this mixture Gaussian */} mgau2sen_t;/* * Forward Viterbi decode. * Return value: recognition hypothesis with detailed segmentation and score info. */static srch_hyp_t *fwdvit ( /* In: MFC cepstra for input utterance */ int32 nfr, /* In: #frames of input */ char *uttid) /* In: Utterance id, for logging and other use */{ static int32 w; static int32 topn; static int32 **senscr; /* Senone scores for window of frames */ static gauden_dist_t **dist; /* Density values for one mgau in one frame */ static int8 *sen_active; /* [s] TRUE iff s active in current frame */ static int8 *mgau_active; /* [m] TRUE iff m active in current frame */ static mgau2sen_t **mgau2sen; /* Senones sharing mixture Gaussian codebooks */ int32 i, j, k, s, gid, n_sen_active, best; srch_hyp_t *hyp; mgau2sen_t *m2s; float32 **fv; i=0; if (! senscr) { w = feat_window_size (fcb); /* #MFC vectors needed on either side of current frame to compute one feature vector */ topn = *((int32 *) cmd_ln_access("-topn")); E_INFO("The value of topn: %d\n",topn); if (topn > g->n_density) { E_WARN("-topn argument (%d) > #density codewords (%d); set to latter\n", topn, g->n_density); topn = g->n_density; } dist = (gauden_dist_t **) ckd_calloc_2d (g->n_feat, topn, sizeof(gauden_dist_t)); /* * If search limited to given word lattice, or if many codebooks, only active * senones computed in each frame. Allocate space for list of active senones, * and active codebook flags. */ if (inlatdir) { E_INFO("Computing only active codebooks and senones each frame\n"); sen_active = (int8 *) ckd_calloc (sen->n_sen, sizeof(int8)); mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8)); /* Space for senone scores (one frame) */ senscr = (int32 **) ckd_calloc_2d (1, sen->n_sen, sizeof(int32)); } else { E_INFO("Computing all codebooks and senones each frame\n"); sen_active = NULL; mgau_active = NULL; /* Space for senone scores (window of frames) */ senscr = (int32 **) ckd_calloc_2d (GAUDEN_EVAL_WINDOW, sen->n_sen, sizeof(int32)); } /* Initialize mapping from mixture Gaussian to senones */ mgau2sen = (mgau2sen_t **) ckd_calloc (g->n_mgau, sizeof(mgau2sen_t *)); for (s = 0; s < sen->n_sen; s++) { m2s = (mgau2sen_t *) listelem_alloc (sizeof(mgau2sen_t)); m2s->sen = s; m2s->next = mgau2sen[sen->mgau[s]]; mgau2sen[sen->mgau[s]] = m2s; } } if (nfr <= (w<<1)) { E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr); return NULL; } ptmr_reset (&tmr_gausen); ptmr_reset (&tmr_fwdsrch); fwd_start_utt (uttid); /* * A feature vector for frame f depends on input MFC vectors [f-w..f+w]. Hence * the feature vector corresponding to the first w and last w input frames is * undefined. We define them by simply replicating the first and last true * feature vectors (presumably silence regions). */ if (sen_active) { for (i = 0; i < nfr; i++) { ptmr_start (&tmr_gausen); fv=feat[i]; /* Compute feature vector for current frame from input speech cepstra */ /* Obtain list of active senones */ fwd_sen_active (sen_active, sen->n_sen); /* Flag all active mixture-gaussian codebooks */ for (gid = 0; gid < g->n_mgau; gid++) mgau_active[gid] = 0; n_sen_active = 0; for (s = 0; s < sen->n_sen; s++) { if (sen_active[s]) { mgau_active[sen->mgau[s]] = 1; n_sen_active++; } } /* Add in CI senones and codebooks if interpolating with CI */ if (interp) { for (s = 0; s < mdef->n_ci_sen; s++) { mgau_active[s] = 1; if (! sen_active[s]) { sen_active[s] = 1; n_sen_active++; } } } pctr_increment (ctr_nsen, n_sen_active); /* Compute topn gaussian density and senones values (for active codebooks) */ best = (int32) 0x80000000; for (gid = 0; gid < g->n_mgau; gid++) { if (mgau_active[gid]) { gauden_dist (g, gid, topn, fv, dist); for (m2s = mgau2sen[gid]; m2s; m2s = m2s->next) { s = m2s->sen; if (sen_active[s]) { senscr[0][s] = senone_eval (sen, s, dist, topn); if (best < senscr[0][s]) best = senscr[0][s]; } } } } /* Interpolate CI and CD senones if indicated */ if (interp) { for (s = mdef->n_ci_sen; s < sen->n_sen; s++) { if (sen_active[s]) interp_cd_ci (interp, senscr[0], s, mdef->cd2cisen[s]); } } /* Normalize senone scores (interpolation above can only lower best score) */ for (s = 0; s < sen->n_sen; s++) { if (sen_active[s]) senscr[0][s] -= best; E_INFO("The senone scores %d\n",senscr[0][s]); } senscale[i] = best; ptmr_stop (&tmr_gausen); /* Step HMMs one frame forward */ ptmr_start (&tmr_fwdsrch); bestscr[i] = fwd_frame (senscr[0]); ptmr_stop (&tmr_fwdsrch); if ((i%10) == 9) { printf ("."); fflush (stdout); } } } else { /* Work in groups of GAUDEN_EVAL_WINDOW frames (blocking to improve cache perf) */ assert(feat); for (j = 0; j < nfr; j += GAUDEN_EVAL_WINDOW) { /* Compute Gaussian densities and senone scores for window of frames */ ptmr_start (&tmr_gausen); for (gid = 0; gid < g->n_mgau; gid++) { for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) { /* Compute feature vector for current frame from input speech cepstra */ assert(feat[i]); fv=feat[i]; /* Evaluate mixture Gaussian densities */ gauden_dist (g, gid, topn, fv, dist); /* Compute senone scores */ if (g->n_mgau > 1) { for (m2s = mgau2sen[gid]; m2s; m2s = m2s->next) { s = m2s->sen; senscr[k][s] = senone_eval (sen, s, dist, topn); } } else { /* Semi-continuous special case; single shared codebook */ senone_eval_all (sen, dist, topn, senscr[k]); } } } /* Interpolate senones and normalize */ for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) { pctr_increment (ctr_nsen, sen->n_sen); if (interp) interp_all (interp, senscr[k], mdef->cd2cisen, mdef->n_ci_sen); /* Normalize senone scores */ best = (int32)0x80000000; for (s = 0; s < sen->n_sen; s++){ if (best < senscr[k][s]) best = senscr[k][s]; } for (s = 0; s < sen->n_sen; s++) senscr[k][s] -= best; senscale[i] = best; } ptmr_stop (&tmr_gausen); /* Step HMMs one frame forward */ ptmr_start (&tmr_fwdsrch); for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) { bestscr[i] = fwd_frame (senscr[k]); if ((i%10) == 9) { printf ("."); fflush (stdout); } } ptmr_stop (&tmr_fwdsrch); } } printf ("\n"); hyp = fwd_end_utt (); /* Add in senscale into bestscr, turning them into absolute scores */ k = 0; for (i = 0; i < nfr; i++) { k += senscale[i]; bestscr[i] += k; } pctr_increment (ctr_nfrm, nfr); return hyp;}/* Decode the given mfc file and write result to matchfp and matchsegfp */static void decode_utt (int32 nfr, char *uttid){ char *bscrdir; srch_hyp_t *hyp, *h; int32 i, bp, ascr, lscr, scl; float32 *f32arg; float64 lwf; ptmr_reset (&tmr_utt); ptmr_reset (&tmr_fwdvit); ptmr_reset (&tmr_bstpth); ptmr_start (&tmr_utt); ptmr_start (&tmr_fwdvit); pctr_reset(ctr_nfrm); pctr_reset(ctr_nsen); hyp = fwdvit (nfr, uttid); ptmr_stop (&tmr_fwdvit); bp = *((int32 *) cmd_ln_access("-bestpath")); scl = 0; lwf = 1.0; if (hyp != NULL) { if ( *((int32 *) cmd_ln_access("-backtrace")) ) log_hyp_detailed (stdout, hyp, uttid, "FV", "fv"); /* Total acoustic score scaling */ for (i = 0; i < nfr; i++) scl += senscale[i]; /* Total scaled acoustic score and LM score */ ascr = lscr = 0; for (h = hyp; h; h = h->next) { ascr += h->ascr; lscr += h->lscr; } /* Print sanitized recognition */ printf ("FWDVIT: "); log_hypstr (stdout, hyp, uttid, matchexact, ascr + lscr); printf ("FWDXCT: "); log_hypseg (uttid, stdout, hyp, nfr, scl, lwf); lm_cache_stats_dump (lm); /* Check if need to dump bestscore/frame */ if ((bscrdir = (char *) cmd_ln_access ("-bestscoredir")) != NULL) write_bestscore (bscrdir, uttid, bestscr, nfr); /* Check if need to dump or search DAG */ if ((outlatdir || bp) && (dag_build () == 0)) { if (outlatdir) dag_dump (outlatdir, outlat_onlynodes, uttid); /* Perform bestpath DAG search if specified */ if (bp) { ptmr_start (&tmr_bstpth); h = s3flat_fwd_dag_search (uttid); ptmr_stop (&tmr_bstpth); if (h) { hyp = h; f32arg = (float32 *) cmd_ln_access ("-bestpathlw"); lwf = f32arg ? ((*f32arg) / *((float32 *) cmd_ln_access ("-lw"))) : 1.0; } else E_ERROR("%s: Bestpath search failed; using Viterbi result\n", uttid);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?