📄 utt.c
字号:
} } ptmr_stop (&(kb->tm_srch)); /*Slide the window and compute the next frame of ci phone scores, Shift the scores in the cache by one frame and compute the new frames of CI senone score if necessary and always put it at the end*/ /* * Shift the window for look-ahead of one frame, update scores in the buffer. */ ptmr_start (&(kb->tm_sen)); ptmr_start (&(kb->tm_ovrhd)); if(f<kb->nfr-kb->pl_win_efv){ for(i=0;i<kb->pl_win_efv-1;i++){ kb->cache_best_list[i]=kb->cache_best_list[i+1]; for(j=0;j==mdef->cd2cisen[j];j++){ kb->cache_ci_senscr[i][j]=kb->cache_ci_senscr[i+1][j]; } } approx_cont_mgau_ci_eval(kb->kbcore, kb->fastgmm, kb->kbcore->mdef, kb->feat[f+kb->pl_win_efv][0], kb->cache_ci_senscr[kb->pl_win_efv-1]); kb->utt_cisen_eval += mgau_frm_cisen_eval(kb->kbcore->mgau); kb->utt_cigau_eval += mgau_frm_cigau_eval(kb->kbcore->mgau); kb->cache_best_list[kb->pl_win_efv-1]=MAX_NEG_INT32; for(i=0;i==mdef->cd2cisen[i];i++){ if(kb->cache_ci_senscr[kb->pl_win_efv-1][i]>kb->cache_best_list[kb->pl_win_efv-1]) kb->cache_best_list[kb->pl_win_efv-1]=kb->cache_ci_senscr[kb->pl_win_efv-1][i]; } } else { /* We are near the end of the block, so shrink the window from the left*/ kb->pl_win_strt++; } ptmr_stop (&(kb->tm_ovrhd)); ptmr_stop (&(kb->tm_sen)); ptmr_start (&(kb->tm_srch)); /* Limit vithist entries created this frame to specified max */ vithist_prune (kb->vithist, dict, f, maxwpf, maxhistpf, wb); /* Cross-word transitions */ utt_word_trans (kb, f); /* Wind up this frame */ vithist_frame_windup (kb->vithist, f, NULL, kbcore); kb_lextree_active_swap (kb); ptmr_stop (&(kb->tm_srch)); if ((f % 100) == 0) { fprintf (stderr, "."); fflush (stderr); } } kb->utt_hmm_eval = n_hmm_eval; utt_end (kb); kb->tot_fr += kb->nfr; fprintf (stdout, "\n");}#if 1/* ARCHAN: The speed up version of the function *//* This function decodes a block of incoming feature vectors. * Feature vectors have to be computed by the calling routine. * The utterance level index of the last feature vector decoded * (before the current block) must be passed. * The current status of the decode is stored in the kb structure that * is passed in. */void utt_decode_block (float ***block_feat, /* Incoming block of featurevecs */ int32 block_nfeatvec, /* No. of vecs in cepblock */ int32 *curfrm, /* Utterance level index of frames decoded so far */ kb_t *kb, /* kb structure with all model and decoder info */ FILE *hmmdumpfp) /* dump file */{ kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; mgau_model_t *mgau; subvq_t *svq; gs_t * gs; lextree_t *lextree; int32 besthmmscr, bestwordscr, th, pth, wth; int32 i, j, t; int32 n_hmm_eval; int32 frmno; int32 frm_nhmm, hb, pb, wb; int32 f; int32 maxwpf; /* Max words per frame */ int32 maxhistpf; /* Max histories per frame */ int32 maxhmmpf; /* Max active HMMs per frame */ int32 ptranskip; /* intervals at which wbeam is used for phone transitions */ int32 pheurtype; pheurtype = cmd_ln_int32 ("-pheurtype"); kbcore = kb->kbcore; mdef = kbcore_mdef (kbcore); dict = kbcore_dict (kbcore); d2p = kbcore_dict2pid (kbcore); mgau = kbcore_mgau (kbcore); svq = kbcore_svq (kbcore); gs = kbcore_gs(kbcore); maxwpf = kb->histprune->maxwpf; maxhistpf = kb->histprune->maxwpf; maxhmmpf = kb->histprune->maxhmmpf; ptranskip = kb->beam->ptranskip; frmno = *curfrm; for (i = 0; i < kb->hmm_hist_bins; i++) kb->hmm_hist[i] = 0; n_hmm_eval = 0; ptmr_start (&(kb->tm_sen)); /* the effective window is the min of (kb->pl_win, block_nfeatvec) */ kb->pl_win_efv = kb->pl_win > block_nfeatvec ? block_nfeatvec : kb->pl_win; kb->pl_win_strt=0; for(f = 0; f < kb->pl_win_efv; f++){ /*Compute the CI phone score at here */ kb->cache_best_list[f]=MAX_NEG_INT32; approx_cont_mgau_ci_eval(kb->kbcore, kb->fastgmm, kb->kbcore->mdef, /* approx_cont_mgau_ci_eval(mgau, kb->kbcore->mdef,*/ block_feat[f][0], kb->cache_ci_senscr[f]); kb->utt_cisen_eval += mgau_frm_cisen_eval(kb->kbcore->mgau); kb->utt_cigau_eval += mgau_frm_cigau_eval(kb->kbcore->mgau); for(i=0;i==mdef->cd2cisen[i];i++){ if(kb->cache_ci_senscr[f][i]>kb->cache_best_list[f]) kb->cache_best_list[f]=kb->cache_ci_senscr[f][i]; } } ptmr_stop (&(kb->tm_sen)); for (t = 0; t < block_nfeatvec; t++,frmno++) { /* Acoustic (senone scores) evaluation */ ptmr_start (&(kb->tm_sen)); /* Find active senones and composite senones, from active lextree nodes */ /*The active senones will also be changed in approx_cont_mgau_frame_eval */ if (kb->sen_active) { memset (kb->ssid_active, 0, mdef_n_sseq(mdef) * sizeof(int32)); memset (kb->comssid_active, 0, dict2pid_n_comsseq(d2p) * sizeof(int32)); /* Find active senone-sequence IDs (including composite ones) */ for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; lextree_ssid_active (lextree, kb->ssid_active, kb->comssid_active); } /* Find active senones from active senone-sequences */ memset (kb->sen_active, 0, mdef_n_sen(mdef) * sizeof(int32)); mdef_sseq2sen_active (mdef, kb->ssid_active, kb->sen_active); /* Add in senones needed for active composite senone-sequences */ dict2pid_comsseq2sen_active (d2p, mdef, kb->comssid_active, kb->sen_active); } /* Always use the first buffer in the cache*/ /* Why I didn't make a pointer of sen and sen_active to fast_gmm_t? Because pointer is confusing. */ approx_cont_mgau_frame_eval(kb->kbcore, kb->fastgmm, block_feat[t][0], t, kb->sen_active, kb->rec_sen_active, kb->ascr->sen, kb->cache_ci_senscr[kb->pl_win_strt], &(kb->tm_ovrhd)); kb->utt_sen_eval += mgau_frm_sen_eval(mgau); kb->utt_gau_eval += mgau_frm_gau_eval(mgau); /* Evaluate composite senone scores from senone scores */ dict2pid_comsenscr (kbcore_dict2pid(kbcore), kb->ascr->sen, kb->ascr->comsen); ptmr_stop (&(kb->tm_sen)); /* Search */ ptmr_start (&(kb->tm_srch)); /* Compute phoneme heuristics */ /* Determine which set of phonemes should be active in next stage using the lookahead information*/ /* Notice that this loop can be further optimized by implementing it incrementally*/ /* ARCHAN and JSHERWAN Eventually, this is implemented as a function */ if(pheurtype!=0) computePhnHeur(mdef,kb,pheurtype); /* Evaluate active HMMs in each lextree; note best HMM state score */ besthmmscr = MAX_NEG_INT32; bestwordscr = MAX_NEG_INT32; frm_nhmm = 0; for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; if (hmmdumpfp != NULL) fprintf (hmmdumpfp, "Fr %d Lextree %d #HMM %d\n", frmno, i, lextree->n_active); lextree_hmm_eval (lextree, kbcore, kb->ascr, frmno, hmmdumpfp); if (besthmmscr < lextree->best) besthmmscr = lextree->best; if (bestwordscr < lextree->wbest) bestwordscr = lextree->wbest; n_hmm_eval += lextree->n_active; frm_nhmm += lextree->n_active; } if (besthmmscr > 0) { E_ERROR("***ERROR*** Fr %d, best HMM score > 0 (%d); int32 wraparound?\n", frmno, besthmmscr); } kb->hmm_hist[frm_nhmm / kb->hmm_hist_binsize]++; /* Set pruning threshold depending on whether number of active HMMs * is within limit */ if (frm_nhmm > (maxhmmpf + (maxhmmpf >> 1))) { int32 *bin, nbin, bw; /* Use histogram pruning */ nbin = 1000; bw = -(kb->beam->hmm) / nbin; bin = (int32 *) ckd_calloc (nbin, sizeof(int32)); for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; lextree_hmm_histbin (lextree, besthmmscr, bin, nbin, bw); } for (i = 0, j = 0; (i < nbin) && (j < maxhmmpf); i++, j += bin[i]); ckd_free ((void *) bin); /* Determine hmm, phone, word beams */ hb = -(i * bw); pb = (hb > kb->beam->ptrans) ? hb : kb->beam->ptrans; wb = (hb > kb->beam->word) ? hb : kb->beam->word; } else { hb = kb->beam->hmm; pb = kb->beam->ptrans; wb = kb->beam->word; } kb->bestscore = besthmmscr; kb->bestwordscore = bestwordscr; th = kb->bestscore + hb; /* HMM survival threshold */ pth = kb->bestscore + pb; /* Cross-HMM transition threshold */ wth = kb->bestwordscore + wb; /* Word exit threshold */ /* * For each lextree, determine if the active HMMs remain active for next * frame, propagate scores across HMM boundaries, and note word exits. */ /* Hack! Use narrow phone transition beam (wth) every few frames */ /* ARCHAN 20040509 : please read the comment in utt_decode to see why this loop is implemented like this */ if(ptranskip==0){ for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; lextree_hmm_propagate(lextree, kbcore, kb->vithist, frmno, th, pth, wth,kb->phn_heur_list,kb->pl_beam,pheurtype); } }else{ for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; if ((frmno % ptranskip) != 0) lextree_hmm_propagate(lextree, kbcore, kb->vithist, frmno, th, pth, wth,kb->phn_heur_list,kb->pl_beam,pheurtype); else lextree_hmm_propagate(lextree, kbcore, kb->vithist, frmno, th, wth, wth,kb->phn_heur_list,kb->pl_beam,pheurtype); } } ptmr_stop (&(kb->tm_srch)); ptmr_start (&(kb->tm_sen)); ptmr_start (&(kb->tm_ovrhd)); /* if the current block's current frame (t) is less than the total frames in this block minus the efv window */ if(t<block_nfeatvec-kb->pl_win_efv){ for(i=0;i<kb->pl_win_efv-1;i++){ kb->cache_best_list[i]=kb->cache_best_list[i+1]; for(j=0;j==mdef->cd2cisen[j];j++){ kb->cache_ci_senscr[i][j]=kb->cache_ci_senscr[i+1][j]; } } /* get the CI sen scores for the t+pl_win'th frame (a slice) */ approx_cont_mgau_ci_eval(kb->kbcore, kb->fastgmm, kb->kbcore->mdef, block_feat[t+kb->pl_win_efv][0], kb->cache_ci_senscr[kb->pl_win_efv-1]); kb->utt_cisen_eval += mgau_frm_cisen_eval(kb->kbcore->mgau); kb->utt_cigau_eval += mgau_frm_cigau_eval(kb->kbcore->mgau); kb->cache_best_list[kb->pl_win_efv-1]=MAX_NEG_INT32; for(i=0;i==mdef->cd2cisen[i];i++){ if(kb->cache_ci_senscr[kb->pl_win_efv-1][i]>kb->cache_best_list[kb->pl_win_efv-1]) kb->cache_best_list[kb->pl_win_efv-1]=kb->cache_ci_senscr[kb->pl_win_efv-1][i]; } } else { /* We are near the end of the block, so shrink the window from the left*/ kb->pl_win_strt++; } ptmr_stop (&(kb->tm_ovrhd)); ptmr_stop (&(kb->tm_sen)); ptmr_start (&(kb->tm_srch)); /* Limit vithist entries created this frame to specified max */ vithist_prune (kb->vithist, dict, frmno, maxwpf, maxhistpf, wb); /* Cross-word transitions */ utt_word_trans (kb, frmno); /* Wind up this frame */ vithist_frame_windup (kb->vithist, frmno, NULL, kbcore); kb_lextree_active_swap (kb); ptmr_stop (&(kb->tm_srch)); } kb->utt_hmm_eval += n_hmm_eval; kb->nfr += block_nfeatvec; *curfrm = frmno;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -