📄 utt.c
字号:
/* Find best word exit in this frame for each distinct word-final CI phone */ vhid = vithist_first_entry (vh, cf); le = vithist_n_entry (vh) - 1; for (; vhid <= le; vhid++) { ve = vithist_id2entry (vh, vhid); if (! vithist_entry_valid(ve)) continue; wid = vithist_entry_wid (ve); p = dict_last_phone (dict, wid); if (mdef_is_fillerphone(mdef, p)) p = mdef_silphone(mdef); score = vithist_entry_score (ve); if (score > bs[p]) { bs[p] = score; bv[p] = vhid; if (maxpscore < score) { maxpscore=score; /* E_INFO("maxscore = %d\n", maxpscore); */ } } } /* Find lextree instance to be entered */ k = kb->n_lextrans++; k = (k % (kb->n_lextree * epl)) / epl; /* Transition to unigram lextrees */ for (p = 0; p < n_ci; p++) { if (bv[p] >= 0) if (kb->beam->wordend==0 || bs[p]> kb->beam->wordend + maxpscore) { /* RAH, typecast p to (s3cipid_t) to make compiler happy */ lextree_enter (kb->ugtree[k], (s3cipid_t) p, cf, bs[p], bv[p], th); } } /* Transition to filler lextrees */ lextree_enter (kb->fillertree[k], BAD_S3CIPID, cf, vh->bestscore[cf], vh->bestvh[cf], th);}void computePhnHeur(mdef_t* md,kb_t* kb,int32 heutype){ int32 nState; int32 i,j; int32 curPhn, curFrmPhnVar; /* variables for phoneme lookahead computation */ nState=mdef_n_emit_state(md); /* Initializing all the phoneme heuristics for each phone to be 0*/ for(j=0;j==md->cd2cisen[j];j++){ curPhn=md->sen2cimap[j]; /*Just to save a warning*/ kb->phn_heur_list[curPhn]=0; } /* 20040503: ARCHAN, the code can be reduced to 10 lines, it is so organized such that there is no overhead in checking the heuristic type in the inner loop. */ /* One trick we use is to use sen2cimap to check phoneme ending boundary */ if(heutype==1){ /* Taking Max */ for(i=kb->pl_win_strt;i<kb->pl_win_efv;i++) { curPhn=0; curFrmPhnVar=MAX_NEG_INT32; for(j=0;j==md->cd2cisen[j];j++) { if (curFrmPhnVar<kb->cache_ci_senscr[i][j]) curFrmPhnVar=kb->cache_ci_senscr[i][j]; curPhn=md->sen2cimap[j]; /* Update at the phone_end boundary */ if (curPhn!= md->sen2cimap[j+1]) { kb->phn_heur_list[curPhn]=NO_UFLOW_ADD(kb->phn_heur_list[curPhn],curFrmPhnVar); curFrmPhnVar=MAX_NEG_INT32; } } } }else if(heutype==2){ for(i=kb->pl_win_strt;i<kb->pl_win_efv;i++) { curPhn=0; curFrmPhnVar=MAX_NEG_INT32; for(j=0;j==md->cd2cisen[j];j++) { curFrmPhnVar=NO_UFLOW_ADD(kb->cache_ci_senscr[i][j],curFrmPhnVar); curPhn=md->sen2cimap[j]; /* Update at the phone_end boundary */ if (curPhn != md->sen2cimap[j+1]) { curFrmPhnVar/=nState; /* ARCHAN: I hate to do division ! */ kb->phn_heur_list[curPhn]=NO_UFLOW_ADD(kb->phn_heur_list[curPhn], curFrmPhnVar); curFrmPhnVar=MAX_NEG_INT32; } } } }else if(heutype==3){ for(i=kb->pl_win_strt;i<kb->pl_win_efv;i++) { curPhn=0; curFrmPhnVar=MAX_NEG_INT32; for(j=0;j==md->cd2cisen[j];j++) { if (curPhn==0 || curPhn != md->sen2cimap[j-1]) /* dangerous hack! */ kb->phn_heur_list[curPhn]=NO_UFLOW_ADD(kb->phn_heur_list[curPhn],kb->cache_ci_senscr[i][j]); curPhn=md->sen2cimap[j]; if (curFrmPhnVar<kb->cache_ci_senscr[i][j]) curFrmPhnVar=kb->cache_ci_senscr[i][j]; /* Update at the phone_end boundary */ if (md->sen2cimap[j] != md->sen2cimap[j+1]) { kb->phn_heur_list[curPhn]=NO_UFLOW_ADD(kb->phn_heur_list[curPhn],curFrmPhnVar); curFrmPhnVar=MAX_NEG_INT32; } } } }#if 0 for(j=0;j==md->cd2cisen[j];j++) { curPhn=md->cd2cisen[j]; E_INFO("phoneme heuristics scores at phn %d is %d\n",j, kb->phn_heur_list[mdef->sen2cimap[j]]); }#endif}/* Invoked by ctl_process and ctl_process_dyn_lm in libs3decoder/corpus.c *//* Arthur: I hope that this function is less than 500 lines. This will allow next programmer still able to read it. Complexity of phoneme lookahead still lies on this function. (Around 50 lines.) Let it be for a while. I also don't want to make this part too crufty. */void utt_decode (void *data, char *uttfile, int32 sf, int32 ef, char *uttid){ kb_t *kb; kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; mgau_model_t *mgau; subvq_t *svq; gs_t * gs; lextree_t *lextree; int32 besthmmscr, bestwordscr, th, pth, wth, maxwpf, maxhistpf, maxhmmpf, ptranskip; int32 i, j, f; int32 n_hmm_eval, frm_nhmm, hb, pb, wb; FILE *hmmdumpfp; float32 factor; int32 pheurtype; E_INFO("Processing: %s\n", uttid); kb = (kb_t *) data; kbcore = kb->kbcore; mdef = kbcore_mdef (kbcore); dict = kbcore_dict (kbcore); d2p = kbcore_dict2pid (kbcore); mgau = kbcore_mgau (kbcore); svq = kbcore_svq (kbcore); gs = kbcore_gs(kbcore); kb->uttid = uttid; maxwpf = kb->histprune->maxwpf; maxhistpf = kb->histprune->maxwpf; maxhmmpf = kb->histprune->maxhmmpf; ptranskip = kb->beam->ptranskip; hmmdumpfp = cmd_ln_int32("-hmmdump") ? stderr : NULL; pheurtype = cmd_ln_int32 ("-pheurtype"); /* Read mfc file and build feature vectors for entire utterance */ kb->nfr = feat_s2mfc2feat(kbcore_fcb(kbcore), uttfile, cmd_ln_str("-cepdir"),".mfc", sf, ef, kb->feat, S3_MAX_FRAMES); factor = (float32) log_to_logs3_factor(); for (i = 0; i < kb->hmm_hist_bins; i++) kb->hmm_hist[i] = 0; utt_begin (kb); n_hmm_eval = 0; kb->utt_sen_eval = 0; kb->utt_gau_eval = 0; kb->utt_cisen_eval = 0; kb->utt_cigau_eval = 0; /* initialization of ci-phoneme look-ahead scores */ ptmr_start (&(kb->tm_sen)); /* effective window is the same as pl_win because we're not in live * mode */ kb->pl_win_efv = kb->pl_win > kb->nfr ? kb->nfr : kb->pl_win; kb->pl_win_strt=0; /* feat_print(kb->kbcore->fcb,kb->feat,kb->nfr,stderr);*/ for(f = 0; f < kb->pl_win_efv; f++){ /*Compute the CI phone score at here */ kb->cache_best_list[f]=MAX_NEG_INT32; approx_cont_mgau_ci_eval(kb->kbcore,kb->fastgmm,kb->kbcore->mdef,kb->feat[f][0],kb->cache_ci_senscr[f]); kb->utt_cisen_eval += mgau_frm_cisen_eval(kb->kbcore->mgau); kb->utt_cigau_eval += mgau_frm_cigau_eval(kb->kbcore->mgau); /* E_INFO("%d %d\n",kb->utt_cisen_eval,kb->utt_cigau_eval);*/ for(i=0;i==mdef->cd2cisen[i];i++){ if(kb->cache_ci_senscr[f][i]>kb->cache_best_list[f]) kb->cache_best_list[f]=kb->cache_ci_senscr[f][i]; } } ptmr_stop (&(kb->tm_sen)); fflush(stderr); for (f = 0; f < kb->nfr; f++) { /* Acoustic (senone scores) evaluation */ ptmr_start (&(kb->tm_sen)); /* Find active senones and composite senones, from active lextree nodes */ /*The active senones will also be changed in approx_cont_mgau_frame_eval */ if (kb->sen_active) { memset (kb->ssid_active, 0, mdef_n_sseq(mdef) * sizeof(int32)); memset (kb->comssid_active, 0, dict2pid_n_comsseq(d2p) * sizeof(int32)); /* Find active senone-sequence IDs (including composite ones) */ for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; lextree_ssid_active (lextree, kb->ssid_active, kb->comssid_active); } /* Find active senones from active senone-sequences */ memset (kb->sen_active, 0, mdef_n_sen(mdef) * sizeof(int32)); mdef_sseq2sen_active (mdef, kb->ssid_active, kb->sen_active); /* Add in senones needed for active composite senone-sequences */ dict2pid_comsseq2sen_active (d2p, mdef, kb->comssid_active, kb->sen_active); } /* Always use the first buffer in the cache*/ /* Remember, this function will always back off to the simplest GMM computation by default. So don't worry about fast-computation/adaptation issues :-)*/#if _DEBUG_GSCORE_ for(i=0;i<mgau_veclen(kb->kbcore->mgau);i++){ fprintf(stderr,"%f ",kb->feat[f][0][i]); } fprintf(stderr,"\n"); fflush(stderr);#endif approx_cont_mgau_frame_eval(kb->kbcore, kb->fastgmm, kb->feat[f][0], f, kb->sen_active, kb->rec_sen_active, kb->ascr->sen, kb->cache_ci_senscr[kb->pl_win_strt], &(kb->tm_ovrhd)); kb->utt_sen_eval += mgau_frm_sen_eval(mgau); kb->utt_gau_eval += mgau_frm_gau_eval(mgau); /* Evaluate composite senone scores from senone scores */ dict2pid_comsenscr (kbcore_dict2pid(kbcore), kb->ascr->sen, kb->ascr->comsen); ptmr_stop (&(kb->tm_sen)); /* Search */ ptmr_start (&(kb->tm_srch)); /* Compute phoneme heuristics */ /* Determine which set of phonemes should be active in next stage using the lookahead information*/ /* Notice that this loop can be further optimized by implementing it incrementally*/ /* ARCHAN and JSHERWAN Eventually, this is implemented as a function */ if(pheurtype!=0) computePhnHeur(mdef,kb,pheurtype); /* Evaluate active HMMs in each lextree; note best HMM state score */ besthmmscr = MAX_NEG_INT32; bestwordscr = MAX_NEG_INT32; frm_nhmm = 0; for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; if (hmmdumpfp != NULL) fprintf (hmmdumpfp, "Fr %d Lextree %d #HMM %d\n", f, i, lextree->n_active); lextree_hmm_eval (lextree, kbcore, kb->ascr, f, hmmdumpfp); if (besthmmscr < lextree->best) besthmmscr = lextree->best; if (bestwordscr < lextree->wbest) bestwordscr = lextree->wbest;#if 0 E_INFO("lextree->best %d\n",lextree->best); E_INFO("best score %d at time %d, tree %d: af compute repl.\n",besthmmscr,f,i);#endif n_hmm_eval += lextree->n_active; frm_nhmm += lextree->n_active; } if (besthmmscr > 0) { E_ERROR("***ERROR*** Fr %d, best HMM score > 0 (%d); int32 wraparound?\n", f, besthmmscr); } kb->hmm_hist[frm_nhmm / kb->hmm_hist_binsize]++; /* This part should be written as functions */ /* Set pruning threshold depending on whether number of active HMMs within limit */ if (frm_nhmm > (maxhmmpf + (maxhmmpf >> 1))) { int32 *bin, nbin, bw; /* Use histogram pruning */ nbin = 1000; bw = -(kb->beam->hmm) / nbin; bin = (int32 *) ckd_calloc (nbin, sizeof(int32)); for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; lextree_hmm_histbin (lextree, besthmmscr, bin, nbin, bw); } for (i = 0, j = 0; (i < nbin) && (j < maxhmmpf); i++, j += bin[i]); ckd_free ((void *) bin); /* Determine hmm, phone, word beams */ hb = -(i * bw); pb = (hb > kb->beam->ptrans) ? hb : kb->beam->ptrans; wb = (hb > kb->beam->word) ? hb : kb->beam->word;#if 0 E_INFO("Fr %5d, #hmm= %6d, #bin= %d, #hmm= %6d, beam= %8d, pbeam= %8d, wbeam= %8d\n", f, frm_nhmm, i, j, hb, pb, wb);#endif } else { hb = kb->beam->hmm; pb = kb->beam->ptrans; wb = kb->beam->word; } kb->bestscore = besthmmscr; kb->bestwordscore = bestwordscr; th = kb->bestscore + hb; /* HMM survival threshold */ pth = kb->bestscore + pb; /* Cross-HMM transition threshold */ wth = kb->bestwordscore + wb; /* Word exit threshold */ /* * For each lextree, determine if the active HMMs remain active for next * frame, propagate scores across HMM boundaries, and note word exits. */ /* By ARCHAN 20040510, This segment is longer than it should be to cope with ICC*/ if(ptranskip==0){ for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; lextree_hmm_propagate (lextree, kbcore, kb->vithist, f, th, pth, wth,kb->phn_heur_list,kb->pl_beam,pheurtype); } }else{ for (i = 0; i < (kb->n_lextree <<1); i++) { lextree = (i < kb->n_lextree) ? kb->ugtree[i] : kb->fillertree[i - kb->n_lextree]; if ((f % ptranskip) != 0) lextree_hmm_propagate (lextree, kbcore, kb->vithist, f, th, pth, wth,kb->phn_heur_list,kb->pl_beam,pheurtype); else lextree_hmm_propagate (lextree, kbcore, kb->vithist, f, th, wth, wth,kb->phn_heur_list,kb->pl_beam,pheurtype);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -