📄 search.c
字号:
n_active_word[nf & 0x1] = nawl - active_word_list[nf & 0x1]; /* * Prune permanently allocated single-phone channels. * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere. */ for (i = 0; i < n_1ph_words; i++) { w = single_phone_wid[i]; rhmm = (ROOT_CHAN_T *) word_chan[w]; if (rhmm->active < cf) continue; if (rhmm->bestscore > lastphn_thresh) { rhmm->active = nf; /* Could if ((! skip_alt_frm) || (cf & 0x1)) the following */ if (rhmm->score[HMM_LAST_STATE] > newword_thresh) { save_bwd_ptr(w, rhmm->score[HMM_LAST_STATE], rhmm->path[HMM_LAST_STATE], 0); } } }}/* * Allocate last phone channels for all possible right contexts for word w. (Some * may already exist.) * (NOTE: Assume that w uses context!!) */voidalloc_all_rc(int32 w){ dict_entry_t *de; CHAN_T *hmm, *thmm; int32 *sseq_rc; /* list of sseqid for all possible right context for w */ int32 i; de = word_dict->dict_list[w]; assert(de->mpx); sseq_rc = RightContextFwd[de->phone_ids[de->len - 1]]; hmm = word_chan[w]; if ((hmm == NULL) || (hmm->sseqid != *sseq_rc)) { hmm = (CHAN_T *) listelem_alloc(sizeof(CHAN_T)); hmm->next = word_chan[w]; word_chan[w] = hmm; hmm->info.rc_id = 0; hmm->ciphone = de->ci_phone_ids[de->len - 1]; hmm->bestscore = WORST_SCORE; hmm->score[0] = WORST_SCORE; hmm->score[1] = WORST_SCORE; hmm->score[2] = WORST_SCORE;#if HMM_5_STATE hmm->score[3] = WORST_SCORE; hmm->score[4] = WORST_SCORE;#endif hmm->active = -1; hmm->sseqid = *sseq_rc; } for (i = 1, sseq_rc++; *sseq_rc >= 0; sseq_rc++, i++) { if ((hmm->next == NULL) || (hmm->next->sseqid != *sseq_rc)) { thmm = (CHAN_T *) listelem_alloc(sizeof(CHAN_T)); thmm->next = hmm->next; hmm->next = thmm; hmm = thmm; hmm->info.rc_id = i; hmm->ciphone = de->ci_phone_ids[de->len - 1]; hmm->bestscore = WORST_SCORE; hmm->score[0] = WORST_SCORE; hmm->score[1] = WORST_SCORE; hmm->score[2] = WORST_SCORE;#if HMM_5_STATE hmm->score[3] = WORST_SCORE; hmm->score[4] = WORST_SCORE;#endif hmm->active = -1; hmm->sseqid = *sseq_rc; } else hmm = hmm->next; }}voidfree_all_rc(int32 w){ CHAN_T *hmm, *thmm; for (hmm = word_chan[w]; hmm; hmm = thmm) { thmm = hmm->next; listelem_free(hmm, sizeof(CHAN_T)); } word_chan[w] = NULL;}/* * Structure for reorganizing the BP table entries in the current frame according * to distinct right context ci-phones. Each entry contains the best BP entry for * a given right context. Each successor word will pick up the correct entry based * on its first ci-phone. */struct bestbp_rc_s { int32 score; int32 path; /* BP table index corresponding to this entry */ int32 lc; /* right most ci-phone of above BP entry word */} *bestbp_rc;voidword_transition(void){ int32 i, k, bp, w, cf, nf; /* int32 prev_bp, prev_wid, prev_endframe, prev2_bp, prev2_wid; */ int32 /* rcsize, */ rc; int32 *rcss; /* right context score stack */ int32 *rcpermtab; int32 thresh, /* newword_thresh, */ newscore; BPTBL_T *bpe; dict_entry_t *pde, *de; /* previous dict entry, dict entry */ ROOT_CHAN_T *rhmm; /* CHAN_T *hmm; */ struct bestbp_rc_s *bestbp_rc_ptr; int32 last_ciph; int32 /* fwid0, fwid1, */ fwid2; int32 pip; int32 ssid; cf = CurrentFrame; /* * Transition to start of new word instances (HMM tree roots); but only if words * other than </s> finished here. * But, first, find the best starting score for each possible right context phone. */ for (i = NumCiPhones - 1; i >= 0; --i) bestbp_rc[i].score = WORST_SCORE; k = 0; for (bp = BPTableIdx[cf]; bp < BPIdx; bp++) { bpe = &(BPTable[bp]); WordLatIdx[bpe->wid] = NO_BP; if (bpe->wid == FinishWordId) continue; k++; de = word_dict->dict_list[bpe->wid]; rcpermtab = (bpe->r_diph >= 0) ? RightContextFwdPerm[bpe->r_diph] : zeroPermTab; last_ciph = de->ci_phone_ids[de->len - 1]; rcss = &(BScoreStack[bpe->s_idx]); for (rc = NumCiPhones - 1; rc >= 0; --rc) { if (rcss[rcpermtab[rc]] > bestbp_rc[rc].score) { bestbp_rc[rc].score = rcss[rcpermtab[rc]]; bestbp_rc[rc].path = bp; bestbp_rc[rc].lc = last_ciph; } } } if (k == 0) return; nf = cf + 1; thresh = BestScore + DynamicLogBeamWidth; pip = logPhoneInsertionPenalty; /* * Hypothesize successors to words finished in this frame. * Main dictionary, multi-phone words transition to HMM-trees roots. */ for (i = n_root_chan, rhmm = root_chan; i > 0; --i, rhmm++) { bestbp_rc_ptr = &(bestbp_rc[rhmm->ciphone]); if (npa[rhmm->ciphone]) { newscore = bestbp_rc_ptr->score + newword_penalty + pip; if (newscore > thresh) { if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) { ssid = LeftContextFwd[rhmm->diphone][bestbp_rc_ptr->lc]; rhmm->score[0] = newscore; rhmm->path[0] = bestbp_rc_ptr->path; rhmm->active = nf; rhmm->sseqid[0] = ssid; } } } } /* * Single phone words; no right context for these. Cannot use bestbp_rc as * LM scores have to be included. First find best transition to these words. */ for (i = 0; i < n_1ph_LMwords; i++) { w = single_phone_wid[i]; last_ltrans[w].dscr = (int32) 0x80000000; } for (bp = BPTableIdx[cf]; bp < BPIdx; bp++) { bpe = &(BPTable[bp]); if (!bpe->valid) continue; rcpermtab = (bpe->r_diph >= 0) ? RightContextFwdPerm[bpe->r_diph] : zeroPermTab; rcss = BScoreStack + bpe->s_idx; for (i = 0; i < n_1ph_LMwords; i++) { w = single_phone_wid[i]; de = word_dict->dict_list[w]; fwid2 = de->fwid; newscore = rcss[rcpermtab[de->ci_phone_ids[0]]]; newscore += lm_tg_score(bpe->prev_real_fwid, bpe->real_fwid, fwid2); if (last_ltrans[w].dscr < newscore) { last_ltrans[w].dscr = newscore; last_ltrans[w].bp = bp; } } } /* Now transition to in-LM single phone words */ for (i = 0; i < n_1ph_LMwords; i++) { w = single_phone_wid[i]; rhmm = (ROOT_CHAN_T *) word_chan[w]; if ((w != FinishWordId) && (!npa[rhmm->ciphone])) continue; if ((newscore = last_ltrans[w].dscr + pip) > thresh) { bpe = BPTable + last_ltrans[w].bp; pde = word_dict->dict_list[bpe->wid]; if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) { rhmm->score[0] = newscore; rhmm->path[0] = last_ltrans[w].bp; if (rhmm->mpx) rhmm->sseqid[0] = LeftContextFwd[rhmm->diphone][pde-> ci_phone_ids[pde-> len - 1]]; rhmm->active = nf; } } } /* Remaining words: <sil>, noise words. No mpx for these! */ bestbp_rc_ptr = &(bestbp_rc[SilencePhoneId]); newscore = bestbp_rc_ptr->score + SilenceWordPenalty + pip; if (newscore > thresh) { w = SilenceWordId; rhmm = (ROOT_CHAN_T *) word_chan[w]; if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) { rhmm->score[0] = newscore; rhmm->path[0] = bestbp_rc_ptr->path; rhmm->active = nf; } } newscore = bestbp_rc_ptr->score + FillerWordPenalty + pip; if (newscore > thresh) { for (w = SilenceWordId + 1; w < NumWords; w++) { rhmm = (ROOT_CHAN_T *) word_chan[w]; if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) { rhmm->score[0] = newscore; rhmm->path[0] = bestbp_rc_ptr->path; rhmm->active = nf; } } }}voidsearch_initialize(void){ int32 bptable_size = cmd_ln_int32("-latsize");#if SEARCH_TRACE_CHAN_DETAILED static void load_trace_wordlist();#endif NumWords = word_dict->dict_entry_count; StartWordId = kb_get_word_id(cmd_ln_str("-lmstartsym")); FinishWordId = kb_get_word_id(cmd_ln_str("-lmendsym")); SilenceWordId = kb_get_word_id("SIL"); SilencePhoneId = phone_to_id("SIL", TRUE); NumCiPhones = phoneCiCount(); LeftContextFwd = dict_left_context_fwd(); RightContextFwd = dict_right_context_fwd(); RightContextFwdPerm = dict_right_context_fwd_perm(); RightContextFwdSize = dict_right_context_fwd_size(); LeftContextBwd = dict_left_context_bwd(); LeftContextBwdPerm = dict_left_context_bwd_perm(); LeftContextBwdSize = dict_left_context_bwd_size(); RightContextBwd = dict_right_context_bwd(); NumMainDictWords = dict_get_num_main_words(word_dict); word_chan = ckd_calloc(NumWords, sizeof(CHAN_T *)); WordLatIdx = ckd_calloc(NumWords, sizeof(int32)); zeroPermTab = ckd_calloc(phoneCiCount(), sizeof(int32)); word_active = ckd_calloc(NumWords, sizeof(int32)); if (NumWords / 1000 < 25) BPTableSize = 25 * MAX_FRAMES; else BPTableSize = NumWords / 1000 * MAX_FRAMES; BScoreStackSize = BPTableSize * 20; if ((bptable_size > 0) && (bptable_size < 0x7fffffff)) { BPTableSize = bptable_size; BScoreStackSize = BPTableSize * 20; /* 20 = ave. rc fanout */ } BPTable = ckd_calloc(BPTableSize, sizeof(BPTBL_T)); BScoreStack = ckd_calloc(BScoreStackSize, sizeof(int32)); BPTableIdx = ckd_calloc(MAX_FRAMES + 2, sizeof(int32)); BPTableIdx++; /* Make BPTableIdx[-1] valid */ lattice_density = ckd_calloc(MAX_FRAMES, sizeof(int32)); init_search_tree(word_dict); active_word_list[0] = ckd_calloc(2 * (NumWords + 1), sizeof(WORD_ID)); active_word_list[1] = active_word_list[0] + NumWords + 1; bestbp_rc = ckd_calloc(NumCiPhones, sizeof(struct bestbp_rc_s));#if SEARCH_TRACE_CHAN_DETAILED load_trace_wordlist("_TRACEWORDS_");#endif lastphn_cand = ckd_calloc(NumWords, sizeof(lastphn_cand_t)); senone_active = ckd_calloc(bin_mdef_n_sen(mdef), sizeof(int32)); senone_active_vec = ckd_calloc((bin_mdef_n_sen(mdef) + BITVEC_WIDTH - 1) / BITVEC_WIDTH, sizeof(bitvec_t)); /* If we are doing two-pass search, cache the senone scores from * the first pass (trading off memory for speed). */ if (cmd_ln_boolean("-fwdtree") && cmd_ln_boolean("-fwdflat") && cmd_ln_boolean("-cachesen")) { past_senone_scores = ckd_calloc(MAX_FRAMES, sizeof(int32 *)); past_senone_active_vec = ckd_calloc(MAX_FRAMES, sizeof(bitvec_t *)); } last_ltrans = ckd_calloc(NumWords, sizeof(last_ltrans_t)); search_fwdflat_init(); searchlat_init(); context_word[0] = -1; context_word[1] = -1; /* * Frames window size for predicting phones based on topsen. * If 1, no prediction; use all phones. */ if ((topsen_window = cmd_ln_int32("-topsenfrm")) < 1) E_FATAL("topsen window = %d\n", topsen_window); E_INFO("topsen-window = %d, ", topsen_window); topsen_thresh = cmd_ln_int32("-topsenthresh"); if (topsen_window > 1) E_INFOCONT("threshold = %d\n", topsen_thresh); else E_INFOCONT("no phone-prediction\n"); topsen_init(); sc_scores = (int32 **) ckd_calloc_2d(topsen_window, bin_mdef_n_sen(mdef), sizeof(int32)); senone_scores = sc_scores[0]; topsen_score = ckd_calloc(MAX_FRAMES, sizeof(int32)); /* * Allocate bestscore/phone array: * bestpscr = single array of best-senone-based CIphones scores, updated * every frame. */ bestpscr = ckd_calloc(NumCiPhones, sizeof(int32)); search_set_beam_width(cmd_ln_float64("-beam")); search_set_new_word_beam_width(cmd_ln_float64("-wbeam")); search_set_new_phone_beam_width(cmd_ln_float64("-pbeam")); search_set_last_phone_beam_width(cmd_ln_float64("-lpbeam")); search_set_lastphone_alone_beam_width(cmd_ln_float64("-lponlybeam")); search_set_silence_word_penalty(cmd_ln_float32("-silpen"), cmd_ln_float32("-pip")); search_set_filler_word_penalty(cmd_
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -