⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 search.c

📁 WinCE平台上的语音识别程序
💻 C
📖 第 1 页 / 共 5 页
字号:
    n_active_word[nf & 0x1] = nawl - active_word_list[nf & 0x1];    /*     * Prune permanently allocated single-phone channels.     * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.     */    for (i = 0; i < n_1ph_words; i++) {        w = single_phone_wid[i];        rhmm = (ROOT_CHAN_T *) word_chan[w];        if (rhmm->active < cf)            continue;        if (rhmm->bestscore > lastphn_thresh) {            rhmm->active = nf;            /* Could if ((! skip_alt_frm) || (cf & 0x1)) the following */            if (rhmm->score[HMM_LAST_STATE] > newword_thresh) {                save_bwd_ptr(w, rhmm->score[HMM_LAST_STATE],                             rhmm->path[HMM_LAST_STATE], 0);            }        }    }}/* * Allocate last phone channels for all possible right contexts for word w.  (Some * may already exist.) * (NOTE: Assume that w uses context!!) */voidalloc_all_rc(int32 w){    dict_entry_t *de;    CHAN_T *hmm, *thmm;    int32 *sseq_rc;             /* list of sseqid for all possible right context for w */    int32 i;    de = word_dict->dict_list[w];    assert(de->mpx);    sseq_rc = RightContextFwd[de->phone_ids[de->len - 1]];    hmm = word_chan[w];    if ((hmm == NULL) || (hmm->sseqid != *sseq_rc)) {        hmm = (CHAN_T *) listelem_alloc(sizeof(CHAN_T));        hmm->next = word_chan[w];        word_chan[w] = hmm;        hmm->info.rc_id = 0;        hmm->ciphone = de->ci_phone_ids[de->len - 1];        hmm->bestscore = WORST_SCORE;        hmm->score[0] = WORST_SCORE;        hmm->score[1] = WORST_SCORE;        hmm->score[2] = WORST_SCORE;#if HMM_5_STATE        hmm->score[3] = WORST_SCORE;        hmm->score[4] = WORST_SCORE;#endif        hmm->active = -1;        hmm->sseqid = *sseq_rc;    }    for (i = 1, sseq_rc++; *sseq_rc >= 0; sseq_rc++, i++) {        if ((hmm->next == NULL) || (hmm->next->sseqid != *sseq_rc)) {            thmm = (CHAN_T *) listelem_alloc(sizeof(CHAN_T));            thmm->next = hmm->next;            hmm->next = thmm;            hmm = thmm;            hmm->info.rc_id = i;            hmm->ciphone = de->ci_phone_ids[de->len - 1];            hmm->bestscore = WORST_SCORE;            hmm->score[0] = WORST_SCORE;            hmm->score[1] = WORST_SCORE;            hmm->score[2] = WORST_SCORE;#if HMM_5_STATE            hmm->score[3] = WORST_SCORE;            hmm->score[4] = WORST_SCORE;#endif            hmm->active = -1;            hmm->sseqid = *sseq_rc;        }        else            hmm = hmm->next;    }}voidfree_all_rc(int32 w){    CHAN_T *hmm, *thmm;    for (hmm = word_chan[w]; hmm; hmm = thmm) {        thmm = hmm->next;        listelem_free(hmm, sizeof(CHAN_T));    }    word_chan[w] = NULL;}/* * Structure for reorganizing the BP table entries in the current frame according * to distinct right context ci-phones.  Each entry contains the best BP entry for * a given right context.  Each successor word will pick up the correct entry based * on its first ci-phone. */struct bestbp_rc_s {    int32 score;    int32 path;                 /* BP table index corresponding to this entry */    int32 lc;                   /* right most ci-phone of above BP entry word */} *bestbp_rc;voidword_transition(void){    int32 i, k, bp, w, cf, nf;    /* int32 prev_bp, prev_wid, prev_endframe, prev2_bp, prev2_wid; */    int32 /* rcsize, */ rc;    int32 *rcss;                /* right context score stack */    int32 *rcpermtab;    int32 thresh, /* newword_thresh, */ newscore;    BPTBL_T *bpe;    dict_entry_t *pde, *de;     /* previous dict entry, dict entry */    ROOT_CHAN_T *rhmm;    /* CHAN_T *hmm; */    struct bestbp_rc_s *bestbp_rc_ptr;    int32 last_ciph;    int32 /* fwid0, fwid1, */ fwid2;    int32 pip;    int32 ssid;    cf = CurrentFrame;    /*     * Transition to start of new word instances (HMM tree roots); but only if words     * other than </s> finished here.     * But, first, find the best starting score for each possible right context phone.     */    for (i = NumCiPhones - 1; i >= 0; --i)        bestbp_rc[i].score = WORST_SCORE;    k = 0;    for (bp = BPTableIdx[cf]; bp < BPIdx; bp++) {        bpe = &(BPTable[bp]);        WordLatIdx[bpe->wid] = NO_BP;        if (bpe->wid == FinishWordId)            continue;        k++;        de = word_dict->dict_list[bpe->wid];        rcpermtab =            (bpe->r_diph >=             0) ? RightContextFwdPerm[bpe->r_diph] : zeroPermTab;        last_ciph = de->ci_phone_ids[de->len - 1];        rcss = &(BScoreStack[bpe->s_idx]);        for (rc = NumCiPhones - 1; rc >= 0; --rc) {            if (rcss[rcpermtab[rc]] > bestbp_rc[rc].score) {                bestbp_rc[rc].score = rcss[rcpermtab[rc]];                bestbp_rc[rc].path = bp;                bestbp_rc[rc].lc = last_ciph;            }        }    }    if (k == 0)        return;    nf = cf + 1;    thresh = BestScore + DynamicLogBeamWidth;    pip = logPhoneInsertionPenalty;    /*     * Hypothesize successors to words finished in this frame.     * Main dictionary, multi-phone words transition to HMM-trees roots.     */    for (i = n_root_chan, rhmm = root_chan; i > 0; --i, rhmm++) {        bestbp_rc_ptr = &(bestbp_rc[rhmm->ciphone]);        if (npa[rhmm->ciphone]) {            newscore = bestbp_rc_ptr->score + newword_penalty + pip;            if (newscore > thresh) {                if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) {                    ssid =                        LeftContextFwd[rhmm->diphone][bestbp_rc_ptr->lc];                    rhmm->score[0] = newscore;                    rhmm->path[0] = bestbp_rc_ptr->path;                    rhmm->active = nf;                    rhmm->sseqid[0] = ssid;                }            }        }    }    /*     * Single phone words; no right context for these.  Cannot use bestbp_rc as     * LM scores have to be included.  First find best transition to these words.     */    for (i = 0; i < n_1ph_LMwords; i++) {        w = single_phone_wid[i];        last_ltrans[w].dscr = (int32) 0x80000000;    }    for (bp = BPTableIdx[cf]; bp < BPIdx; bp++) {        bpe = &(BPTable[bp]);        if (!bpe->valid)            continue;        rcpermtab =            (bpe->r_diph >=             0) ? RightContextFwdPerm[bpe->r_diph] : zeroPermTab;        rcss = BScoreStack + bpe->s_idx;        for (i = 0; i < n_1ph_LMwords; i++) {            w = single_phone_wid[i];            de = word_dict->dict_list[w];            fwid2 = de->fwid;            newscore = rcss[rcpermtab[de->ci_phone_ids[0]]];            newscore +=                lm_tg_score(bpe->prev_real_fwid, bpe->real_fwid, fwid2);            if (last_ltrans[w].dscr < newscore) {                last_ltrans[w].dscr = newscore;                last_ltrans[w].bp = bp;            }        }    }    /* Now transition to in-LM single phone words */    for (i = 0; i < n_1ph_LMwords; i++) {        w = single_phone_wid[i];        rhmm = (ROOT_CHAN_T *) word_chan[w];        if ((w != FinishWordId) && (!npa[rhmm->ciphone]))            continue;        if ((newscore = last_ltrans[w].dscr + pip) > thresh) {            bpe = BPTable + last_ltrans[w].bp;            pde = word_dict->dict_list[bpe->wid];            if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) {                rhmm->score[0] = newscore;                rhmm->path[0] = last_ltrans[w].bp;                if (rhmm->mpx)                    rhmm->sseqid[0] =                        LeftContextFwd[rhmm->diphone][pde->                                                      ci_phone_ids[pde->                                                                   len -                                                                   1]];                rhmm->active = nf;            }        }    }    /* Remaining words: <sil>, noise words.  No mpx for these! */    bestbp_rc_ptr = &(bestbp_rc[SilencePhoneId]);    newscore = bestbp_rc_ptr->score + SilenceWordPenalty + pip;    if (newscore > thresh) {        w = SilenceWordId;        rhmm = (ROOT_CHAN_T *) word_chan[w];        if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) {            rhmm->score[0] = newscore;            rhmm->path[0] = bestbp_rc_ptr->path;            rhmm->active = nf;        }    }    newscore = bestbp_rc_ptr->score + FillerWordPenalty + pip;    if (newscore > thresh) {        for (w = SilenceWordId + 1; w < NumWords; w++) {            rhmm = (ROOT_CHAN_T *) word_chan[w];            if ((rhmm->active < cf) || (rhmm->score[0] < newscore)) {                rhmm->score[0] = newscore;                rhmm->path[0] = bestbp_rc_ptr->path;                rhmm->active = nf;            }        }    }}voidsearch_initialize(void){    int32 bptable_size = cmd_ln_int32("-latsize");#if SEARCH_TRACE_CHAN_DETAILED    static void load_trace_wordlist();#endif    NumWords = word_dict->dict_entry_count;    StartWordId = kb_get_word_id(cmd_ln_str("-lmstartsym"));    FinishWordId = kb_get_word_id(cmd_ln_str("-lmendsym"));    SilenceWordId = kb_get_word_id("SIL");    SilencePhoneId = phone_to_id("SIL", TRUE);    NumCiPhones = phoneCiCount();    LeftContextFwd = dict_left_context_fwd();    RightContextFwd = dict_right_context_fwd();    RightContextFwdPerm = dict_right_context_fwd_perm();    RightContextFwdSize = dict_right_context_fwd_size();    LeftContextBwd = dict_left_context_bwd();    LeftContextBwdPerm = dict_left_context_bwd_perm();    LeftContextBwdSize = dict_left_context_bwd_size();    RightContextBwd = dict_right_context_bwd();    NumMainDictWords = dict_get_num_main_words(word_dict);    word_chan = ckd_calloc(NumWords, sizeof(CHAN_T *));    WordLatIdx = ckd_calloc(NumWords, sizeof(int32));    zeroPermTab = ckd_calloc(phoneCiCount(), sizeof(int32));    word_active = ckd_calloc(NumWords, sizeof(int32));    if (NumWords / 1000 < 25)        BPTableSize = 25 * MAX_FRAMES;    else        BPTableSize = NumWords / 1000 * MAX_FRAMES;    BScoreStackSize = BPTableSize * 20;    if ((bptable_size > 0) && (bptable_size < 0x7fffffff)) {        BPTableSize = bptable_size;        BScoreStackSize = BPTableSize * 20;     /* 20 = ave. rc fanout */    }    BPTable = ckd_calloc(BPTableSize, sizeof(BPTBL_T));    BScoreStack = ckd_calloc(BScoreStackSize, sizeof(int32));    BPTableIdx = ckd_calloc(MAX_FRAMES + 2, sizeof(int32));    BPTableIdx++;               /* Make BPTableIdx[-1] valid */    lattice_density = ckd_calloc(MAX_FRAMES, sizeof(int32));    init_search_tree(word_dict);    active_word_list[0] =        ckd_calloc(2 * (NumWords + 1), sizeof(WORD_ID));    active_word_list[1] = active_word_list[0] + NumWords + 1;    bestbp_rc = ckd_calloc(NumCiPhones,                                                 sizeof(struct                                                        bestbp_rc_s));#if SEARCH_TRACE_CHAN_DETAILED    load_trace_wordlist("_TRACEWORDS_");#endif    lastphn_cand =        ckd_calloc(NumWords, sizeof(lastphn_cand_t));    senone_active = ckd_calloc(bin_mdef_n_sen(mdef), sizeof(int32));    senone_active_vec =        ckd_calloc((bin_mdef_n_sen(mdef) + BITVEC_WIDTH - 1)                               / BITVEC_WIDTH, sizeof(bitvec_t));    /* If we are doing two-pass search, cache the senone scores from     * the first pass (trading off memory for speed). */    if (cmd_ln_boolean("-fwdtree")        && cmd_ln_boolean("-fwdflat")        && cmd_ln_boolean("-cachesen")) {        past_senone_scores = ckd_calloc(MAX_FRAMES, sizeof(int32 *));        past_senone_active_vec = ckd_calloc(MAX_FRAMES, sizeof(bitvec_t *));    }    last_ltrans =        ckd_calloc(NumWords, sizeof(last_ltrans_t));    search_fwdflat_init();    searchlat_init();    context_word[0] = -1;    context_word[1] = -1;    /*     * Frames window size for predicting phones based on topsen.     * If 1, no prediction; use all phones.     */    if ((topsen_window = cmd_ln_int32("-topsenfrm")) < 1)        E_FATAL("topsen window = %d\n", topsen_window);    E_INFO("topsen-window = %d, ", topsen_window);    topsen_thresh = cmd_ln_int32("-topsenthresh");    if (topsen_window > 1)        E_INFOCONT("threshold = %d\n", topsen_thresh);    else        E_INFOCONT("no phone-prediction\n");    topsen_init();    sc_scores =        (int32 **) ckd_calloc_2d(topsen_window, bin_mdef_n_sen(mdef), sizeof(int32));    senone_scores = sc_scores[0];    topsen_score = ckd_calloc(MAX_FRAMES, sizeof(int32));    /*     * Allocate bestscore/phone array:     * bestpscr = single array of best-senone-based CIphones scores, updated     * every frame.     */    bestpscr = ckd_calloc(NumCiPhones, sizeof(int32));    search_set_beam_width(cmd_ln_float64("-beam"));    search_set_new_word_beam_width(cmd_ln_float64("-wbeam"));    search_set_new_phone_beam_width(cmd_ln_float64("-pbeam"));    search_set_last_phone_beam_width(cmd_ln_float64("-lpbeam"));    search_set_lastphone_alone_beam_width(cmd_ln_float64("-lponlybeam"));    search_set_silence_word_penalty(cmd_ln_float32("-silpen"),                                    cmd_ln_float32("-pip"));    search_set_filler_word_penalty(cmd_

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -