📄 factoring_sub.c
字号:
for (i=1;i<wchmm->scnum;i++) { if (freemark[i] == TRUE) { free_successor(wchmm, i); /* reset node -> sclist link */ wchmm->state[wchmm->sclist2node[i]].scid = 0; } } /* garbage collection of deleted sclist */ compaction_successor(wchmm); free(freemark); jlog("STAT: done\n");}#ifdef UNIGRAM_FACTORING/** * <JA> * 腾菇陇步辑今惧の链ノ〖ドに successor list を菇蜜するメイン簇眶(unigram factoring 脱 * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Main function to build whole successor list to lexicon tree for unigram factoring * * @param wchmm [i/o] tree lexicon * </EN> * * @callgraph * @callergraph * */voidmake_successor_list_unigram_factoring(WCHMM_INFO *wchmm){#ifndef FAST_FACTOR1_SUCCESSOR_LIST /* old way */ make_successor_list(wchmm); calc_all_unigram_factoring_values(wchmm);#else /* ~FAST_FACTOR1_SUCCESSOR_LIST */ /* new way */ int node, node2; WORD_ID w, w2; int i, j, n, f; int s; LOGPROB tmpprob; jlog("STAT: make successor lists for unigram factoring\n"); /* 1. initialize */ /* initialize node->sclist index on wchmm tree */ for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0; /* in unigram factoring, number of successor = vocabulary size */ wchmm->scnum = wchmm->winfo->num + 1; if (debug2_flag) { jlog("DEBUG: successor list size = %d\n", wchmm->scnum); } /* allocate successor list */ wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum); for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL; /* sclist2node is not used */ /* 2. make successor list, and count needed fscore num */ f = 1; s = 1; for (w=0;w<wchmm->winfo->num;w++) { for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) { if (i < wchmm->winfo->wlen[w]) { node = wchmm->offset[w][i]; } else { node = wchmm->wordend[w]; } if (wchmm->state[node].scid == 0) { /* not assigned */ /* new node found, assign new and exit here */ wchmm->state[node].scid = s++; if (s > wchmm->scnum) { jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n"); return; } add_successor(wchmm, node, w); break; } else if (wchmm->state[node].scid > 0) { /* that node has sclist */ /* move it to the current first isolated node in that word */ w2 = wchmm->sclist[wchmm->state[node].scid]->word; for(j=i+1;j<wchmm->winfo->wlen[w2] + 1;j++) { if (j < wchmm->winfo->wlen[w2]) { node2 = wchmm->offset[w2][j]; } else { node2 = wchmm->wordend[w2]; } if (wchmm->state[node2].scid == 0) { /* not assigned */ /* move sclist to there */ wchmm->state[node2].scid = wchmm->state[node].scid; break; } } if (j >= wchmm->winfo->wlen[w2] + 1) { /* not found? */ jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2); return; } /* make current node as fscore node */ n = f++; wchmm->state[node].scid = -n; /* not compute unigram factoring value yet */ } } } /* 2. allocate fscore buffer */ wchmm->fsnum = f; wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum); for(n=0;n<wchmm->fsnum;n++) wchmm->fscore[n] = LOG_ZERO; /* 3. parse again to assign fscore values */ for (w=0;w<wchmm->winfo->num;w++) { for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) { if (i < wchmm->winfo->wlen[w]) { node = wchmm->offset[w][i]; } else { node = wchmm->wordend[w]; } if (wchmm->state[node].scid < 0) { /* update max */ if (wchmm->ngram) { tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w])#ifdef CLASS_NGRAM + wchmm->winfo->cprob[w]#endif ; } else { tmpprob = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob); } n = - wchmm->state[node].scid; if (wchmm->fscore[n] < tmpprob) { wchmm->fscore[n] = tmpprob; } } } }#endif /* ~FAST_FACTOR1_SUCCESSOR_LIST */ jlog("STAT: done\n");}#endif /* UNIGRAM_FACTORING *//** * <JA> * 菇蜜された factoring 攫鼠を multipath 脱に拇腊する. factoring 攫鼠を, * モデル链挛をスキップする莲败がある眷圭はその黎の不燎へコピ〖する. * また·(叫蜗を积たない)矢片矢恕ノ〖ドに帽胳黎片ノ〖ドからコピ〖する. * * @param wchmm [in] 腾菇陇步辑今 * </JA> * <EN> * Adjust factoring data in tree lexicon for multipath transition handling. * * @param wchmm [in] tree lexicon * </EN> * * @callgraph * @callergraph * */voidadjust_sc_index(WCHMM_INFO *wchmm){ WORD_ID w; int i,j,k; HMM_Logical *ltmp; int ltmp_state_num; int ato; LOGPROB prob; int node, scid; A_CELL2 *ac; /* duplicate scid for HMMs with more than one arc from initial state */ for(w=0;w<wchmm->winfo->num;w++) { for(k=0;k<wchmm->winfo->wlen[w];k++) { node = wchmm->offset[w][k]; scid = wchmm->state[node].scid; if (scid == 0) continue; ltmp = wchmm->winfo->wseq[w][k]; ltmp_state_num = hmm_logical_state_num(ltmp); if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) { j = k + 1; if (j == wchmm->winfo->wlen[w]) { if (wchmm->state[wchmm->wordend[w]].scid == 0) { jlog("STAT: word %d: factoring node copied for skip phone\n", w); wchmm->state[wchmm->wordend[w]].scid = scid; } } else { if (wchmm->state[wchmm->offset[w][j]].scid == 0) { jlog("STAT: word %d: factoring node copied for skip phone\n", w); wchmm->state[wchmm->offset[w][j]].scid = scid; } } } for(ato=1;ato<ltmp_state_num;ato++) { prob = (hmm_logical_trans(ltmp))->a[0][ato]; if (prob != LOG_ZERO) { wchmm->state[node+ato-1].scid = scid; } } } } /* move scid and fscore on the head state to the head grammar state */ for(i=0;i<wchmm->startnum;i++) { node = wchmm->startnode[i]; if (wchmm->state[node].out.state != NULL) { j_internal_error("adjust_sc_index: outprob exist in word-head node??\n"); } if (wchmm->next_a[node] != LOG_ZERO) { if (wchmm->state[node+1].scid != 0) { if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) { j_internal_error("adjust_sc_index: different successor list within word-head phone?\n"); } wchmm->state[node].scid = wchmm->state[node+1].scid; wchmm->state[node+1].scid = 0; } } for(ac=wchmm->ac[node];ac;ac=ac->next) { for(k=0;k<ac->n;k++) { if (wchmm->state[ac->arc[k]].scid != 0) { if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) { j_internal_error("adjust_sc_index: different successor list within word-head phone?\n"); } wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid; wchmm->state[ac->arc[k]].scid = 0; } } } }}/* -------------------------------------------------------------------- *//* factoring computation *//** * <JA> * 腾菇陇步辑今脱の factoring キャッシュをメモリ充り烧けして介袋步する. * この簇眶はプログラム倡幌箕に办刨だけ钙ばれる. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Initialize factoring cache for a tree lexicon, allocating memory for * cache. This should be called only once on start up. * * @param wchmm [i/o] tree lexicon * </EN> * * @callgraph * @callergraph * */voidmax_successor_cache_init(WCHMM_INFO *wchmm){ int i; LM_PROB_CACHE *l; WORD_ID wnum; /* finally shrink the memory area of successor list here */ shrink_successor(wchmm); /* for word-internal */ l = &(wchmm->lmcache); l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum); l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum); for (i=0;i<wchmm->scnum;i++) { l->lastwcache[i] = WORD_INVALID; } /* for cross-word */ if (wchmm->ngram) { wnum = wchmm->ngram->max_word_num; } else { wnum = wchmm->winfo->num; }#ifdef HASH_CACHE_IW l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100; if (l->iw_cache_num < 10) l->iw_cache_num = 10;#else l->iw_cache_num = wnum;#endif /* HASH_CACHE_IW */ l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num); for (i=0;i<l->iw_cache_num;i++) { l->iw_sc_cache[i] = NULL; }#ifdef HASH_CACHE_IW l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num); for (i=0;i<l->iw_cache_num;i++) { l->iw_lw_cache[i] = WORD_INVALID; }#endif}/** * <JA> * 帽胳粗の factoring cache のメモリ挝拌を豺庶する. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Free cross-word factoring cache. * * @param wchmm [i/o] tree lexicon * </EN> */static voidmax_successor_prob_iw_free(WCHMM_INFO *wchmm){ int i; LM_PROB_CACHE *l; l = &(wchmm->lmcache); for (i=0;i<l->iw_cache_num;i++) { if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]); l->iw_sc_cache[i] = NULL; }}/** * <JA> * factoring 脱 cache のメモリ挝拌を链て豺庶する. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Free all memory for factoring cache. * * @param wchmm [i/o] tree lexicon * </EN> * * @callgraph * @callergraph * */voidmax_successor_cache_free(WCHMM_INFO *wchmm){ free(wchmm->lmcache.probcache); free(wchmm->lmcache.lastwcache); max_successor_prob_iw_free(wchmm); free(wchmm->lmcache.iw_sc_cache);#ifdef HASH_CACHE_IW free(wchmm->lmcache.iw_lw_cache);#endif}#ifdef UNIGRAM_FACTORING/** * <JA> * @brief 帽胳黎片ノ〖ドのうちFactoring においてキャッシュが涩妥なノ〖ドの * リストを侯喇する. * * 1-gram factoring は·晦ノ〖ドにおいて木涟帽胳に巴赂しない盖年猛 * (unigramの呵络猛)を涂える. このため·帽胳粗の factoring 纷换において· * 腾菇陇步辑今惧で剩眶の帽胳で鼎铜されている帽胳黎片ノ〖ドについては· * その猛は木涟帽胳によらず盖年猛であり·千急箕に帽胳粗キャッシュを瘦积 * する涩妥はない. * * この簇眶では·帽胳黎片ノ〖ドのリストからそのような factoring キャッシュが * 稍妥なノ〖ドを近嘲して·1-gram factoring 箕に帽胳粗キャッシュが涩妥な * 帽胳黎片ノ〖ド∈♂戮の帽胳と鼎铜されていない迫惟した帽胳黎片ノ〖ド∷の * リストを侯喇し·wchmm->start2isolate および wchmm->isolatenum に呈羌する. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * @brief Make a list of word head nodes on which cross-word factoring cache * is needed. * * On 1-gram factoring, the branch nodes on tree lexicon has a fixed * factoring value (maximum 1-gram score of all sub-tree words). Thus, when * computing cross-word factoring at word head nodes on inter-word * transition, such 1-gram factoring nodes on word head, shared by several * words, need not be cached in inter-word factoring cache. * * This function make a list of word-head nodes which requires inter-word * factoring caching (i.e. isolated word head nodes, does not shared by other * words) from the existing list of word head nodes, and set it to * wchmm->start2isolate and wchmm->isolatenum. * * @param wchmm [i/o] tree lexicon * </EN> * * @callgraph * @callergraph * */voidmake_iwcache_index(WCHMM_INFO *wchmm){ int i, node, num; wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum); num = 0; for(i=0;i<wchmm->startnum;i++) { node = wchmm->startnode[i]; if (wchmm->state[node].scid >= 0) { /* not a factoring node (isolated node, has no 1-gram factoring value) */ wchmm->start2isolate[i] = num; num++; } else { /* factoring node (shared) */ wchmm->start2isolate[i] = -1; } } wchmm->isolatenum = num;}/** * <JA> * @brief 腾菇陇步辑今惧の 1-gram factoring 猛を纷换して呈羌する. * * 1-gram factoring では帽胳粗で鼎铜されている晦ノ〖ドでは 1-gram の呵络猛 * を涂える. 帽胳旺悟によらないため·その猛は千急倡幌涟に * 纷换しておくことができる. この簇眶は腾菇陇步辑今 * 链挛について·鼎铜されている∈successor list に2つ笆惧の帽胳を积つノ〖ド∷ * ノ〖ドの 1-gram factoring 猛を纷换して呈羌する. 1-gram factoring猛を * 纷换稿は·そのノ〖ドの successor list はもはや稍妥であるため·ここで * 猴近する. * * 悸狠には·factoring 猛は wchmm->fscore に界肌瘦赂され·ノ〖ドの * scid にその瘦赂猛へのインデックス(1-)の砷の猛が呈羌される. 稍妥になった * successor list は·悸狠には compaction_successor 柒で·滦炳するノ〖ドの * scid が砷になっている successor list を猴近することで乖なわれる. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * @brief Calculate all the 1-gram factoring values on tree lexicon. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -