📄 wchmm.c
字号:
* * @param winfo [in] 帽胳辑今 * @param i [in] 帽胳1 * @param j [in] 帽胳2 * * @return 鼎铜材墙な黎片からの不燎眶を手す. * </JA> * <EN> * Compare two words from word head per phoneme to see how many phones * can be shared among the two. * * @param winfo [in] word dictionary * @param i [in] a word * @param j [in] another word * * @return the number of phonemes to be shared from the head of the words. * </EN> */static intwchmm_check_match(WORD_INFO *winfo, int i, int j){ int k,tmplen; for (tmplen=0,k=0;k<winfo->wlen[i];k++) { if (k > winfo->wlen[j]-1) break; if (! (strmatch(winfo->wseq[i][k]->name, winfo->wseq[j][k]->name))) break; tmplen++; } return(tmplen);}/** * <EN> * Initialize transition information on a node. * </EN> * <JA> * ノ〖ドの莲败攫鼠を介袋步する. * </JA> * * @param wchmm [i/o] tree lexicon * @param node [in] node id * */static voidacc_init(WCHMM_INFO *wchmm, int node){ wchmm->self_a[node] = LOG_ZERO; wchmm->next_a[node] = LOG_ZERO; wchmm->ac[node] = NULL;}/** * <EN> * Add an arc to a node. * This function is for transition other than self and next node. * </EN> * <JA> * ノ〖ドに莲败を纳裁する. * この簇眶は极甘莲败ˇ钨への莲败笆嘲の眷圭に蝗脱される. 
* </JA> * * @param wchmm [i/o] tree lexicon * @param node [in] node id * @param a [in] transition probability in log10 * @param arc [in] transition destination node id * */static voidadd_ac(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc){ A_CELL2 *ac2; for(ac2=wchmm->ac[node];ac2;ac2=ac2->next) { if (ac2->n < A_CELL2_ALLOC_STEP) break; } if (ac2 == NULL) { ac2 = (A_CELL2 *)mybmalloc2(sizeof(A_CELL2), &(wchmm->malloc_root)); ac2->n = 0; ac2->next = wchmm->ac[node]; wchmm->ac[node] = ac2; } ac2->arc[ac2->n] = arc; ac2->a[ac2->n] = a; ac2->n++;}/** * <JA> * 腾菇陇步辑今のあるノ〖ドに·侍のノ〖ドへの莲败を纳裁する * * @param wchmm [i/o] 腾菇陇步辑今 * @param node [in] ノ〖ド戎规 * @param a [in] 莲败澄唯∈滦眶∷ * @param arc [in] 莲败黎のノ〖ド戎规 * </JA> * <EN> * Add a transition arc between two nodes on the tree lexicon * * @param wchmm [i/o] tree lexicon * @param node [in] node number of source node * @param a [in] transition probability in log scale * @param arc [in] node number of destination node * </EN> */static voidadd_wacc(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc){ if (arc == node) { wchmm->self_a[node] = a; } else if (arc == node + 1) { wchmm->next_a[node] = a; } else { add_ac(wchmm, node, a, arc); }}/** * <JA> * ある帽胳のある疤弥の不燎から帽胳琐眉の嘲へ叫る莲败のリストを评る. (multipath) * * @param wchmm [in] 腾菇陇步辑今 * @param w [in] 帽胳ID * @param pos [in] 不燎疤弥 * @param node [out] 不燎柒の·帽胳琐眉嘲への莲败を积つ觉轮のリスト * @param a [out] @a node の称妥燎の莲败澄唯 * @param num [out] @a node の妥燎眶. 券斧眶だけ笼裁される. * @param maxnum [in] @a node の呈羌材墙な呵络眶 * @param insert_sp [in] 帽胳姜眉での sp 洞み哈みを雇胃するならTRUE * </JA> * <EN> * Make outgoing transition list for given phone position of a word. 
(multipath) * * @param wchmm [in] tree lexicon * @param w [in] word ID * @param pos [in] location of target phone to be inspected in the word @a w * @param node [out] list of wchmm states that possibly has outgoing transition * @param a [out] transition probabilities of the outgoing transitions in @a node * @param num [out] number of elements in @a out (found num will be added) * @param maxnum [in] maximum number of elements that can be stored in @a node * @param insert_sp [in] TRUE if consider short-pause insertion on word end * </EN> */static voidget_outtrans_list(WCHMM_INFO *wchmm, WORD_ID w, int pos, int *node, LOGPROB *a, int *num, int maxnum, boolean insert_sp){ HMM_Logical *ltmp; int states; int k; LOGPROB prob; int oldnum; if (pos < 0) { /* set the word-beginning node, and return */ node[*num] = wchmm->wordbegin[w]; a[*num] = 0.0; (*num)++; } else { ltmp = wchmm->winfo->wseq[w][pos]; states = hmm_logical_state_num(ltmp); /* check initial->final state */ if ((hmm_logical_trans(ltmp))->a[0][states-1] != LOG_ZERO) { /* recursive call for previous phone */ oldnum = *num; get_outtrans_list(wchmm, w, pos-1, node, a, num, maxnum, FALSE); /* previous phone should not be an sp-inserted phone */ /* add probability of the skip transition to all the previous ones */ for(k=oldnum;k<*num;k++) { a[k] += (hmm_logical_trans(ltmp))->a[0][states-1]; } } /* add to list the arcs from output state to final state */ for (k = 1; k < states - 1; k++) { prob = (hmm_logical_trans(ltmp))->a[k][states-1]; if (prob != LOG_ZERO) { if (*num >= maxnum) { j_internal_error("get_outtrans_list: maximum outtrans list num exceeded %d\n", maxnum); } node[*num] = wchmm->offset[w][pos] + k - 1; a[*num] = prob; (*num)++; } } /* for -iwsp, add outgoing arc from the tail sp model only if need_sp == TRUE. need_sp should be TRUE only when the connecting [pos] phone is also an end phone of the to-be-added word (i.e. 
homophone word) */ /* */ if (insert_sp) { /* consider sp */ for (k = 1; k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) { prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[k][hmm_logical_state_num(wchmm->hmminfo->sp)-1]; if (prob != LOG_ZERO) { if (*num >= maxnum) { j_internal_error("get_outtrans_list: maximum outtrans list num exceeded %d\n", maxnum); } node[*num] = wchmm->offset[w][pos] + (states - 2) + k - 1; a[*num] = prob; (*num)++; } } } } /*printf(" %d(%s)-%d:\"%s\", num=%d\n", w, wchmm->winfo->woutput[w], pos, (pos < 0) ? "BGN" : wchmm->winfo->wseq[w][pos]->name, *num);*/ return;} /** * <JA> * ある不燎の琐萨の觉轮から·ある不燎の黎片觉轮への莲败を纳裁する. * * @param wchmm [i/o] 腾菇陇步辑今 * @param from_node [in] ある不燎の琐萨の觉轮 * @param to_node [in] ある不燎の黎片觉轮 * @param tinfo [in] @a from_node の掳する不燎HMMの莲败澄唯乖误 * </JA> * <EN> * Add a transition from end node of a phone to start node of another phone. * * @param wchmm [i/o] tree lexicon * @param from_node [in] end node of a phone * @param to_node [in] start node of a phone * @param tinfo [in] transition prob. matrix of the @a from_node phone. 
* </EN>
 */
static void
wchmm_link_hmm(WCHMM_INFO *wchmm, int from_node, int to_node, HTK_HMM_Trans *tinfo)
{
  A_CELL2 *cell;
  LOGPROB a;
  int state, slot;
  int exit_state = tinfo->statenum - 1;	/* index of the last state in tinfo */
  boolean exists;

  /* scan the states of tinfo from the one just before the last state down
     to state 0, and use the first one that has a transition to the last
     state */
  for (state = exit_state - 1; state >= 0; state--) {
    a = tinfo->a[state][exit_state];
    if (a == LOG_ZERO) continue;

    /* found: check if an identical arc (same destination, same
       probability) is already registered on from_node */
    if (to_node == from_node && wchmm->self_a[from_node] == a) {
      exists = TRUE;
    } else if (to_node == from_node + 1 && wchmm->next_a[from_node] == a) {
      exists = TRUE;
    } else {
      exists = FALSE;
      for (cell = wchmm->ac[from_node]; cell != NULL; cell = cell->next) {
	for (slot = 0; slot < cell->n; slot++) {
	  if (cell->arc[slot] == to_node && cell->a[slot] == a) {
	    exists = TRUE;
	    break;
	  }
	}
	if (exists == TRUE) break;
      }
    }

    /* NOTE(review): when the arc already exists the scan is abandoned and
       control reaches the j_internal_error() call below, whose message
       talks about a missing arc; confirm whether a silent return was
       intended here */
    if (exists) break;

    /* add the arc to wchmm */
    add_wacc(wchmm, from_node, a, to_node);
    return;			/* exit function here */
  }
  j_internal_error("wchmm_link_hmm: No arc to endstate?\n");
}

/**
 * <JA>
 * 腾菇陇步辑今面の2帽胳面のある不燎粗を儡鲁する.
 *
 * @param wchmm [i/o] 腾菇陇步辑今
 * @param from_word [in] 莲败傅の帽胳のID
 * @param from_seq [in] 莲败傅の帽胳面の儡鲁する不燎の疤弥
 * @param to_word [in] 莲败黎の帽胳のID
 * @param to_seq [in] 莲败黎の帽胳面の儡鲁する不燎の疤弥
 * </JA>
 * <EN>
 * Connect two phonemes in tree lexicon. 
* * @param wchmm [i/o] tree lexicon * @param from_word [in] source word ID * @param from_seq [in] index of source phoneme in @a from_word from which the other will be connected * @param to_word [in] destination word ID * @param to_seq [in] index of destination phoneme in @a to_word to which the other will connect * </EN> */static voidwchmm_link_subword(WCHMM_INFO *wchmm, int from_word, int from_seq, int to_word, int to_seq){ HMM_Logical *last; int lastp; last = wchmm->winfo->wseq[from_word][from_seq]; lastp = wchmm->offset[from_word][from_seq] + hmm_logical_state_num(last)-2 -1; wchmm_link_hmm(wchmm, lastp, wchmm->offset[to_word][to_seq], hmm_logical_trans(last));}/**************************************************************//******** homophone processing: duplicating leaf nodes ********//**************************************************************//** * @note * <JA> * 票不胳借妄: * 腾菇陇步辑今においてすべての帽胳は迫惟した呵姜觉轮を积つ涩妥があるため· * 票不胳は庙罢考く胺う涩妥がある. このため·呵介の腾菇陇步辑今を菇蜜した稿, * 侍の帽胳と窗链に鼎铜された帽胳(票不胳), あるいは侍の帽胳の办婶として * 虽め哈まれてしまっている帽胳を券斧するとともに, その呵姜ノ〖ドを * コピ〖して糠たな帽胳姜眉ノ〖ドを侯る涩妥がある. * </JA> * <EN> * Homophones: * As all words need to have an uniq state as a final state in a lexicon tree, * homophones should be handled carefully. After primal tree has been made, * we look through the tree to find the fully shared or embedded words * (homophone or part of other word), and duplicate the last leaf node * to have uniq end state. * </EN> *//** * <JA> * 帽胳姜眉觉轮の迫惟步¨涂えられた帽胳の姜眉ノ〖ドをコピ〖して· * 糠たにある帽胳の呵姜觉轮として年盗する. * * @param wchmm [i/o] 腾菇陇步辑今 * @param node [in] 票不胳の姜眉ノ〖ド戎规 * @param word [in] 糠たに判峡する帽胳 * </JA> * <EN> * Isolation of word-end nodes for homophones: duplicate the word-end state, * link as the same as original, and make it the new word-end node of the * given new word. 
* * @param wchmm [i/o] tree lexicon * @param node [in] the word end node of the already existing homophone * @param word [in] word ID to be added to the tree * </EN> */static voidwchmm_duplicate_state(WCHMM_INFO *wchmm, int node, int word) /* source node, new word */{ int j, n; int n_src, n_prev; A_CELL2 *ac; HMM_Logical *lastphone; /* 1 state will newly created: expand tree if needed */ if (wchmm->n + 1 >= wchmm->maxwcn) { wchmm_extend(wchmm); } /* n: the target new node to which 'node' is copied */ n = wchmm->n; n_src = node; /* copy output probability info */#ifdef PASS1_IWCD { RC_INFO *rcnew; LRC_INFO *lrcnew; wchmm->outstyle[n] = wchmm->outstyle[n_src]; if (wchmm->outstyle[n] == AS_RSET) { /* duplicate RC_INFO because it has its own cache */ rcnew = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root)); memcpy(rcnew, wchmm->state[n_src].out.rset, sizeof(RC_INFO)); wchmm->state[n].out.rset = rcnew; } else if (wchmm->outstyle[n] == AS_LRSET) { /* duplicate LRC_INFO because it has its own cache */ lrcnew = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root)); memcpy(lrcnew, wchmm->state[n_src].out.lrset, sizeof(LRC_INFO)); wchmm->state[n].out.lrset = lrcnew; } else { /* share same info, simply copy the pointer */ memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(ACOUSTIC_SPEC)); } }#else /* ~PASS1_IWCD */ memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(HTK_HMM_State *));#endif lastphone = wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-1]; acc_init(wchmm, n); /* add self transition arc */ wchmm->self_a[n] = wchmm->self_a[n_src]; /* copy transition arcs whose destination is the source node to new node */ if (hmm_logical_state_num(lastphone) == 3) { /* = 1 state */ /* phone with only 1 state should be treated carefully */ if (wchmm->winfo->wlen[word] == 1) { /* word consists of only this phone */ /* no arcs need to be copied: this is also a start node of a word */ wchmm->offset[word][0] = n; /* index the new 
word-beginning node as startnode (old ststart) */ if (wchmm->lmtype != LM_PROB || word != wchmm->winfo->head_silwid) { wchmm->startnode[wchmm->startnum] = n; if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word; /* expand data area if necessary */ if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm); } } else { /* copy arcs from the last state of the previous phone */ n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-2] + hmm_logical_state_num(wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-2]) - 3; if(n_src == n_prev + 1) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -