📄 wchmm.c
字号:
add_wacc(wchmm, n_prev, wchmm->next_a[n_prev], n); } else { for(ac=wchmm->ac[n_prev];ac;ac=ac->next) { for(j=0;j<ac->n;j++) { if (ac->arc[j] == n_src) { add_wacc(wchmm, n_prev, ac->a[j], n); } } } } /* also update the last offset (== wordend in this case) */ wchmm->offset[word][wchmm->winfo->wlen[word]-1] = n; } } else { /* phone with more than 2 states */ /* copy arcs from/to the source node to new node */ for (n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-1]; n_prev < n_src; n_prev++) { if (n_src == n_prev + 1) { add_wacc(wchmm, n_prev, wchmm->next_a[n_prev], n); } else { for(ac=wchmm->ac[n_prev];ac;ac=ac->next) { for(j=0;j<ac->n;j++) { if (ac->arc[j] == n_src) { add_wacc(wchmm, n_prev, ac->a[j], n); } } } } if (n_prev == n_src + 1) { add_wacc(wchmm, n, wchmm->next_a[n_src], n_prev); } else { for(ac=wchmm->ac[n_src];ac;ac=ac->next) { for(j=0;j<ac->n;j++) { if (ac->arc[j] == n_prev) { add_wacc(wchmm, n, ac->a[j], n_prev); } } } } } } /* map word <-> node */ wchmm->stend[n] = word; /* 'n' is an end node of word 'word' */ wchmm->wordend[word] = n; /* the word end node of 'word' is 'n' */ /* new state has been created: increment the size */ wchmm->n++; }/** * <JA> * 腾菇陇步辑今链挛を瘤汉して·すべての票不胳について帽胳姜眉觉轮の迫惟步 * を乖う. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Scan the whole lexicon tree to find already registered homophones, and * make word-end nodes of the found homophones isolated from others. 
*
* @param wchmm [i/o] tree lexicon
*
* @return the number of word-end nodes that were duplicated.
* </EN>
*/
static int
wchmm_duplicate_leafnode(WCHMM_INFO *wchmm)
{
  int w, n;
  int nlast;                    /* node count before any duplication */
  int narc, narc_model;
  boolean *dupw;                /* per-node marker: word end already seen */
  A_CELL2 *actmp;
  int dupcount;

  dupcount = 0;

  /* Only nodes that exist on entry can be shared word ends, so the
     marker array is sized by the current node count. */
  nlast = wchmm->n;
  dupw = (boolean *)mymalloc(sizeof(boolean) * nlast);
  for (n = 0; n < nlast; n++) {
    dupw[n] = FALSE;            /* initialize all markers */
  }

  for (w = 0; w < wchmm->winfo->num; w++) {
    n = wchmm->wordend[w];

    if (dupw[n]) {
      /* already marked (2nd time or later): another word ends on this
         node, so give word 'w' its own copy */
      wchmm_duplicate_state(wchmm, n, w);
      dupcount++;
      continue;
    }

    /* not marked yet (1st time): try to find an arc outside the word */
    {
      HMM_Logical *lastphone;
      HTK_HMM_Trans *tinfo;
      int nstate, laststate, i;

      /* count number of model-internal arcs from the last state */
      lastphone = wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1];
      nstate = hmm_logical_state_num(lastphone);
      laststate = nstate - 2;
      tinfo = hmm_logical_trans(lastphone);
      narc_model = 0;
      for (i = 1; i < nstate - 1; i++) {
        if (tinfo->a[laststate][i] != LOG_ZERO) narc_model++;
      }

      /* count number of actual arcs from the last state in the tree */
      narc = 0;
      if (wchmm->self_a[n] != LOG_ZERO) narc++;
      if (wchmm->next_a[n] != LOG_ZERO) narc++;
      for (actmp = wchmm->ac[n]; actmp; actmp = actmp->next) {
        narc += actmp->n;
      }
    }

    if (narc_model != narc) {
      /* the counts differ: word 'w' is embedded as part of other words
         at this node 'n', so duplicate this node now */
      wchmm_duplicate_state(wchmm, n, w);
      dupcount++;
      /* as the new node has been assigned as the word end node of word
         'w', reset this source node: it is no longer a word end node */
      wchmm->stend[n] = WORD_INVALID;
    } else {
      /* no arc to other nodes found: it is a single word tail.  As this
         is the first time, only make sure that this node is the word
         end of [w] */
      wchmm->stend[n] = w;
    }

    /* mark node 'n' */
    dupw[n] = TRUE;
  }

  free(dupw);

  return(dupcount);
}

/**************************************************************/
/*************** add a word to wchmm lexicon 
tree *************//**************************************************************//** * <JA> * 腾菇陇步辑今に糠たに帽胳を纳裁する. 纳裁眷疥の攫鼠として·附哼の腾菇陇步 * 辑今柒で呵もその帽胳と黎片から紊くマッチする帽胳·およびそのマッチする墓さ * を回年する. * * @param wchmm [i/o] 腾菇陇步辑今 * @param word [in] 纳裁する辑今帽胳のID * @param matchlen [in] @a word と @a matchword の黎片からマッチする不燎墓 * @param matchword [in] 贷赂の腾菇陇步辑今面で @a word と呵もマッチする帽胳 * @param enable_iwsp [in] 帽胳粗ショ〖トポ〖ズ怠墙蝗脱箕TRUEを回年 * </JA> * <EN> * Add a new word to the lexicon tree. The longest matched word in the current * lexicon tree and the length of the matched phoneme from the word head should * be specified to tell where to insert the new word to the tree. * * @param wchmm [i/o] tree lexicon * @param word [in] word id to be added to the lexicon * @param matchlen [in] phoneme match length between @a word and @a matchword. * @param matchword [in] the longest matched word with @a word in the current lexicon tree * @param enable_iwsp [in] should be TRUE when using inter-word short pause option * </EN> */static booleanwchmm_add_word(WCHMM_INFO *wchmm, int word, int matchlen, int matchword, boolean enable_iwsp){ boolean ok_p; int j,k,n; int add_head, add_tail, add_to; int word_len, matchword_len; HMM_Logical *ltmp; int ato; LOGPROB prob; int ntmp; int ltmp_state_num;#ifdef PASS1_IWCD CD_Set *lcd = NULL;#endif int *out_from; int *out_from_next; LOGPROB *out_a; LOGPROB *out_a_next; /* for multipath handling */ int out_num_prev, out_num_next; int kkk; ok_p = TRUE; if (wchmm->hmminfo->multipath) { out_from = wchmm->wrk.out_from; out_from_next = wchmm->wrk.out_from_next; out_a = wchmm->wrk.out_a; out_a_next = wchmm->wrk.out_a_next; } /* * if (matchlen > 0) { * printf("--\n"); * put_voca(stdout, wchmm->winfo, word); * put_voca(stdout, wchmm->winfo, matchword); * printf("matchlen=%d\n", matchlen); * } */ /* variable abbreviations */ n = wchmm->n; word_len = wchmm->winfo->wlen[word]; matchword_len = wchmm->winfo->wlen[matchword]; /* malloc phone offset area */ wchmm->offset[word] = (int 
*)mybmalloc2(sizeof(int)*word_len, &(wchmm->malloc_root)); /* allocate unshared (new) part */ add_head = matchlen; add_tail = word_len - 1; add_to = matchlen - 1; if (wchmm->hmminfo->multipath) { /* make word-beginning node if needed */ if (matchlen == 0) { /* create word-beginning node */ wchmm->wordbegin[word] = n; wchmm->stend[n] = WORD_INVALID; acc_init(wchmm, n); wchmm->state[n].out.state = NULL; /* index the new word-beginning node as startnode (old ststart) */ wchmm->startnode[wchmm->startnum] = n; if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word; /* expand data area if necessary */ if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm); if (++n >= wchmm->maxwcn) wchmm_extend(wchmm); } else { wchmm->wordbegin[word] = wchmm->wordbegin[matchword]; } /* now n is at beginning of output state */ /* store the initial outgoing arcs to out_from[] and out_a[] */ out_num_prev = 0; if (matchlen == 0) { /* set the word-beginning node */ out_from[0] = wchmm->wordbegin[word]; out_a[0] = 0.0; out_num_prev = 1; } else { /*printf("%d(%s)\n", word, wchmm->winfo->woutput[word]);*/ /* on -iwsp, trailing sp is needed only when no phone will be created */ get_outtrans_list(wchmm, matchword, add_to, out_from, out_a, &out_num_prev, wchmm->winfo->maxwn, (enable_iwsp && add_tail - add_head + 1 <= 0) ? 
TRUE : FALSE); /*printf("NUM=%d\n", out_num_prev);*/ } } else { /* end of multipath block */ if (matchlen == 0) { if (wchmm->lmtype != LM_PROB || word != wchmm->winfo->head_silwid) { /* index the new word-beginning node as startnode (old ststart) */ wchmm->startnode[wchmm->startnum] = n; if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word; /* expand data area if necessary */ if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm); } } } if (add_tail - add_head + 1 > 0) { /* there are new phones to be created */ ntmp = n; for (j=add_head; j <= add_tail; j++) { /* for each new phones */ ltmp = wchmm->winfo->wseq[word][j]; ltmp_state_num = hmm_logical_state_num(ltmp);#ifdef PASS1_IWCD if (wchmm->ccd_flag) { /* in the triphone lexicon tree, the last phone of a word has left-context cdset */ if (wchmm->winfo->wlen[word] > 1 && j == wchmm->winfo->wlen[word] - 1) { if (wchmm->category_tree) {#ifdef USE_OLD_IWCD lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name);#else lcd = lcdset_lookup_with_category(wchmm, ltmp, wchmm->winfo->wton[word]); if (lcd == NULL) { /* no category-aware cdset found. This is case when no word can follow this word grammatically. 
so fallback to normal state */ jlog("WARNING: wchmm: no lcdset found for [%s::%04d], fallback to [%s]\n", ltmp->name, wchmm->winfo->wton[word], ltmp->name); lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name); }#endif } else { lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name); } if (lcd == NULL) { jlog("ERROR: wchmm: at word #%d: no lcdset found for [%s]\n", word, ltmp->name); ok_p = FALSE; } } }#endif /* PASS1_IWCD */ for (k = 1; k < ltmp_state_num - 1; k++) { /* for each state in the phone */ /* set state output prob info */#ifdef PASS1_IWCD if (wchmm->ccd_flag) { /* output info of triphones needs special handling */ if (wchmm->winfo->wlen[word] == 1) { /* word with only 1 phone */ wchmm->outstyle[ntmp] = AS_LRSET; wchmm->state[ntmp].out.lrset = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root)); (wchmm->state[ntmp].out.lrset)->hmm = ltmp; (wchmm->state[ntmp].out.lrset)->state_loc = k; if (wchmm->category_tree) { (wchmm->state[ntmp].out.lrset)->category = wchmm->winfo->wton[word]; } } else if (j == 0) { /* head phone of a word */ wchmm->outstyle[ntmp] = AS_RSET; wchmm->state[ntmp].out.rset = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root)); (wchmm->state[ntmp].out.rset)->hmm = ltmp; (wchmm->state[ntmp].out.rset)->state_loc = k; } else if (j == wchmm->winfo->wlen[word] - 1) { /* last phone of a word */ wchmm->outstyle[ntmp] = AS_LSET; wchmm->state[ntmp].out.lset = &(lcd->stateset[k]); } else { wchmm->outstyle[ntmp] = AS_STATE; if (ltmp->is_pseudo) { jlog("WARNING: wchmm: word-internal phone should not be pseudo\n"); put_voca(stdout, wchmm->winfo, word); ok_p = FALSE; } wchmm->state[ntmp].out.state = ltmp->body.defined->s[k]; } } else { /* monophone */ if (ltmp->is_pseudo) { j_internal_error("wchmm_add_word: CDSET phoneme exist in monophone?\n"); put_voca(stdout, wchmm->winfo, word); ok_p = FALSE; } wchmm->outstyle[ntmp] = AS_STATE; wchmm->state[ntmp].out.state = ltmp->body.defined->s[k]; }#else /* ~PASS1_IWCD */ if 
(ltmp->is_pseudo) { j_internal_error("wchmm_add_word: CDSET phoneme exist in monophone?\n"); put_voca(stdout, wchmm->winfo, word); ok_p = FALSE; } wchmm->state[ntmp].out = ltmp->body.defined->s[k];#endif /* PASS1_IWCD */ /* initialize other info */ acc_init(wchmm, ntmp); wchmm->stend[ntmp] = WORD_INVALID; if (! wchmm->hmminfo->multipath) { /* make transition arc from HMM transition info */ for (ato = 1; ato < ltmp_state_num; ato++) { prob = (hmm_logical_trans(ltmp))->a[k][ato]; if (prob != LOG_ZERO) { if (j == add_tail && k == ltmp_state_num - 2 && ato == ltmp_state_num - 1) { /* arc outside new part will be handled later */ } else { add_wacc(wchmm, ntmp, prob, ntmp + ato - k); } } } } ntmp++; /* expand wchmm if neccesary */ if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm); } /* end of state loop */ } /* end of phone loop */ if (wchmm->hmminfo->multipath) { /* On multipath version, the skip transition should be handled! */ /* make transition arc from HMM transition info */ ntmp = n; for (j = add_head; j <= add_tail; j++) { ltmp = wchmm->winfo->wseq[word][j]; ltmp_state_num = hmm_logical_state_num(ltmp); out_num_next = 0; /* arc from initial state ... need arc expansion from precious phone */ for (ato = 1; ato < ltmp_state_num; ato++) { prob = (hmm_logical_trans(ltmp))->a[0][ato]; if (prob != LOG_ZERO) { /* expand arc from previous HMM */ if (ato == ltmp_state_num - 1) { /* to final state ... just register states for next expansion */ for(kkk=0; kkk<out_num_prev; kkk++) { out_from_next[out_num_next] = out_from[kkk]; out_a_next[out_num_next] = out_a[kkk] + prob; out_num_next++; } } else { for(kkk=0; kkk<out_num_prev; kkk++) { add_wacc(wchmm, out_from[kkk], out_a[kkk] + prob, ntmp + ato - 1); } } } } /* end of state loop */ /* from outprob state */ for(k = 1; k < ltmp_state_num - 1; k++) { for (ato = 1; ato < ltmp_state_num; ato++) { prob = (hmm_logical_trans(ltmp))->a[k][ato]; if (prob != LOG_ZERO) { if (ato == ltmp_state_num - 1) { /* to final state ... 
register states for next expansion */ out_from_next[out_num_next] = ntmp; out_a_next[out_num_next] = prob; out_num_next++; } else { add_wacc(wchmm, ntmp, prob, ntmp + ato - k); } } } ntmp++; } /* end of state loop */ /* swap out list for next phone */ for(kkk=0;kkk<out_num_next;kkk++) { out_from[kkk] = out_from_next[kkk]; out_a[kkk] = out_a_next[kkk]; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -