📄 wchmm.c
字号:
/**
 * @file   wchmm.c
 *
 * <JA>
 * @brief  木構造化辞書の構築
 *
 * ここでは，与えられた単語辞書, HMM定義および言語制約から木構造化辞書を
 * 構築する関数が定義されています. 木構造化辞書は起動時に構築され，
 * 第1パスの認識に用いられます. 木構造化辞書は状態単位で構成され，
 * 各状態はHMM出力確率と遷移先の他，および探索のための様々な情報を含みます.
 *
 * 開発の経緯上，ソース内では木構造化辞書は wchmm (word-conjunction HMM) と
 * も表現されています.
 *
 * </JA>
 *
 * <EN>
 * @brief  Construction of tree lexicon.
 *
 * Functions to build a tree lexicon (also called word-conjunction HMM here)
 * from word dictionary, HMM and language models are defined here.  The
 * constructed tree lexicon will be used for the recognition of the 1st pass.
 * The lexicon is composed per HMM state unit, and various information
 * about output probabilities, arcs, language model constraints, and others
 * is assembled in the lexicon.
 *
 * Note that the word "wchmm" in the source code is a synonym of
 * "tree lexicon".
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Mon Sep 19 23:39:15 2005
 *
 * $Revision: 1.7 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

/* wchmm = word conjunction HMM = lexicon tree */

#include <julius/julius.h>

#define WCHMM_SIZE_CHECK		///< If defined, do wchmm size estimation (for debug only)

/**************************************************************/
/*********** Initialization of tree lexicon *******************/
/**************************************************************/

/**
 * <JA>
 * 木構造化辞書構造体を新規に割り付ける.
 *
 * @return 新たにメモリ上に割り付けられた木構造化辞書構造体へのポインタを返す.
 * </JA>
 * <EN>
 * Allocate a new tree lexicon structure.
 *
 * @return pointer to the newly allocated tree lexicon structure.
* </EN> * @callgraph * @callergraph */WCHMM_INFO *wchmm_new(){ WCHMM_INFO *w; w = (WCHMM_INFO *)mymalloc(sizeof(WCHMM_INFO)); w->lmtype = LM_UNDEF; w->lmvar = LM_UNDEF; w->ngram = NULL; w->dfa = NULL; w->winfo = NULL; w->malloc_root = NULL;#ifdef PASS1_IWCD w->lcdset_category_root = NULL; w->lcdset_mroot = NULL;#endif /* PASS1_IWCD */ w->wrk.out_from_len = 0; /* reset user function entry point */ w->uni_prob_user = NULL; w->bi_prob_user = NULL; return w;}/** * <JA> * 腾菇陇步辑今の柒推を介袋步する. * * @param wchmm [out] 腾菇陇步辑今へのポインタ * </JA> * <EN> * Initialize content of a lexicon tree. * * @param wchmm [out] pointer to the lexicon tree structure * </EN> */static voidwchmm_init(WCHMM_INFO *wchmm){ /* the resulting tree size is typically half of total state num */ wchmm->maxwcn = wchmm->winfo->totalstatenum / 2; wchmm->state = (WCHMM_STATE *)mymalloc(sizeof(WCHMM_STATE)*wchmm->maxwcn); wchmm->self_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->maxwcn); wchmm->next_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->maxwcn); wchmm->ac = (A_CELL2 **)mymalloc(sizeof(A_CELL2 *)*wchmm->maxwcn); wchmm->stend = (WORD_ID *)mymalloc(sizeof(WORD_ID)*wchmm->maxwcn); wchmm->offset = (int **)mymalloc(sizeof(int *)*wchmm->winfo->num); wchmm->wordend = (int *)mymalloc(sizeof(int)*wchmm->winfo->num); wchmm->maxstartnum = STARTNODE_STEP; wchmm->startnode = (int *)mymalloc(sizeof(int)*STARTNODE_STEP); wchmm->startnum = 0; if (wchmm->category_tree) { wchmm->start2wid = (WORD_ID *)mymalloc(sizeof(WORD_ID)*STARTNODE_STEP); } if (wchmm->hmminfo->multipath) { wchmm->wordbegin = (int *)mymalloc(sizeof(int)*wchmm->winfo->num); wchmm->wrk.out_from = (int *)mymalloc(sizeof(int) * wchmm->winfo->maxwn); wchmm->wrk.out_from_next = (int *)mymalloc(sizeof(int) * wchmm->winfo->maxwn); wchmm->wrk.out_a = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->winfo->maxwn); wchmm->wrk.out_a_next = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->winfo->maxwn); wchmm->wrk.out_from_len = wchmm->winfo->maxwn; } else { wchmm->wordend_a 
= (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->winfo->num); }#ifdef PASS1_IWCD wchmm->outstyle = (unsigned char *)mymalloc(sizeof(unsigned char)*wchmm->maxwcn);#endif#ifdef UNIGRAM_FACTORING wchmm->start2isolate = NULL; wchmm->isolatenum = 0;#endif if (!wchmm->category_tree) { wchmm->sclist = NULL; wchmm->sclist2node = NULL;#ifdef UNIGRAM_FACTORING wchmm->fscore = NULL;#endif } wchmm->n = 0;}/** * <JA> * 腾菇陇步辑今の觉轮呈羌挝拌を MAXWCNSTEP 尸だけ凯墓する. * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Expand state-related area in a tree lexicon by MAXWCNSTEP. * * @param wchmm [i/o] tree lexicon * </EN> */static voidwchmm_extend(WCHMM_INFO *wchmm){ /* practical value! */ wchmm->maxwcn += wchmm->winfo->totalstatenum / 6; wchmm->state = (WCHMM_STATE *)myrealloc(wchmm->state, sizeof(WCHMM_STATE)*wchmm->maxwcn); wchmm->self_a = (LOGPROB *)myrealloc(wchmm->self_a, sizeof(LOGPROB)*wchmm->maxwcn); wchmm->next_a = (LOGPROB *)myrealloc(wchmm->next_a, sizeof(LOGPROB)*wchmm->maxwcn); wchmm->ac = (A_CELL2 **)myrealloc(wchmm->ac, sizeof(A_CELL2 *)*wchmm->maxwcn); wchmm->stend = (WORD_ID *)myrealloc(wchmm->stend, sizeof(WORD_ID)*wchmm->maxwcn);#ifdef PASS1_IWCD wchmm->outstyle = (unsigned char *)myrealloc(wchmm->outstyle, sizeof(unsigned char)*wchmm->maxwcn);#endif}/** * <JA> * 腾菇陇步辑今の帽胳黎片ノ〖ド呈羌挝拌を STARTNODE_STEP尸だけ凯墓する. (multipath) * * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Expand word-start nodes area in a tree lexicon by STARTNODE_STEP. (multipath) * * @param wchmm [i/o] tree lexicon * </EN> */static voidwchmm_extend_startnode(WCHMM_INFO *wchmm){ wchmm->maxstartnum += STARTNODE_STEP; wchmm->startnode = (int *)myrealloc(wchmm->startnode, sizeof(int) * wchmm->maxstartnum); if (wchmm->category_tree) { wchmm->start2wid = (WORD_ID *)myrealloc(wchmm->start2wid, sizeof(WORD_ID) * wchmm->maxstartnum); }}/** * <JA> * 腾菇陇步辑今およびその柒婶の充烧メモリを链て豺庶する. * * @param w [in] 腾菇陇步辑今 * </JA> * <EN> * Free all data in a tree lexicon. 
* * @param w [in] tree lexicon * </EN> * @callgraph * @callergraph */voidwchmm_free(WCHMM_INFO *w){ S_CELL *sc, *sctmp; int i; /* wchmm->state[i].ac malloced by mybmalloc2() */ /* wchmm->offset[][] malloced by mybmalloc2() */#ifdef PASS1_IWCD /* LRC_INFO, RC_INFO in wchmm->state[i].outsty malloced by mybmalloc2() */#endif /* they all will be freed by a single mybfree2() call */ mybfree2(&(w->malloc_root)); if (!w->category_tree) { if (w->sclist != NULL) { for(i=1;i<w->scnum;i++) { sc = w->sclist[i]; while(sc) { sctmp = sc->next; free(sc); sc = sctmp; } } free(w->sclist); } if (w->sclist2node != NULL) free(w->sclist2node);#ifdef UNIGRAM_FACTORING if (w->fscore != NULL) free(w->fscore);#endif }#ifdef UNIGRAM_FACTORING if (w->start2isolate != NULL) free(w->start2isolate);#endif#ifdef PASS1_IWCD free(w->outstyle);#endif if (w->hmminfo->multipath) { free(w->wordbegin); } else { free(w->wordend_a); } if (w->category_tree) free(w->start2wid); free(w->startnode); free(w->wordend); free(w->offset); free(w->stend); free(w->ac); free(w->next_a); free(w->self_a); free(w->state);#ifdef PASS1_IWCD if (w->category_tree) lcdset_remove_with_category_all(w);#endif /* PASS1_IWCD */ if (w->wrk.out_from_len != 0) { free(w->wrk.out_from); free(w->wrk.out_from_next); free(w->wrk.out_a); free(w->wrk.out_a_next); w->wrk.out_from_len = 0; } free(w);}/**************************************************************//*********** Word sort functions for tree construction ********//**************************************************************//** * <JA> * 帽胳を不燎のならびでソ〖トするqsort_reentrant簇眶 * * @param widx1 [in] 帽胳ID 1 へのポインタ * @param widx2 [in] 帽胳ID 2 へのポインタ * * @return 帽胳widx2が帽胳widx1の办婶か竞界であれば 1, 帽胳widx1が帽胳widx2の办婶か竞界であれば -1, 链く票じ不燎事びであれば 0 を手す. * </JA> * <EN> * qsort_reentrant function to sort words by their phoneme sequence. 
* * @param widx1 [in] pointer to word id #1 * @param widx2 [in] pointer to wrod id #2 * * @return 1 if word[widx2] is part of word[widx1], -1 if word[widx1] is part of word[widx2], or 0 if the two words are equal. * </EN> */static intcompare_wseq(WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo){ int len1, len2, n; int p=0; len1 = winfo->wlen[*widx1]; len2 = winfo->wlen[*widx2]; n=0; /* while (n < len1 && n < len2 && (p = (int)winfo->wseq[*widx1][n] - (int)winfo->wseq[*widx2][n]) == 0 ) n++;*/ while (n < len1 && n < len2 && (p = strcmp((winfo->wseq[*widx1][n])->name, (winfo->wseq[*widx2][n])->name)) == 0 ) n++; if (n < len1) { if (n < len2) { /* differ */ return(p); } else { /* 2 is part of 1 */ return(1); } } else { if (n < len2) { /* 1 is part of 2 */ return(-1); } else { /* same */ return(0); } }}/** * <JA> * 帽胳IDの礁圭 windex[bgn..bgn+len-1] を帽胳の不燎ならびでソ〖トする. * * @param winfo [in] 帽胳辑今 * @param windex [i/o] 帽胳IDのインデックス误∈柒婶でソ〖トされる∷ * @param bgn [in] @a windex のソ〖ト倡幌爬 * @param len [in] @a windex の @a bgn からのソ〖トする妥燎眶 * </JA> * <EN> * Sort word IDs in windex[bgn..bgn+len-1] by their phoneme sequence order. * * @param winfo [in] word lexicon * @param windex [i/o] index sequence of word IDs, (will be sorted in this function) * @param bgn [in] start point to sort in @a windex * @param len [in] length of indexes to be sorted from @a bgn * </EN> */static voidwchmm_sort_idx_by_wseq(WORD_INFO *winfo, WORD_ID *windex, WORD_ID bgn, WORD_ID len){ qsort_reentrant(&(windex[bgn]), len, sizeof(WORD_ID), (int (*)(const void *, const void *, void *))compare_wseq, winfo);}/** * <JA> * 帽胳をカテゴリIDでソ〖トするqsort簇眶. * * @param widx1 [in] 妥燎1へのポインタ * @param widx2 [in] 妥燎2へのポインタ * * @return * </JA> * <EN> * qsort function to sort words by their category ID. 
* * @param widx1 [in] pointer to element #1 * @param widx2 [in] pointer to element #2 * * @return * </EN> */static intcompare_category(WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo){ int c1,c2; c1 = winfo->wton[*widx1]; c2 = winfo->wton[*widx2]; return(c1 - c2);}/** * <JA> * 帽胳ID礁圭 windex[0..len-1] をカテゴリIDでソ〖トする. * * @param winfo [in] 帽胳辑今 * @param windex [i/o] 帽胳IDのインデックス误∈柒婶でソ〖トされる∷ * @param len [in] @a windex の妥燎眶 * </JA> * <EN> * Sort word IDs in windex[0..len-1] by their category ID. * * @param winfo [in] tree lexicon * @param windex [i/o] index sequence of word IDs, (will be sorted in this function) * @param len [in] number of elements in @a windex * </EN> */static voidwchmm_sort_idx_by_category(WORD_INFO *winfo, WORD_ID *windex, WORD_ID len){ qsort_reentrant(windex, len, sizeof(WORD_ID), (int (*)(const void *, const void *, void *))compare_category, winfo);} /**********************************************************************//************** Subroutines to link part of words ********************//**********************************************************************//** * <JA> * 2帽胳粗で·帽胳の黎片から票办で鼎铜材墙な不燎の眶を拇べる.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -