📄 cdset.c
字号:
/**
 * @file   cdset.c
 * @author Akinobu LEE
 * @date   Tue Feb 15 17:58:54 2005
 *
 * <EN>
 * @brief  Generate and manage the pseudo %HMM set
 *
 * "Pseudo %HMM" is mainly a substitute for unknown context-dependent
 * biphone and monophone %HMMs that have not been defined in the HTK %HMM
 * definition and the HMMList mapping file.  They are used mainly in the
 * cross-word triphone computation on the 1st pass.
 *
 * Julius first generates a list of possible biphones and monophones after
 * reading the HTK %HMM definition file and the HMMList logical name mapping
 * file.  It then generates a CD_Set structure for each possible biphone and
 * monophone by parsing all the %HMM definitions to find the ones that share
 * the same context as each phone.
 *
 * For example, the triphones like "a-k+e", "a-k+b", "a-k+a" will be grouped
 * as pseudo phone set "a-k".  A pseudo phone "k" will contain all triphone
 * variants of the same base phone "k".  The generated pseudo %HMM sets are
 * stored in @a cdset_info in HTK_HMM_INFO.
 *
 * Then, the pseudo phones whose names (biphone or monophone) appear in
 * neither the HTK %HMM definitions nor the HMMList mapping file will be added
 * as aliases to unspecified phones in the %HMM index tree.  If biphones or
 * monophones are explicitly defined in the %HMM definition or HMMList file,
 * they will be used instead of this pseudo phone.
 * </EN>
 *
 * $Revision: 1.5 $
 *
 */
/*
 * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/htk_param.h>
#include <sent/htk_hmm.h>

//@{
/// @ingroup cdset

#define CD_STATE_SET_STEP 10	///< CD_State_Set memory allocation step

/**
 * Initialize total pseudo %HMM information in the given %HMM definition data.
 * The pseudo phone index tree is reset to empty (NULL).
 *
 * @param hmminfo [i/o] HTK %HMM definitions
 */
static void
cdset_init(HTK_HMM_INFO *hmminfo)
{
  hmminfo->cdset_info.cdtree = NULL;
}

/**
 * Allocate a CD_Set data for a new pseudo phone set.
 * The returned structure is uninitialized; the caller fills every member.
 *
 * @return pointer to newly allocated CD_Set.
 */
static CD_Set *
cdset_new(void)
{
  return((CD_Set *)mymalloc(sizeof(CD_Set)));
}

/**
 * Look up for a pseudo phone with the name, and return the content.
 *
 * @param hmminfo [in] %HMM information to search for.
 * @param cdstr [in] string of pseudo phone name to search.
 *
 * @return pointer to the pseudo phone if found, or NULL if not found
 * (including the case where no pseudo phone has been registered yet).
 */
CD_Set *
cdset_lookup(HTK_HMM_INFO *hmminfo, char *cdstr)
{
  CD_Set *cd;

  /* nothing registered: there is no tree to search */
  if (hmminfo->cdset_info.cdtree == NULL) return NULL;

  cd = aptree_search_data(cdstr, hmminfo->cdset_info.cdtree);
  /* the search result is verified by an explicit name comparison, and
     must not be dereferenced when the search yields nothing */
  if (cd != NULL && strmatch(cdstr, cd->name)) {
    return cd;
  }
  return NULL;
}

/**
 * Look up for a pseudo phone by the "left - center" name of the given
 * phone name.
 *
 * @param hmminfo [in] %HMM information to search for.
 * @param hmmname [in] string of the phone name.
 *
 * @return pointer to the pseudo phone if found, or NULL if not found.
 */
CD_Set *
lcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
{
  /* work area handed to leftcenter_name(); static, so this function is
     not reentrant */
  static char buf[MAX_HMMNAME_LEN];

  return(cdset_lookup(hmminfo, leftcenter_name(hmmname, buf)));
}

/**
 * Look up for a pseudo phone by the "center + right" name of the given
 * phone name.
 *
 * @param hmminfo [in] %HMM information to search for.
 * @param hmmname [in] string of the phone name.
 *
 * @return pointer to the pseudo phone if found, or NULL if not found.
 */
CD_Set *
rcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
{
  /* work area handed to rightcenter_name(); static, so this function is
     not reentrant */
  static char buf[MAX_HMMNAME_LEN];

  return(cdset_lookup(hmminfo, rightcenter_name(hmmname, buf)));
}

/**
 * Output text information of a pseudo phone to stdout: its name, and for
 * each state either the number of registered variants or "not exist".
 *
 * @param ptr [in] pointer to a pseudo phone set (CD_Set).
 */
static void
put_cdset(void *ptr)
{
  int i;
  CD_Set *a;

  a = ptr;
  printf("name: %s\n", a->name);
  for(i=0;i<a->state_num;i++) {
    if (a->stateset[i].num == 0) {
      printf("\t[state %d] not exist\n", i);
    } else {
      printf("\t[state %d] %d variants\n", i, a->stateset[i].num);
    }
  }
}

/**
 * Output all pseudo phone set information to stdout
 *
 * @param hmminfo [in] %HMM definition data that holds pseudo phone data.
 */
void
put_all_cdinfo(HTK_HMM_INFO *hmminfo)
{
  aptree_traverse_and_do(hmminfo->cdset_info.cdtree, put_cdset);
}

/**
 * Register a physical %HMM as a member of a pseudo phone set.
 *
 * If no pseudo phone set named @a cdname exists in the index tree, a new
 * one is created with the same number of states as @a d and added to the
 * tree.  If one exists but has fewer states than @a d, its state array is
 * expanded.  Then every state of @a d except the first and the last is
 * appended to the corresponding state set, unless the same state (compared
 * by pointer) is already there.
 *
 * @param root [i/o] root node of %HMM search index node.
 * @param d [in] a physical defined %HMM to be added.
 * @param cdname [in] name of the pseudo phone set.
 *
 * @return TRUE if anything was changed (a set was created or expanded, or
 * a state was newly added), FALSE if the specified physical %HMM already
 * exists in the pseudo phone.
 */
boolean
regist_cdset(APATNODE **root, HTK_HMM_Data *d, char *cdname)
{
  boolean need_new;
  CD_State_Set *tmp;
  CD_Set *lset = NULL, *lmatch = NULL;
  int j,n;
  boolean changed = FALSE;

  if (strlen(cdname) >= MAX_HMMNAME_LEN) {
    j_error("Error: HMM name exceeds limit (%d): %s!\n", MAX_HMMNAME_LEN, cdname);
  }

  /* check if the cdset already exist */
  need_new = TRUE;
  if (*root != NULL) {
    /* NOTE(review): assumes the search returns a valid entry whenever the
       tree is non-empty; the result is verified by name below and is also
       used as the insertion hint for aptree_add_entry() -- confirm against
       the aptree implementation */
    lmatch = aptree_search_data(cdname, *root);
    if (strmatch(lmatch->name, cdname)) {
      /* exist, add to it later */
      lset = lmatch;
      need_new = FALSE;
      /* if the state num is larger than allocated, expand the lset */
      if (d->state_num > lset->state_num) {
	lset->stateset = (CD_State_Set *)myrealloc(lset->stateset, sizeof(CD_State_Set) * d->state_num);
	/* N = state without storage, A = state with allocated storage   */
	/* before:  0 1 ... (lset->state_num-1)                          */
	/*          N A ... N                                            */
	/* after:   0 1 ...     ...            (d->state_num-1)          */
	/*          N A ... A   ...            N                         */
	/* The old last state becomes an inner state, so storage is      */
	/* allocated from there up to the one before the new last state. */
	for(j = lset->state_num - 1; j < d->state_num - 1; j++) {
	  lset->stateset[j].maxnum = CD_STATE_SET_STEP;
	  lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
	  lset->stateset[j].num = 0;
	}
	/* the new last state holds no state variants */
	lset->stateset[d->state_num-1].s = NULL;
	lset->stateset[d->state_num-1].num = 0;
	lset->stateset[d->state_num-1].maxnum = 0;
	lset->state_num = d->state_num;

	/* update transition table */
	lset->tr = d->tr;

	changed = TRUE;
      }
    }
  }

  if (need_new) {
    /* allocate as new with blank data */
    lset = cdset_new();
    lset->name = strdup(cdname);
    if (lset->name == NULL) {
      /* strdup() is not covered by the mymalloc() wrapper, so its failure
	 has to be detected here before the name is used for indexing */
      free(lset);
      j_error("Error: failed to allocate memory for pseudo HMM name: %s\n", cdname);
      return(FALSE);
    }
    lset->state_num = d->state_num;
    lset->stateset = (CD_State_Set *)mymalloc(sizeof(CD_State_Set) * lset->state_num);
    /* assume first and last state has no outprob */
    lset->stateset[0].s = lset->stateset[lset->state_num-1].s = NULL;
    lset->stateset[0].num = lset->stateset[lset->state_num-1].num = 0;
    lset->stateset[0].maxnum = lset->stateset[lset->state_num-1].maxnum = 0;
    for(j=1;j<lset->state_num-1; j++) {
      /* pre-allocate only the first step */
      lset->stateset[j].maxnum = CD_STATE_SET_STEP;
      lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
      lset->stateset[j].num = 0;
    }
    /* assign transition table of first found %HMM (ad-hoc?) */
    lset->tr = d->tr;
    /* add to search index tree */
    if (*root == NULL) {
      *root = aptree_make_root_node(lset);
    } else {
      aptree_add_entry(lset->name, lset, lmatch->name, root);
    }

    changed = TRUE;
  }

  /* register each HMM states to the lcdset */
  for (j=1;j<d->state_num-1;j++) {
    tmp = &(lset->stateset[j]);
    /* check if the state has already registered */
    for(n = 0; n < tmp->num ; n++) {
      if (tmp->s[n] == d->s[j]) { /* compare by pointer */
	break;
      }
    }
    if (n < tmp->num ) continue;	/* same state found, cancel regist. */

    /* expand storage area if necessary */
    if (tmp->num >= tmp->maxnum) {
      tmp->maxnum += CD_STATE_SET_STEP;
      tmp->s = (HTK_HMM_State **)myrealloc(tmp->s, sizeof(HTK_HMM_State *) * tmp->maxnum);
    }

    tmp->s[tmp->num] = d->s[j];
    tmp->num++;
    changed = TRUE;
  }

  return(changed);
}

/**
 * Remove a pseudo phone set entry from the index tree.
 *
 * @param hmminfo [i/o] %HMM definition data that holds the pseudo phone
 * index tree.
 * @param cdname [in] name of the pseudo phone set to be removed.
 *
 * @return TRUE if the tree is empty or the entry was found and removed
 * from the tree, FALSE if no entry with exactly that name exists.
 */
boolean
remove_cdset(HTK_HMM_INFO *hmminfo, char *cdname)
{
  CD_Set *lmatch;

  if (hmminfo->cdset_info.cdtree == NULL) return TRUE;

  lmatch = aptree_search_data(cdname, hmminfo->cdset_info.cdtree);
  /* do not dereference the result when the search yields nothing */
  if (lmatch == NULL || !strmatch(lmatch->name, cdname)) {
    return FALSE;
  }

  printf("[%s] found\n", lmatch->name);
  /* NOTE(review): only the index entry is removed here; the CD_Set and its
     state sets are not freed by this function.  Whether that is intended
     depends on aptree_remove_entry() -- confirm before adding frees. */
  aptree_remove_entry(cdname, &(hmminfo->cdset_info.cdtree));

  return TRUE;
}

/**
 * Construct the whole pseudo %HMM information, and also add them to the
 * logical Triphone tree.
 *
 * Every logical %HMM that is not itself a pseudo phone is registered three
 * times: under its left-center name, its right-center name, and its center
 * name.
 *
 * @param hmminfo [i/o] %HMM definition data.  The generated data will also
 * be stored within this.
 *
 * @return TRUE on success, FALSE on failure.
 */
boolean
make_cdset(HTK_HMM_INFO *hmminfo)
{
  HMM_Logical *lg;
  static char buf[MAX_HMMNAME_LEN];

  cdset_init(hmminfo);
  /* make cdset name from logical HMM name */
  /* left-context set: "a-k" for /a-k+i/, /a-k+o/, ...
     for 1st pass (word end) */
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    if (lg->is_pseudo) continue;
    regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, leftcenter_name(lg->name, buf));
  }
  /* right-context set: "a+o" for /b-a+o/, /t-a+o/, ...
     for 2nd pass (word beginning) */
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    if (lg->is_pseudo) continue;
    regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, rightcenter_name(lg->name, buf));
  }
  /* both-context set: "a" for all triphone with same base phone "a"
     for 1st pass (1 phoneme word, with no previous word hypo.) */
  for(lg = hmminfo->lgstart; lg; lg = lg->next) {
    if (lg->is_pseudo) continue;
    regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, center_name(lg->name, buf));
  }

  /* now that cdset is completely built */

  return(TRUE);
}

/**
 * Callback for aptree function to free the content of pseudo phone set:
 * the per-state storage, the state set array, the name, and the set itself.
 *
 * @param arg [in] pointer to the pseudo phone set to be free
 */
static void
callback_free_lcdset_content(void *arg)
{
  CD_Set *d;
  int j;

  d = arg;
  for(j=0;j<d->state_num;j++) {
    /* first/last states hold NULL here; free(NULL) is a no-op */
    free(d->stateset[j].s);
  }
  free(d->stateset);
  free(d->name);
  free(d);
}

/**
 * Remove all the registered category-indexed pseudo state sets.
 * This function will be called when a grammar is changed to re-build the
 * state sets.
 *
 * @param root [i/o] pointer to hold the root index pointer; set to NULL
 * after the tree and its contents are released.
 */
void
free_cdset(APATNODE **root)
{
  if (*root != NULL) {
    /* release the CD_Set contents first, then the index tree itself */
    aptree_traverse_and_do(*root, callback_free_lcdset_content);
    free_aptree(*root);
    *root = NULL;
  }
}

//@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -