📄 voca_load_htkdict.c
字号:
/** * @file voca_load_htkdict.c * * <JA> * @brief Reading of word dictionary data in HTK format * * When a triphone model is used, the conversion from monophone notation to * triphone and the model existence check are performed while this dictionary * is being read. * </JA> * * <EN> * @brief Read word dictionary from a file in HTK format * * When using triphone model, conversion from monophone expression * in dictionary to triphone and the existence check of word-internal * triphone will be done here. * </EN> * * @author Akinobu LEE * @date Fri Feb 18 19:43:06 2005 * * $Revision: 1.7 $ * *//* * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology * All rights reserved */#include <sent/stddefs.h>#include <sent/vocabulary.h>#include <sent/htk_hmm.h>/* * dictionary format: * * 1 word per line. * * fields: GrammarEntry [OutputString] phone1 phone2 .... * * GrammarEntry * (for N-gram) * word name in N-gram * (for DFA) * terminal symbol ID * * [OutputString] * String to output when the word is recognized. * * {OutputString} * String to output when the word is recognized. * Also specifies that this word is transparent * * phone1 phone2 .... * sequence of logical HMM name (normally phoneme) * to express the pronunciation */#define PHONEMELEN_STEP 30 ///< Memory allocation step for phoneme sequencestatic char buf[MAXLINELEN]; ///< Local work area for input text processingstatic char bufbak[MAXLINELEN]; ///< Local work area for debug messagestatic char trbuf[3][20]; ///< Local buffer for triphone conversionstatic char chbuf[30]; ///< Another local buffer for triphone conversionstatic char nophone[1]; ///< Local buffer to indicate 'no phone'static int trp_l; ///< Triphone cycle indexstatic int trp; ///< Triphone cycle indexstatic int trp_r; ///< Triphone cycle index/** * Return string of triphone name composed from the last 3 calls. * * @param p [in] next phone string * * @return the composed triphone name, or NULL on end. 
*/char *cycle_triphone(char *p){ int i; if (p == NULL) { /* initialize */ nophone[0]='\0'; for(i=0;i<3;i++) trbuf[i][0] = '\0'; trp_l = 0; trp = 1; trp_r = 2; return NULL; } strcpy(trbuf[trp_r],p); chbuf[0]='\0'; if (trbuf[trp_l][0] != '\0') { strcat(chbuf,trbuf[trp_l]); strcat(chbuf,HMM_LC_DLIM); } if (trbuf[trp][0] == '\0') { i = trp_l; trp_l = trp; trp = trp_r; trp_r = i; return NULL; } strcat(chbuf, trbuf[trp]); if (trbuf[trp_r][0] != '\0') { strcat(chbuf,HMM_RC_DLIM); strcat(chbuf,trbuf[trp_r]); } i = trp_l; trp_l = trp; trp = trp_r; trp_r = i; return(chbuf);}/** * Flush the triphone buffer and return the last biphone. * * @return the composed last bi-phone name. */char *cycle_triphone_flush(){ return(cycle_triphone(nophone));}/** * Add a triphone name to the missing error list in WORD_INFO. * * @param winfo [i/o] word dictionary to add the error phone to error list * @param name [in] phone name to be added */static voidadd_to_error(WORD_INFO *winfo, char *name){ char *buf; char *match; buf = (char *)mymalloc(strlen(name) + 1); strcpy(buf, name); if (winfo->errph_root == NULL) { winfo->errph_root = aptree_make_root_node(buf, &(winfo->mroot)); } else { match = aptree_search_data(buf, winfo->errph_root); if (match == NULL || !strmatch(match, buf)) { aptree_add_entry(buf, buf, match, &(winfo->errph_root), &(winfo->mroot)); } }}/** * Traverse callback function to output a error phone. * * @param x [in] error phone string of the node */static voidcallback_list_error(void *x){ char *name; name = x; jlog("Error: voca_load_htkdict: %s\n", name);}/** * Output all error phones appeared while readin a word dictionary. * * @param winfo [in] word dictionary data */static voidlist_error(WORD_INFO *winfo){ jlog("Error: voca_load_htkdict: begin missing phones\n"); aptree_traverse_and_do(winfo->errph_root, callback_list_error); jlog("Error: voca_load_htkdict: end missing phones\n");}/** * Parse a word dictionary and set the maximum state length per word. 
* * @param winfo [i/o] */voidvoca_set_stats(WORD_INFO *winfo){ int w,p,n; int maxwn; int maxwlen; int states; int models; int trnum; maxwn = 0; maxwlen = 0; states = 0; models = 0; trnum = 0; for (w=0;w<winfo->num;w++) { models += winfo->wlen[w]; if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w]; n = 0; for (p=0;p<winfo->wlen[w];p++) { n += hmm_logical_state_num(winfo->wseq[w][p]) - 2; } if (maxwn < n) maxwn = n; states += n; if (winfo->is_transparent[w]) trnum++; } winfo->maxwn = maxwn; winfo->maxwlen = maxwlen; winfo->totalstatenum = states; winfo->totalmodelnum = models; winfo->totaltransnum = trnum;}/** * Start loading a dictionary. See voca_load_htkdict() for an example * of using this function. * * @param winfo [i/o] dictionary data where the data will be loaded * @param hmminfo [in] phoneme HMM definition * @param ignore_tri_conv [in] if TRUE, skip triphone conversion while loading * */voidvoca_load_start(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv){ winfo->ok_flag = TRUE; winfo->linenum = 0; if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) { winfo->do_conv = TRUE; } else { winfo->do_conv = FALSE; } winfo_init(winfo); winfo->num = 0;}/** * Load a line from buffer and set parameters to the dictionary. * See voca_load_htkdict() for an example of using this function. * * @param buf [in] input buffer containing a word entry * @param winfo [i/o] word dictionary to append the entry * @param hmminfo [in] phoneme HMM definition * * @return TRUE when successfully read, or FALSE on encountered end of * dictionary. When an error occurs, this function will set winfo->ok_flag * to FALSE. 
* */booleanvoca_load_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo){ WORD_ID vnum; winfo->linenum++; vnum = winfo->num; if (vnum >= winfo->maxnum) { if (winfo_expand(winfo) == FALSE) return FALSE; } if (voca_load_htkdict_line(buf, &vnum, winfo->linenum, winfo, hmminfo, winfo->do_conv, &(winfo->ok_flag)) == FALSE) { return FALSE; } winfo->num = vnum; return TRUE;}/** * End loading dictionary entries. It calculates some statistics for * the read entries, outputs errors if encountered * while the last loading, and returns with status whether an error * occured while loading. * * @param winfo [i/o] word dictionary just read by voca_load_line() calls * * @return TRUE when no error has been occured during loading, or FALSE * if an error occured. * */booleanvoca_load_end(WORD_INFO *winfo){ voca_set_stats(winfo); if (!winfo->ok_flag) { if (winfo->errph_root != NULL) list_error(winfo); } return(winfo->ok_flag);}/** * Top function to read word dictionary via file pointer (gzip enabled) * * @param fp [in] file pointer * @param winfo [out] pointer to word dictionary to store the read data. * @param hmminfo [in] HTK %HMM definition data. if NULL, phonemes are ignored. * @param ignore_tri_conv [in] TRUE if triphone conversion is ignored * * @return TRUE on success, FALSE on any error word. */booleanvoca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv){ boolean ret; voca_load_start(winfo, hmminfo, ignore_tri_conv); while (getl(buf, sizeof(buf), fp) != NULL) { if (voca_load_line(buf, winfo, hmminfo) == FALSE) break; } ret = voca_load_end(winfo); return(ret);}/** * Top function to read word dictionary via normal file pointer. * * @param fp [in] file pointer * @param winfo [out] pointer to word dictionary to store the read data. * @param hmminfo [in] HTK %HMM definition data. if NULL, phonemes are ignored. * @param ignore_tri_conv [in] TRUE if triphone conversion is ignored * * @return TRUE on success, FALSE on any error word. 
*/booleanvoca_load_htkdict_fp(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv){ boolean ret; voca_load_start(winfo, hmminfo, ignore_tri_conv); while(getl_fp(buf, MAXLINELEN, fp) != NULL) { if (voca_load_line(buf, winfo, hmminfo) == FALSE) break; } ret = voca_load_end(winfo);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -