📄 voca_load_htkdict.c
字号:
/**
 * @file   voca_load_htkdict.c
 * @author Akinobu LEE
 * @date   Fri Feb 18 19:43:06 2005
 *
 * <JA>
 * @brief  Reading of word dictionary data in HTK format
 *
 * When a triphone model is used, the conversion from monophone notation
 * to triphones and the model existence check are performed while this
 * dictionary is being read.
 *
 * NOTE(review): this is an English rendering of the Japanese section,
 * whose text was mis-encoded in this copy of the file.
 * </JA>
 *
 * <EN>
 * @brief  Read word dictionary from a file in HTK format
 *
 * When using a triphone model, conversion from the monophone expression
 * in the dictionary to triphones and the existence check of word-internal
 * triphones will be done here.
 * </EN>
 *
 * $Revision: 1.4 $
 *
 */
/*
 * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/vocabulary.h>
#include <sent/htk_hmm.h>

/*
 * dictionary format:
 *
 * 1 word per line.
 *
 * fields: GrammarEntry [OutputString] phone1 phone2 ....
 *
 *     GrammarEntry
 *         (for N-gram)
 *             word name in N-gram
 *         (for DFA)
 *             terminal symbol ID
 *
 *     [OutputString]
 *         String to output when the word is recognized.
 *
 *     {OutputString}
 *         String to output when the word is recognized.
 *         Also specifies that this word is transparent
 *
 *     phone1 phone2 ....
 *         sequence of logical HMM name (normally phoneme)
 *         to express the pronunciation
 */

#define PHONEMELEN_STEP  30     ///< Memory allocation step for phoneme sequence
static char buf[MAXLINELEN];    ///< Local work area for input text processing
static char bufbak[MAXLINELEN]; ///< Local work area for debug message
static char trbuf[3][20];       ///< Local buffer for triphone conversion: the three most recent phones
static char chbuf[30];          ///< Another local buffer for triphone conversion: the composed name
static char nophone[1];         ///< Local buffer to indicate 'no phone' (empty string)
static int trp_l;               ///< Triphone cycle index: slot of trbuf used as left context
static int trp;                 ///< Triphone cycle index: slot of trbuf used as center phone
static int trp_r;               ///< Triphone cycle index: slot of trbuf used as right context

/**
 * Return the triphone name composed from the last 3 calls.
 *
 * Passing NULL resets the internal state and returns NULL.
 *
 * @param p [in] next phone string
 *
 * @return the composed triphone name, or NULL when no center phone is
 * available yet.
*/char *cycle_triphone(char *p){ int i; if (p == NULL) { /* initialize */ nophone[0]='\0'; for(i=0;i<3;i++) trbuf[i][0] = '\0'; trp_l = 0; trp = 1; trp_r = 2; return NULL; } strcpy(trbuf[trp_r],p); chbuf[0]='\0'; if (trbuf[trp_l][0] != '\0') { strcat(chbuf,trbuf[trp_l]); strcat(chbuf,HMM_LC_DLIM); } if (trbuf[trp][0] == '\0') { i = trp_l; trp_l = trp; trp = trp_r; trp_r = i; return NULL; } strcat(chbuf, trbuf[trp]); if (trbuf[trp_r][0] != '\0') { strcat(chbuf,HMM_RC_DLIM); strcat(chbuf,trbuf[trp_r]); } i = trp_l; trp_l = trp; trp = trp_r; trp_r = i; return(chbuf);}/** * Flush the triphone buffer and return the last biphone. * * @return the composed last bi-phone name. */char *cycle_triphone_flush(){ return(cycle_triphone(nophone));}/** * Add a triphone name to the missing error list in WORD_INFO. * * @param winfo [i/o] word dictionary to add the error phone to error list * @param name [in] phone name to be added */static voidadd_to_error(WORD_INFO *winfo, char *name){ char *buf; char *match; buf = (char *)mymalloc(strlen(name) + 1); strcpy(buf, name); if (winfo->errph_root == NULL) { winfo->errph_root = aptree_make_root_node(buf); } else { match = aptree_search_data(buf, winfo->errph_root); if (!strmatch(match, buf)) { aptree_add_entry(buf, buf, match, &(winfo->errph_root)); } }}/** * Traverse callback function to output a error phone. * * @param x [in] error phone string of the node */static voidcallback_list_error(void *x){ char *name; name = x; j_printf("%s\n", name);}/** * Output all error phones appeared while readin a word dictionary. * * @param winfo [in] word dictionary data */static voidlist_error(WORD_INFO *winfo){ j_printf("////// Missing phones:\n"); aptree_traverse_and_do(winfo->errph_root, callback_list_error); j_printf("//////////////////////\n");}/** * Parse a word dictionary and set the maximum state length per word. 
* * @param winfo [i/o] */static voidset_maxwn(WORD_INFO *winfo){ int w,p,n; int maxwn; maxwn = 0; for (w=0;w<winfo->num;w++) { n = 0; for (p=0;p<winfo->wlen[w];p++) { n += hmm_logical_state_num(winfo->wseq[w][p]) - 2; } if (maxwn < n) maxwn = n; } winfo->maxwn = maxwn;}/** * Parse the word dictionary to set the maximum word length. * * @param winfo [i/o] word dictionary data */static voidset_maxwlen(WORD_INFO *winfo){ WORD_ID w; int maxwlen; maxwlen = 0; for(w=0;w<winfo->num;w++) { if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w]; } winfo->maxwlen = maxwlen;}/** * Top function to read word dictionary via file pointer * * @param fp [in] file pointer * @param winfo [out] pointer to word dictionary to store the read data. * @param hmminfo [in] HTK %HMM definition data. if NULL, phonemes are ignored. * @param ignore_tri_conv [in] TRUE if triphone conversion is ignored * * @return TRUE on success, FALSE on any error word. */booleanvoca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv){ boolean ok_flag = TRUE; WORD_ID vnum; boolean do_conv = FALSE; if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) do_conv = TRUE; winfo_init(winfo); vnum = 0; while (getl(buf, sizeof(buf), fp) != NULL) { if (vnum >= winfo->maxnum) winfo_expand(winfo); if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break; vnum++; } winfo->num = vnum; if (winfo->errph_root != NULL) list_error(winfo); /* compute maxwn */ set_maxwn(winfo); set_maxwlen(winfo); return(ok_flag);}/** * Top function to read word dictionary via file descriptor. * * @param fd [in] file descriptor * @param winfo [out] pointer to word dictionary to store the read data. * @param hmminfo [in] HTK %HMM definition data. if NULL, phonemes are ignored. * @param ignore_tri_conv [in] TRUE if triphone conversion is ignored * * @return TRUE on success, FALSE on any error word. 
*/booleanvoca_load_htkdict_fd(int fd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv){ boolean ok_flag = TRUE; WORD_ID vnum; boolean do_conv = FALSE; if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) do_conv = TRUE; winfo_init(winfo); vnum = 0; while(getl_fd(buf, MAXLINELEN, fd) != NULL) { if (vnum >= winfo->maxnum) winfo_expand(winfo); if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break; vnum++; } winfo->num = vnum; if (winfo->errph_root != NULL) list_error(winfo); /* compute maxwn */ set_maxwn(winfo); set_maxwlen(winfo); return(ok_flag);}/** * Top function to read word dictionary via socket descriptor. * * @param sd [in] socket descriptor * @param winfo [out] pointer to word dictionary to store the read data. * @param hmminfo [in] HTK %HMM definition data. if NULL, phonemes are ignored. * @param ignore_tri_conv [in] TRUE if triphone conversion is ignored * * @return TRUE on success, FALSE on any error word. */booleanvoca_load_htkdict_sd(int sd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv){ boolean ok_flag = TRUE; WORD_ID vnum; boolean do_conv = FALSE; if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) do_conv = TRUE; winfo_init(winfo); vnum = 0; while(getl_sd(buf, MAXLINELEN, sd) != NULL) { if (vnum >= winfo->maxnum) winfo_expand(winfo); if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break; vnum++; } winfo->num = vnum; if (winfo->errph_root != NULL) list_error(winfo); /* compute maxwn */ set_maxwn(winfo); set_maxwlen(winfo); return(ok_flag);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -