📄 m_fusion.c
字号:
/**
 * @file   m_fusion.c
 *
 * <EN>
 * @brief  Final set up for recognition.
 *
 * These functions build everything needed for recognition: load
 * models into memory, build data structures such as tree lexicon, and
 * allocate work area for computation.
 *
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Thu May 12 13:31:47 2005
 *
 * $Revision: 1.13 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/**
 * <EN>
 * @brief  Read in an acoustic HMM from file and setup for recognition.
 *
 * This function reads HMM definitions from file, reads also a
 * HMMList file, makes logical-to-physical model mapping, determines
 * required parameter type, determines whether multi-path handling is needed,
 * and finds the pause model in the definitions.
 *
 * The feature vector extraction parameters are also finally
 * determined in this function.  The information used for the
 * determination is (1) the header values in hmmdefs, (2) embedded
 * parameters in binary HMM if you are reading a binary HMM made with
 * recent mkbinhmm, (3) user-specified parameters in jconf
 * configurations (either separately specified or by -htkconf
 * options).
 *
 * </EN>
 *
 * @param amconf [in] AM configuration variables
 * @param jconf [i/o] global configuration variables
 *
 * @return the newly created HMM information structure, or NULL on failure.
 *
 */
static HTK_HMM_INFO *
initialize_HMM(JCONF_AM *amconf, Jconf *jconf)
{
  HTK_HMM_INFO *hmminfo;

  /* at here, the analysis parameters hold values specified by user or
     by user-specified HTK config file */
  if (amconf->analysis.para_hmm.loaded == 1) {
    jlog("Warning: you seems to read more than one acoustic model for recognition, but\n");
    jlog("Warning: previous one already has header-embedded acoustic parameters\n");
    jlog("Warning: if you have different parameters, result may be wrong!\n");
  }

  /* allocate new hmminfo */
  hmminfo = hmminfo_new();

  /* load hmmdefs; on failure release the half-built structure */
  if (init_hmminfo(hmminfo, amconf->hmmfilename, amconf->mapfilename,
                   &(amconf->analysis.para_hmm)) == FALSE) {
    hmminfo_free(hmminfo);
    return NULL;
  }

  /* set multipath mode flag: a user request overrides what the model needs */
  if (amconf->force_multipath) {
    jlog("STAT: m_fusion: force multipath HMM handling by user request\n");
    hmminfo->multipath = TRUE;
  } else {
    hmminfo->multipath = hmminfo->need_multipath;
  }

  /* only MFCC is supported for audio input */
  /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */

  /* check parameter type of this acoustic HMM */
  if (jconf->input.type == INPUT_WAVEFORM) {
    /* Decode parameter extraction type according to the training
       parameter type in the header of the given acoustic HMM */
    if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) {
      jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC is supported\n");
      hmminfo_free(hmminfo);
      return NULL;
    }
    /* set acoustic analysis parameters from HMM header */
    calc_para_from_header(&(amconf->analysis.para),
                          hmminfo->opt.param_type, hmminfo->opt.vec_size);
  }

  /* check if tied_mixture: such a model must come with a codebook */
  if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) {
    jlog("ERROR: m_fusion: this tied-mixture model has no codebook!?\n");
    hmminfo_free(hmminfo);
    return NULL;
  }

#ifdef PASS1_IWCD
  /* make state clusters of same context for inter-word triphone approx. */
  if (hmminfo->is_triphone) {
    jlog("STAT: making pseudo bi/mono-phone for IW-triphone\n");
    if (make_cdset(hmminfo) == FALSE) {
      jlog("ERROR: m_fusion: failed to make context-dependent state set\n");
      hmminfo_free(hmminfo);
      return NULL;
    }
    /* add those `pseudo' biphone and monophone to the logical HMM names */
    /* they point not to the defined HMM, but to the CD_Set structure */
    hmm_add_pseudo_phones(hmminfo);
  }
#endif

  /* find short pause model and set to hmminfo->sp */
  htk_hmm_set_pause_model(hmminfo, amconf->spmodel_name);

  hmminfo->cdset_method = amconf->iwcdmethod;
  hmminfo->cdmax_num = amconf->iwcdmaxn;

  /* merge the remaining parameter sources into the final analysis
     parameters: HTK config first, then HMM-embedded values, then defaults */
  if (amconf->analysis.para_htk.loaded == 1) {
    apply_para(&(amconf->analysis.para), &(amconf->analysis.para_htk));
  }
  if (amconf->analysis.para_hmm.loaded == 1) {
    apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm));
  }
  apply_para(&(amconf->analysis.para), &(amconf->analysis.para_default));

  return(hmminfo);
}

/**
 * <EN>
 * Initialize context-independent HMM for state selection with Gaussian
 * Mixture Selection.
 * </EN>
 *
 * @param amconf [in] AM configuration variables
 *
 * @return the newly created HMM information structure, or NULL on failure.
 */
static HTK_HMM_INFO *
initialize_GSHMM(JCONF_AM *amconf)
{
  HTK_HMM_INFO *hmm_gs;
  Value para_dummy;   /* receives header-embedded parameters; not used afterwards */

  jlog("STAT: Reading GS HMMs:\n");
  hmm_gs = hmminfo_new();
  undef_para(&para_dummy);
  if (init_hmminfo(hmm_gs, amconf->hmm_gs_filename, NULL, &para_dummy) == FALSE) {
    hmminfo_free(hmm_gs);
    return NULL;
  }
  return(hmm_gs);
}

/**
 * <EN>
 * Read and initialize a 1-state GMM for utterance verification and
 * rejection.
 *
 * </EN>
 *
 * @param jconf [in] global configuration variables
 *
 * @return the newly created GMM information structure in HMM format,
 * or NULL on failure.
*/static HTK_HMM_INFO *initialize_GMM(Jconf *jconf){ HTK_HMM_INFO *gmm; jlog("STAT: reading GMM: %s\n", jconf->reject.gmm_filename); if (jconf->gmm == NULL) { /* no acoustic parameter setting was given for GMM using -AM_GMM, copy the first AM setting */ jlog("STAT: -AM_GMM not used, use parameter of the first AM\n"); jconf->gmm = j_jconf_am_new(); memcpy(jconf->gmm, jconf->am_root, sizeof(JCONF_AM)); jconf->gmm->hmmfilename = NULL; jconf->gmm->mapfilename = NULL; jconf->gmm->spmodel_name = NULL; jconf->gmm->hmm_gs_filename = NULL; if (jconf->am_root->analysis.cmnload_filename) { jconf->gmm->analysis.cmnload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnload_filename)+ 1), jconf->am_root->analysis.cmnload_filename); } if (jconf->am_root->analysis.cmnsave_filename) { jconf->gmm->analysis.cmnsave_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnsave_filename)+ 1), jconf->am_root->analysis.cmnsave_filename); } if (jconf->am_root->frontend.ssload_filename) { jconf->gmm->frontend.ssload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->frontend.ssload_filename)+ 1), jconf->am_root->frontend.ssload_filename); } } gmm = hmminfo_new(); if (init_hmminfo(gmm, jconf->reject.gmm_filename, NULL, &(jconf->gmm->analysis.para_hmm)) == FALSE) { hmminfo_free(gmm); return NULL; } /* check parameter type of this acoustic HMM */ if (jconf->input.type == INPUT_WAVEFORM) { /* Decode parameter extraction type according to the training parameter type in the header of the given acoustic HMM */ if ((gmm->opt.param_type & F_BASEMASK) != F_MFCC) { jlog("ERROR: m_fusion: for direct speech input, only GMM trained by MFCC is supported\n"); hmminfo_free(gmm); return NULL; } } /* set acoustic analysis parameters from HMM header */ calc_para_from_header(&(jconf->gmm->analysis.para), gmm->opt.param_type, gmm->opt.vec_size); if (jconf->gmm->analysis.para_htk.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_htk)); if 
(jconf->gmm->analysis.para_hmm.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_hmm)); apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_default)); return(gmm);}/** * <JA> * @brief 帽胳辑今をファイルから粕み哈んでセットアップする. * * 辑今惧のモノフォン山淡からトライフォンへの纷换は init_voca() で * 粕み哈み箕に乖われる. このため·辑今粕み哈み箕には·千急で蝗脱する * 徒年のHMM攫鼠を涂える涩妥がある. * * N-gram 蝗脱箕は·矢片痰不帽胳およぶ矢琐痰不帽胳をここで肋年する. * また·"-iwspword" 回年箕は·ポ〖ズ帽胳を辑今の呵稿に赁掐する. * * </JA> * <EN> * @brief Read in word dictionary from a file and setup for recognition. * * Monophone-to-triphone conversion will be performed inside init_voca(). * So, an HMM definition data that will be used with the LM should also be * specified as an argument. * * When reading dictionary for N-gram, sentence head silence word and * tail silence word will be determined in this function. Also, * when an option "-iwspword" is specified, this will insert a pause * word at the last of the given dictionary. * * </EN> * * @param lmconf [in] LM configuration variables * @param hmminfo [in] HMM definition of each phone in dictionary, for * phone checking and monophone-to-triphone conversion. * * @return the newly created word dictionary structure, or NULL on failure. * */static WORD_INFO *initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo){ WORD_INFO *winfo; /* allocate new word dictionary */ winfo = word_info_new(); /* read in dictinary from file */ if ( ! 
#ifdef MONOTREE /* leave winfo monophone for 1st pass lexicon tree */ init_voca(winfo, lmconf->dictfilename, hmminfo, TRUE, lmconf->forcedict_flag)#else init_voca(winfo, lmconf->dictfilename, hmminfo, FALSE, lmconf->forcedict_flag)#endif ) { jlog("ERROR: m_fusion: failed to read dictionary, terminated\n"); word_info_free(winfo); return NULL; } if (lmconf->lmtype == LM_PROB) { /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */ if (lmconf->enable_iwspword) { if (#ifdef MONOTREE voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, TRUE)#else voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, FALSE)#endif == FALSE) { jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry); word_info_free(winfo); return NULL; } else { jlog("STAT: 1 IW-sp word entry added\n"); } } /* set {head,tail}_silwid */ winfo->head_silwid = voca_lookup_wid(lmconf->head_silname, winfo); if (winfo->head_silwid == WORD_INVALID) { /* not exist */ jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname); word_info_free(winfo); return NULL; } winfo->tail_silwid = voca_lookup_wid(lmconf->tail_silname, winfo); if (winfo->tail_silwid == WORD_INVALID) { /* not exist */ jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname); word_info_free(winfo); return NULL; } } return(winfo); }/** * <JA> * @brief 帽胳N-gramをファイルから粕み哈んでセットアップする. * * ARPA フォ〖マットで回年箕は·LRファイルと RL ファイルの寥圭せで * 瓢侯が佰なる. LR のみ·あるいは RL のみ回年箕は·それをそのまま粕み哈む. * 列数とも回年されている眷圭は·RLをまず肩モデルとして粕み哈んだ稿· * LR の 2-gram だけを妈1パス脱に肩モデルに纳裁粕み哈みする. * * また·粕み哈み姜位稿·辑今惧のN-gramエントリとのマッチングを艰る. * * </JA> * <EN> * @brief Read in word N-gram from file and setup for recognition. * * When N-gram is specified in ARPA format, the behavior relies on whether * N-grams are specified in "-nlr" and "-nrl". When either of them was * solely specified, this function simply read it. 
If both are specified, * it will read the RL model fully as a primary model, and additionally * read only the 2-gram part or the LR model as the first pass LM. * * Also, this function create mapping from dictionary words to LM entry. * * </EN> * * @param lmconf [in] LM configuration variables * @param winfo [i/o] word dictionary that will be used with this N-gram. * each word in the dictionary will be assigned to an N-gram entry here. * * @return the newly created N-gram information data, or NULL on failure. * */static NGRAM_INFO *initialize_ngram(JCONF_LM *lmconf, WORD_INFO *winfo){ NGRAM_INFO *ngram; boolean ret; /* allocate new */ ngram = ngram_info_new(); /* load LM */ if (lmconf->ngram_filename != NULL) { /* binary format */ ret = init_ngram_bin(ngram, lmconf->ngram_filename); } else { /* ARPA format */ /* if either forward or backward N-gram is specified, read it */ /* if both specified, use backward N-gram as main and use forward 2-gram only for 1st pass (this is an old behavior) */ if (lmconf->ngram_filename_rl_arpa) { ret = init_ngram_arpa(ngram, lmconf->ngram_filename_rl_arpa, DIR_RL); if (ret == FALSE) { ngram_info_free(ngram); return NULL; } if (lmconf->ngram_filename_lr_arpa) { ret = init_ngram_arpa_additional(ngram, lmconf->ngram_filename_lr_arpa); if (ret == FALSE) { ngram_info_free(ngram); return NULL; } } } else if (lmconf->ngram_filename_lr_arpa) { ret = init_ngram_arpa(ngram, lmconf->ngram_filename_lr_arpa, DIR_LR); } } if (ret == FALSE) { ngram_info_free(ngram); return NULL; } /* set unknown (=OOV) word id */ set_unknown_id(ngram, lmconf->unknown_name); /* map dict item to N-gram entry */ if (make_voca_ref(ngram, winfo) == FALSE) { ngram_info_free(ngram); return NULL; } /* post-fix EOS / BOS uni prob for SRILM */ fix_uniprob_srilm(ngram, winfo); return(ngram);}/** * <EN> * @brief Load an acoustic model. 
* * This function will create an AM process instance using the given AM * configuration, and load models specified in the configuration into * the instance. Then the created instance will be installed to the * engine instance. The amconf should be registered to the global * jconf before calling this function. * * </EN> * * <JA> * @brief 不读モデルを粕み哈むˉ * * この簇眶は·涂えられた AM 肋年に骄って AM 借妄インスタンスを栏喇し· * その面に不读モデルをロ〖ドしますˉその稿·そのAM借妄インスタンスは
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -