📄 multi-gram.c
字号:
/**
 * @file   multi-gram.c
 *
 * <JA>
 * @brief  認識用文法の管理
 *
 * このファイルには，認識用文法の読み込みと管理を行う関数が含まれています.
 * これらの関数は，文法ファイルの読み込み，および各種データの
 * セットアップを行います.
 *
 * 複数文法の同時認識に対応しています. 複数の文法を一度に読み込んで，
 * 並列に認識を行えます. また，モジュールモードでは，クライアントから
 * 認識実行中に文法を動的に追加・削除したり，一部分の文法を無効化・
 * 有効化したりできます. また与えられた個々の文法ごとに認識結果を
 * 出すことができます.
 *
 * 与えられた（複数の）文法は一つのグローバル文法として結合され,
 * 文法の読み込みや削除などの状態変更を行ったとき，更新されます.
 * 結合された構文規則 (DFA) が global_dfa に，語彙辞書が global_winfo に
 * それぞれローカルに格納されます. これらは適切なタイミングで
 * multigram_build() が呼び出されたときに，global.h 内の大域変数 dfa
 * および winfo にコピーされ，認識処理において使用されるようになります.
 * </JA>
 *
 * <EN>
 * @brief  Management of Recognition grammars
 *
 * This file contains functions to read and manage recognition grammars.
 * These functions read in the grammar and dictionary, and set up data for
 * recognition.
 *
 * Recognition with multiple grammars is supported.  Julian can read
 * several grammars specified at startup time, and perform recognition
 * with those grammars simultaneously.  In module mode, you can add /
 * delete / activate / deactivate each grammar while performing recognition,
 * and can also output optimum results for each grammar.
 *
 * Internally, the given grammars are composed into a single Global Grammar.
 * The global grammar will be updated whenever a new grammar has been read
 * or deleted.  The syntax rule (DFA) of the global grammar will be stored
 * at global_dfa, and the corresponding dictionary will be at global_winfo
 * locally, independent of the decoding timing.  After that, multigram_build()
 * will be called to make the prepared global grammar usable in the
 * actual recognition process, by copying the grammar and the dictionary
 * to the global variable dfa and winfo.
 * </EN>
* * @author Akinobu Lee * @date Sat Jun 18 23:45:18 2005 * * $Revision: 1.8 $ * *//* * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology * All rights reserved */#include <julius/julius.h>/// For debug: define to enable grammar update messages to stdout#define MDEBUG/** * <JA> * @brief グロ〖バル矢恕から腾菇陇步辑今を菇蜜する. * * 涂えられた矢恕で千急を乖うために·千急借妄インスタンスが附哼积つ * グロ〖バル矢恕から腾菇陇步辑今を∈浩∷菇蜜します. また· * 弹瓢箕にビ〖ム升が汤绩弄に回绩されていない眷圭やフルサ〖チの眷圭· * ビ〖ム升の浩肋年も乖います. * * @param r [i/o] 千急借妄インスタンス * </JA> * <EN> * @brief Build tree lexicon from global grammar. * * This function will re-construct the tree lexicon using the global grammar * in the recognition process instance. If the beam width was not explicitly * specified on startup, the the beam width will be guessed * according to the size of the new lexicon. * * @param r [i/o] recognition process instance * </EN> */static booleanmultigram_rebuild_wchmm(RecogProcess *r){ boolean ret; /* re-build wchmm */ if (r->wchmm != NULL) { wchmm_free(r->wchmm); } r->wchmm = wchmm_new(); r->wchmm->lmtype = r->lmtype; r->wchmm->lmvar = r->lmvar; r->wchmm->ccd_flag = r->ccd_flag; r->wchmm->category_tree = TRUE; r->wchmm->hmmwrk = &(r->am->hmmwrk); /* assign models */ r->wchmm->dfa = r->lm->dfa; r->wchmm->winfo = r->lm->winfo; r->wchmm->hmminfo = r->am->hmminfo; if (r->wchmm->category_tree) { if (r->config->pass1.old_tree_function_flag) { ret = build_wchmm(r->wchmm, r->lm->config); } else { ret = build_wchmm2(r->wchmm, r->lm->config); } } else { ret = build_wchmm2(r->wchmm, r->lm->config); } /* 弹瓢箕 -check でチェックモ〖ドへ */ if (r->config->sw.wchmm_check_flag) { wchmm_check_interactive(r->wchmm); } if (ret == FALSE) { jlog("ERROR: multi-gram: failed to build (global) lexicon tree for recognition\n"); return FALSE; } /* guess beam width from models, when not specified */ r->trellis_beam_width = 
set_beam_width(r->wchmm, r->config->pass1.specified_trellis_beam_width); switch(r->config->pass1.specified_trellis_beam_width) { case 0: jlog("STAT: multi-gram: beam width set to %d (full) by lexicon change\n", r->trellis_beam_width); break; case -1: jlog("STAT: multi-gram: beam width set to %d (guess) by lexicon change\n", r->trellis_beam_width); } /* re-allocate factoring cache for the tree lexicon*/ /* for n-gram only?? */ //max_successor_cache_free(recog->wchmm); //max_successor_cache_init(recog->wchmm); /* finished! */ return TRUE;}/** * <EN> * @brief Check for global grammar and (re-)build tree lexicon if needed. * * If any modification of the global grammar has been occured, * the tree lexicons and some other data for recognition will be re-constructed * from the updated global grammar. * </EN> * <JA> * @brief グロ〖バル矢恕を拇べ·涩妥があれば腾菇陇步辑今を∈浩∷菇蜜する. * * グロ〖バル辑今に恃构があれば·その构糠されたグロ〖バル * 辑今から腾菇陇步辑今などの不兰千急脱デ〖タ菇陇を浩菇蜜する. * * </JA> * * @param r [in] recognition process instance * * @return TRUE on success, FALSE on error. * * @callgraph * @callergraph * @ingroup grammar * */booleanmultigram_build(RecogProcess *r){ if (r->lm->winfo != NULL) { /* re-build tree lexicon for recognition process */ if (multigram_rebuild_wchmm(r) == FALSE) { jlog("ERROR: multi-gram: failed to re-build tree lexicon\n"); return FALSE; }#ifdef MDEBUG jlog("STAT: wchmm (re)build completed\n");#endif } return(TRUE);}/** * <JA> * @brief グロ〖バル矢恕の琐萨に矢恕を纳裁する. * * もとの矢恕菇陇挛には·グロ〖バル矢恕のどの疤弥にその矢恕が纳裁 * されたか·そのカテゴリ戎规と辑今戎规の认跋が淡峡される. * * @param gdfa [i/o] 冯圭黎の矢恕のDFA攫鼠 * @param gwinfo [i/o] 冯圭黎の矢恕の辑今攫鼠 * @param m [i/o] 冯圭する矢恕攫鼠. * </JA> * <EN> * @brief Append a grammar to the tail of global grammar. * * The location of the grammar in the global grammar (categories and words) * will be stored to the grammar structure for later access. * * @param gdfa [i/o] DFA information of the global grammar * @param gwinfo [i/o] Dictionary information of the global grammar * @param m [i/o] New grammar information to be installed. 
* </EN>
 */
static boolean
multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
{
  /* Remember where grammar 'm' lands inside the global grammar: its words,
     terminals (categories) and states start right after what is already
     stored in gwinfo / gdfa. */
  m->word_begin  = gwinfo->num;      /* first word ID */
  m->cate_begin  = gdfa->term_num;   /* first terminal (category) ID */
  m->state_begin = gdfa->state_num;  /* first state ID */

  /* DFA states and category IDs go first.  Multiple initial states are
     allowed, so the initial nodes do not have to be connected. */
  dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin);

  /* then the vocabulary of the new grammar */
  if (voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin) == FALSE) {
    return FALSE;
  }

  /* category -> word mapping table */
  terminfo_append(&(gdfa->term), &(m->dfa->term), m->cate_begin, m->word_begin);

  /* Category-pair information (pause was already handled on m->dfa, so it
     is appended as is), followed by merging the noise / pause word entry.
     The second call is made only when the first one succeeded. */
  if (cpair_append(gdfa, m->dfa, m->cate_begin) == FALSE
      || dfa_pause_word_append(gdfa, m->dfa, m->cate_begin) == FALSE) {
    return FALSE;
  }

  jlog("STAT: Gram #%d %s: installed\n", m->id, m->name);
  return TRUE;
}

/**
 * <EN>
 * Add a new grammar to the current list of grammars.
 * The list of grammars which the LM instance keeps currently is
 * at lm->grammars.
 * The new grammar is flagged as "newbie" and "active", to be treated
 * properly at the next grammar update check.
 *
 * @param dfa [in] DFA information of the new grammar.
 * @param winfo [in] dictionary information of the new grammar.
 * @param name [in] name string of the new grammar.
 * @param lm [i/o] LM processing instance
 *
 * @return the new grammar ID for the given grammar.
* </EN> * * @callgraph * @callergraph * @ingroup grammar */intmultigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm){ MULTIGRAM *new; /* allocate new gram */ new = (MULTIGRAM *)mymalloc(sizeof(MULTIGRAM)); if (name != NULL) { strncpy(new->name, name, MAXGRAMNAMELEN); } else { strncpy(new->name, "(no name)", MAXGRAMNAMELEN); } new->id = lm->gram_maxid; new->dfa = dfa; new->winfo = winfo; new->hook = MULTIGRAM_DEFAULT; new->newbie = TRUE; /* need to setup */ new->active = TRUE; /* default: active */ /* the new grammar is now added to gramlist */ new->next = lm->grammars; lm->grammars = new; jlog("STAT: Gram #%d %s registered\n", new->id, new->name); lm->gram_maxid++; return new->id;}/** * <JA> * 矢恕を猴近する. * * 矢恕リスト面のある矢恕について·猴近マ〖クを烧ける. * 悸狠の猴近は multigram_exec_delete() で乖われる. * * @param delid [in] 猴近する矢恕の矢恕ID * @param lm [i/o] 咐胳借妄インスタンス * * @return 奶撅箕 TRUE を手す. 回年されたIDの矢恕が痰い眷圭は FALSE を手す. * </JA> * <EN> * Mark a grammar in the grammar list to be deleted at the next grammar update. * * @param delid [in] grammar id to be deleted * @param lm [i/o] LM processing instance * * @return TRUE on normal exit, or FALSE if the specified grammar is not found * in the grammar list. * </EN> * @callgraph * @callergraph * @ingroup grammar */booleanmultigram_delete(int delid, PROCESS_LM *lm){ MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (m->id == delid) { m->hook |= MULTIGRAM_DELETE; jlog("STAT: Gram #%d %s: marked delete\n", m->id, m->name); break; } } if (! m) { jlog("STAT: Gram #%d: not found\n", delid); return FALSE; } return TRUE;}/** * <JA> * すべての矢恕を肌搀构糠箕に猴近するようマ〖クする. * * @param lm [i/o] 咐胳借妄インスタンス * </JA> * <EN> * Mark all grammars to be deleted at next grammar update. * * @param lm [i/o] LM processing instance * </EN> * @callgraph * @callergraph * @ingroup grammar */voidmultigram_delete_all(PROCESS_LM *lm){ MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { m->hook |= MULTIGRAM_DELETE; }}/** * <JA> * 猴近マ〖クのついた矢恕をリストから猴近する. 
*
 * @param lm [i/o] LM processing instance
 *
 * @return TRUE if the global grammar must be re-constructed, or FALSE if not needed.
 * </JA>
 * <EN>
 * Purge grammars marked as delete.
 *
 * Every grammar in lm->grammars whose hook has MULTIGRAM_DELETE set is
 * unlinked from the list and released together with its DFA and dictionary.
 *
 * @param lm [i/o] LM processing instance
 *
 * @return TRUE if the global grammar must be re-constructed (at least one
 * purged grammar was not a "newbie"), or FALSE if not needed.
 * </EN>
 */
static boolean
multigram_exec_delete(PROCESS_LM *lm)
{
  MULTIGRAM **link;		/* the pointer that currently points at the entry under inspection */
  boolean need_rebuild = FALSE;

  link = &(lm->grammars);
  while (*link != NULL) {
    MULTIGRAM *cur = *link;

    if ((cur->hook & MULTIGRAM_DELETE) == 0) {
      /* keep this one: step to its successor link */
      link = &(cur->next);
      continue;
    }

    /* a grammar still flagged as newbie does not force a rebuild when
       purged; purging any other grammar does, so report it to the caller */
    if (! cur->newbie) need_rebuild = TRUE;

    if (cur->dfa) dfa_info_free(cur->dfa);
    word_info_free(cur->winfo);
    jlog("STAT: Gram #%d %s: purged\n", cur->id, cur->name);

    /* unlink, then release the entry; 'link' now refers to the successor */
    *link = cur->next;
    free(cur);
  }

  return(need_rebuild);
}

/**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -