📄 multi-gram.c
字号:
jlog("STAT: done\n"); /* extract name */ p = &(dict_file[0]); q = p; while(*p != '\0') { if (*p == '/') q = p + 1; p++; } p = q; while(*p != '\0' && *p != '.') { buf[p-q] = *p; p++; } buf[p-q] = '\0'; /* register the new grammar to multi-gram tree */ multigram_add(new_dfa, new_winfo, buf, lm); return TRUE;}/** * <JA> * 弹瓢箕に回年されたすべての矢恕をロ〖ドする. * * @param lm [i/o] 咐胳借妄インスタンス * * </JA> * <EN> * Load all the grammars specified at startup. * * @param lm [i/o] LM processing instance * * </EN> * @callgraph * @callergraph */booleanmultigram_load_all_gramlist(PROCESS_LM *lm){ GRAMLIST *g; GRAMLIST *groot; boolean ok_p; switch(lm->config->lmvar) { case LM_DFA_GRAMMAR: groot = lm->config->gramlist_root; break; case LM_DFA_WORD: groot = lm->config->wordlist_root; break; } ok_p = TRUE; for(g = groot; g; g = g->next) { if (multigram_read_file_and_add(g->dfafile, g->dictfile, lm) == FALSE) { ok_p = FALSE; } } return(ok_p);}/** * <JA> * 附哼ある矢恕の眶を评る(active/inactiveとも). * * @param lm [i/o] 咐胳借妄インスタンス * * @return 矢恕の眶を手す. * </JA> * <EN> * Get the number of current grammars (both active and inactive). * * @param lm [i/o] LM processing instance * * @return the number of grammars. * </EN> * @callgraph * @callergraph * @ingroup grammar */intmultigram_get_all_num(PROCESS_LM *lm){ MULTIGRAM *m; int cnt; cnt = 0; for(m=lm->grammars;m;m=m->next) cnt++; return(cnt);}/** * <JA> * 帽胳カテゴリの掳する矢恕を评る. * * @param category 帽胳カテゴリID * @param lm [i/o] 咐胳借妄インスタンス * * @return 帽胳カテゴリの掳する矢恕のIDを手す. * </JA> * <EN> * Get which grammar the given category belongs to. * * @param category word category ID * @param lm [i/o] LM processing instance * * @return the id of the belonging grammar. * </EN> * @callgraph * @callergraph * @ingroup grammar */intmultigram_get_gram_from_category(int category, PROCESS_LM *lm){ MULTIGRAM *m; int tb, te; for(m = lm->grammars; m; m = m->next) { if (m->newbie) continue; tb = m->cate_begin; te = tb + m->dfa->term_num; if (tb <= category && category < te) { /* found */ return(m->id); } } return(-1);}/** * <JA> * 帽胳IDから掳する矢恕を评る. * * @param wid 帽胳ID * @param lm [i/o] 咐胳借妄インスタンス * * @return 帽胳の掳する矢恕のIDを手す. * </JA> * <EN> * Get which grammar the given word belongs to. * * @param wid word ID * @param lm [i/o] LM processing instance * * @return the id of the belonging grammar. * </EN> * @callgraph * @callergraph * @ingroup grammar */intmultigram_get_gram_from_wid(WORD_ID wid, PROCESS_LM *lm){ MULTIGRAM *m; int wb, we; for(m = lm->grammars; m; m = m->next) { if (m->newbie) continue; wb = m->word_begin; we = wb + m->winfo->num; if (wb <= wid && wid < we) { /* found */ return(m->id); } } return(-1);}/** * <JA> * 瘦积している矢恕をすべて豺庶する。 * * @param root [in] root pointer of grammar list * </JA> * <EN> * Free all grammars. * * @param root [in] root pointer of grammar list * </EN> * @callgraph * @callergraph */voidmultigram_free_all(MULTIGRAM *root){ MULTIGRAM *m, *mtmp; m = root; while(m) { mtmp = m->next; if (m->dfa) dfa_info_free(m->dfa); word_info_free(m->winfo); free(m); m = mtmp; }}/** * <EN> * Return a grammar ID of the given grammar name. * </EN> * <JA> * LM面の矢恕を叹涟で浮瑚し·その矢恕IDを手すˉ * </JA> * * @param lm [in] LM process instance * @param gramname [in] grammar name * * @return grammar ID, or -1 if not found. * * @callgraph * @callergraph * @ingroup grammar * */intmultigram_get_id_by_name(PROCESS_LM *lm, char *gramname){ MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (strmatch(m->name, gramname)) break; } if (!m) { jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname); return -1; } return m->id;}/** * <EN> * Find a grammar in LM by its name. * </EN> * <JA> * LM面の矢恕を叹涟で浮瑚する. * </JA> * * @param lm [in] LM process instance * @param gramname [in] grammar name * * @return poitner to the grammar, or NULL if not found. * * @callgraph * @callergraph * @ingroup grammar * */MULTIGRAM *multigram_get_grammar_by_name(PROCESS_LM *lm, char *gramname){ MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (strmatch(m->name, gramname)) break; } if (!m) { jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname); return NULL; } return m;}/** * <EN> * Find a grammar in LM by its ID number. * </EN> * <JA> * LM面の矢恕を ID 戎规で浮瑚する. * </JA> * * @param lm [in] LM process instance * @param id [in] ID number * * @return poitner to the grammar, or NULL if not found. * * @callgraph * @callergraph * @ingroup grammar * */MULTIGRAM *multigram_get_grammar_by_id(PROCESS_LM *lm, unsigned short id){ MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (m->id == id) break; } if (!m) { jlog("ERROR: multi-gram: cannot find grammar id \"%d\"\n", id); return NULL; } return m;}/** * <EN> * @brief Append words to a grammar. * * Category IDs of grammar in the adding words will be copied as is to * the target grammar, so they should be set beforehand correctly. * The whole tree lexicon will be rebuilt later. * * Currently adding words to N-gram LM is not supported yet. * * </EN> * <JA> * @brief 帽胳礁圭を矢恕に纳裁するˉ * * 纳裁する帽胳の矢恕カテゴリIDについては·すでにアサインされているものが * そのままコピ〖されるˉよって·それらはこの簇眶を钙び叫す涟に· * 纳裁滦据の矢恕で腊圭拉が艰れるよう赖しく肋年されている涩妥があるˉ * 腾菇陇步辑今链挛が·稿に浩菇蜜されるˉ * * 帽胳N-gram咐胳モデルへの辑今纳裁は附哼サポ〖トされていないˉ * * </JA> * * @param lm [i/o] LM process instance * @param m [i/o] grammar to which the winfo will be appended * @param winfo [in] words to be added to the grammar * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup grammar * */booleanmultigram_add_words_to_grammar(PROCESS_LM *lm, MULTIGRAM *m, WORD_INFO *winfo){ int offset; if (lm == NULL || m == NULL || winfo == NULL) return FALSE; offset = m->winfo->num; printf("adding %d words to grammar #%d (%d words)\n", winfo->num, m->id, m->winfo->num); /* append to the grammar */ if (voca_append(m->winfo, winfo, m->id, offset) == FALSE) { jlog("ERROR: multi-gram: failed to add words to dict in grammar #%d \"%s\"\n", m->id, m->name); return FALSE; } /* update dictianary info */ if (lm->lmvar == LM_DFA_GRAMMAR) { if (m->dfa->term_num != 0) free_terminfo(&(m->dfa->term)); if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) { jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n"); return FALSE; } } /* prepare for update */ m->hook |= MULTIGRAM_MODIFIED; return TRUE;}/** * <EN> * @brief Append words to a grammar, given by its name. * * Call multigram_add_words_to_grammar() with target grammar * specified by its name. * </EN> * <JA> * @brief 叹涟で回年された矢恕に帽胳礁圭を纳裁するˉ * * multigram_add_words_to_grammar() を矢恕叹で回年して悸乖するˉ * * </JA> * * @param lm [i/o] LM process instance * @param gramname [in] name of the grammar to which the winfo will be appended * @param winfo [in] words to be added to the grammar * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup grammar * */booleanmultigram_add_words_to_grammar_by_name(PROCESS_LM *lm, char *gramname, WORD_INFO *winfo){ return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_name(lm, gramname), winfo));}/** * <EN> * @brief Append words to a grammar, given by its ID number. * * Call multigram_add_words_to_grammar() with target grammar * specified by its number. * </EN> * <JA> * @brief 戎规で回年された矢恕に帽胳礁圭を纳裁するˉ * * multigram_add_words_to_grammar() を戎规で回年して悸乖するˉ * * </JA> * * @param lm [i/o] LM process instance * @param id [in] ID number of the grammar to which the winfo will be appended * @param winfo [in] words to be added to the grammar * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup grammar * */booleanmultigram_add_words_to_grammar_by_id(PROCESS_LM *lm, unsigned short id, WORD_INFO *winfo){ return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_id(lm, id), winfo));}/* end of file */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -