📄 wchmm.c

📁 Julius version 4.12. About sound recognition.
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
      } else {	p = LOG_ZERO;      }      if (wchmm->lmvar == LM_NGRAM_USER) {	p = (*(wchmm->uni_prob_user))(wchmm->winfo, i, p);      }      if (p >= separate_thres && wchmm->separated_word_count < lmconf->separate_wnum) {	/* 裳刨の光い帽胳を腾菇陇步しない */	/* separate_thres は惧疤separate_wnum戎誊のスコア */	if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {	  jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");	  ok_p = FALSE;	}	wchmm->separated_word_count++;	continue;      }#endif    }    /* 呵も墓く不燎を鼎铜叫丸る帽胳を玫す */    maxsharelen=0;    for (j=0;j<i;j++) {      if (wchmm->category_tree  && wchmm->lmtype == LM_DFA) {	if (wchmm->winfo->wton[i] != wchmm->winfo->wton[j]) continue;      }      sharelen = wchmm_check_match(wchmm->winfo, i, j);      if (sharelen == wchmm->winfo->wlen[i] && sharelen == wchmm->winfo->wlen[j]) {       /* word に票不胳が赂哼する */       /* 涩ず呵络の墓さであり·脚剩カウントを闰けるためここで却ける */       maxsharelen = sharelen;       matchword = j;       break;      }      if (sharelen > maxsharelen) {       matchword = j;       maxsharelen = sharelen;      }    }    if (wchmm_add_word(wchmm, i, maxsharelen, matchword, lmconf->enable_iwsp) == FALSE) {      jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");      ok_p = FALSE;    }  }#if 0  /* 腾菇陇を侯らない */  for (i=0;i<wchmm->winfo->num;i++) {    if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {      jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");      ok_p = FALSE;    }  }#endif    jlog("STAT:  %5d words ended     (%6d nodes)\n",i,wchmm->n);  if (! 
wchmm->hmminfo->multipath) {    /* 票办不燎废误を积つ帽胳票晃の leaf node を2脚步して惰侍する */    num_duplicated = wchmm_duplicate_leafnode(wchmm);    jlog("STAT:  %d leaf nodes are made unshared\n", num_duplicated);        /* 帽胳の姜眉から嘲への莲败澄唯を滇めておく */    wchmm_calc_wordend_arc(wchmm);  }  /* wchmmの腊圭拉をチェックする */  check_wchmm(wchmm);  /* factoring脱に称觉轮に稿鲁帽胳のリストを烧裁する */  if (!wchmm->category_tree) {#ifdef UNIGRAM_FACTORING    if (wchmm->lmtype == LM_PROB) {      /* 票箕に涟もってfactoring猛を纷换 */      make_successor_list_unigram_factoring(wchmm);      jlog("STAT:  1-gram factoring values has been pre-computed\n");    } else {      make_successor_list(wchmm);    }#else     make_successor_list(wchmm);#endif /* UNIGRAM_FACTORING */        if (wchmm->hmminfo->multipath) {      /* 菇蜜された factoring 攫鼠をスキップ莲败および矢片矢恕ノ〖ドにコピ〖 */      adjust_sc_index(wchmm);    }    #ifdef UNIGRAM_FACTORING    if (wchmm->lmtype == LM_PROB) {      /* 帽胳粗LMキャッシュが涩妥なノ〖ドのリストを侯る */      make_iwcache_index(wchmm);    }#endif /* UNIGRAM_FACTORING */    /* sclist2node is no longer used */    if (wchmm->sclist2node != NULL) {      free(wchmm->sclist2node);      wchmm->sclist2node = NULL;    }  }  jlog("STAT: done\n");  return ok_p;}/**  * <JA> * 涂えられた帽胳辑今と咐胳モデルから腾菇陇步辑今を菇蜜する.  * この簇眶は bulid_wchmm() と票じ借妄を乖いますが· * 呵介に帽胳を不燎误でソ〖トして不燎误の击た界に帽胳を事べるため· * より光庐に腾菇陇步を乖うことができる. とくにオプション回年をしない * 嘎り·Julius/Julianではこちらが脱いられる.  *  * @param wchmm [i/o] 腾菇陇步辑今 * @param lmconf [in] 咐胳モデル(LM)肋年パラメ〖タ * </JA> * <EN> * Build a tree lexicon from given word dictionary and language model. * This function does the same job as build_wchmm(), but it is much * faster because finding of the longest matched word to an adding word * is done by first sorting all the words in the dictoinary by their phoneme * sequence order.  This function will be used instead of build_wchmm() * by default. 
*  * @param wchmm [i/o] lexicon tree * @param lmconf [in] language model (LM) configuration parameters * </EN> * @callgraph * @callergraph */  booleanbuild_wchmm2(WCHMM_INFO *wchmm, JCONF_LM *lmconf){  int i,j, last_i;  int num_duplicated;  WORD_ID *windex;#ifdef SEPARATE_BY_UNIGRAM  LOGPROB separate_thres;  LOGPROB p;#endif  boolean ok_p;  boolean ret;  /* lingustic infos must be set before build_wchmm() is called */  /* check if necessary lingustic info is already assigned (for debug) */  if (wchmm->winfo == NULL      || (wchmm->lmvar == LM_NGRAM && wchmm->ngram == NULL)      || (wchmm->lmvar == LM_DFA_GRAMMAR && wchmm->dfa == NULL)      ) {    jlog("ERROR: wchmm: linguistic info not available!!\n");    return FALSE;  }  ok_p = TRUE;    wchmm->separated_word_count = 0;    jlog("STAT: Building HMM lexicon tree\n");    if (wchmm->lmtype == LM_PROB) {#ifdef SEPARATE_BY_UNIGRAM    /* compute score threshold beforehand to separate words from tree */    /* here we will separate best [separate_wnum] words from tree */    separate_thres = get_nbest_uniprob(wchmm, lmconf->separate_wnum);#endif  }#ifdef PASS1_IWCD#ifndef USE_OLD_IWCD  if (wchmm->category_tree) {    if (wchmm->ccd_flag) {      /* when Julian mode (category-tree) and triphone is used,	 make all category-indexed context-dependent phone set (cdset) here */      /* these will be assigned on the last phone of each word on tree */      lcdset_register_with_category_all(wchmm);    }  }#endif#endif /* PASS1_IWCD */ /* initialize wchmm */  wchmm_init(wchmm);  /* make sorted word index ordered by phone sequence */  windex = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->winfo->num);  for(i=0;i<wchmm->winfo->num;i++) windex[i] = i;  if (wchmm->category_tree && wchmm->lmtype == LM_DFA) {    /* sort by category -> sort by word ID in each category */    wchmm_sort_idx_by_category(wchmm->winfo, windex, wchmm->winfo->num);    {      int last_cate;      last_i = 0;      last_cate = wchmm->winfo->wton[windex[0]];      for(i = 
1;i<wchmm->winfo->num;i++) {	if (wchmm->winfo->wton[windex[i]] != last_cate) {	  wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, i - last_i);	  last_cate = wchmm->winfo->wton[windex[i]];	  last_i = i;	}      }      wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, wchmm->winfo->num - last_i);    }  } else {    /* sort by word ID for whole vocabulary */    wchmm_sort_idx_by_wseq(wchmm->winfo, windex, 0, wchmm->winfo->num);  }/*  *   { *     int i,w; *     for(i=0;i<wchmm->winfo->num;i++) { *	 w = windex[i]; *	 printf("%d: cate=%4d wid=%4d %s\n",i, wchmm->winfo->wton[w], w, wchmm->winfo->woutput[w]); *     } *   } */  /* incrementaly add words to lexicon tree */  /* now for each word, the previous word (last_i) is always the most matched one */  last_i = WORD_INVALID;  for (j=0;j<wchmm->winfo->num;j++) {    i = windex[j];    if (wchmm->lmtype == LM_PROB) {      /* start/end silence word should not be shared */      if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) {	/* add whole word as new (sharelen=0) */	if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {	  jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");	  ok_p = FALSE;	}	continue;      }#ifndef NO_SEPARATE_SHORT_WORD      /* separate short words from tree */      if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) {	if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {	  jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");	  ok_p = FALSE;	}	wchmm->separated_word_count++;	continue;      }#endif#ifdef SEPARATE_BY_UNIGRAM      if (wchmm->ngram) {	p = uni_prob(wchmm->ngram, wchmm->winfo->wton[i])#ifdef CLASS_NGRAM	  + wchmm->winfo->cprob[i]#endif	  ;      } else {	p = LOG_ZERO;      }      if (wchmm->lmvar == LM_NGRAM_USER) {	p = (*(wchmm->uni_prob_user))(wchmm->winfo, i, p);      }      /* separate high-frequent words from tree (threshold = separate_thres) */      if (p >= separate_thres && wchmm->separated_word_count < 
lmconf->separate_wnum) {	if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {	  jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");	  ok_p = FALSE;	}	wchmm->separated_word_count++;	continue;      }#endif    }    if (last_i == WORD_INVALID) { /* first word */      ret = wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp);    } else {      /* the previous word (last_i) is always the most matched one */      if (wchmm->category_tree && wchmm->lmtype == LM_DFA) {	if (wchmm->winfo->wton[i] != wchmm->winfo->wton[last_i]) {	  ret = wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp);	} else {	  ret = wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i, lmconf->enable_iwsp);	}      } else {	ret = wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i, lmconf->enable_iwsp);      }    }    if (ret == FALSE) {      jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");      ok_p = FALSE;    }    last_i = i;      } /* end of add word loop */    /*j_printerr("\r %5d words ended     (%6d nodes)\n",j,wchmm->n);*/  /* free work area */  free(windex);  if (wchmm->hmminfo->multipath) {    jlog("STAT: lexicon size: %d nodes\n", wchmm->n);  } else {    /* duplicate leaf nodes of homophone/embedded words */    jlog("STAT: lexicon size: %d", wchmm->n);    num_duplicated = wchmm_duplicate_leafnode(wchmm);    jlog("+%d=%d\n", num_duplicated, wchmm->n);  }  if (! 
wchmm->hmminfo->multipath) {    /* calculate transition probability of word end node to outside */    wchmm_calc_wordend_arc(wchmm);  }  /* check wchmm coherence (internal debug) */  check_wchmm(wchmm);  /* make successor list for all branch nodes for N-gram factoring */  if (!wchmm->category_tree) {#ifdef UNIGRAM_FACTORING    if (wchmm->lmtype == LM_PROB) {      /* for 1-gram factoring, we can compute the values before search */       make_successor_list_unigram_factoring(wchmm);       jlog("STAT:  1-gram factoring values has been pre-computed\n");    } else {      make_successor_list(wchmm);    }#else    make_successor_list(wchmm);#endif /* UNIGRAM_FACTORING */    if (wchmm->hmminfo->multipath) {      /* Copy the factoring data according to the skip transitions and startword nodes */      adjust_sc_index(wchmm);    }#ifdef UNIGRAM_FACTORING    if (wchmm->lmtype == LM_PROB) {      /* make list of start nodes that needs inter-word LM cache */      make_iwcache_index(wchmm);    }#endif /* UNIGRAM_FACTORING */    /* sclist2node is no longer used */    if (wchmm->sclist2node != NULL) {      free(wchmm->sclist2node);      wchmm->sclist2node = NULL;    }  }  //jlog("STAT: done\n");#ifdef WCHMM_SIZE_CHECK  if (debug2_flag) {    /* detailed check of lexicon tree size (inaccurate!) 
*/    jlog("STAT: --- memory size of word lexicon ---\n");    jlog("STAT: wchmm: %d words, %d nodes\n", wchmm->winfo->num, wchmm->n);    jlog("STAT: %9d bytes: wchmm->state[node] (exclude ac, sc)\n", sizeof(WCHMM_STATE) * wchmm->n);    {      int count1 = 0;      int count2 = 0;      int count3 = 0;      for(i=0;i<wchmm->n;i++) {	if (wchmm->self_a[i] != LOG_ZERO) count1++;	if (wchmm->next_a[i] != LOG_ZERO) count2++;	if (wchmm->ac[i] != NULL) count3++;      }      jlog("STAT: %9d bytes: wchmm->self_a[node] (%4.1f%% filled)\n", sizeof(LOGPROB) * wchmm->n, 100.0 * count1 / (float)wchmm->n);      jlog("STAT: %9d bytes: wchmm->next_a[node] (%4.1f%% filled)\n", sizeof(LOGPROB) * wchmm->n, 100.0 * count2 / (float)wchmm->n);      jlog("STAT: %9d bytes: wchmm->ac[node] (%4.1f%% used)\n", sizeof(A_CELL2 *) * wchmm->n, 100.0 * count3 / (float)wchmm->n);    }    jlog("STAT: %9d bytes: wchmm->stend[node]\n", sizeof(WORD_ID) * wchmm->n);    {      int w,count;      count = 0;      for(w=0;w<wchmm->winfo->num;w++) {	count += wchmm->winfo->wlen[w] * sizeof(int) + sizeof(int *);      }      jlog("STAT: %9d bytes: wchmm->offset[w][]\n", count);    }    if (wchmm->hmminfo->multipath) {      jlog("STAT: %9d bytes: wchmm->wordbegin[w]\n", wchmm->winfo->num * sizeof(int));    }    jlog("STAT: %9d bytes: wchmm->wordend[w]\n", wchmm->winfo->num * sizeof(int));    jlog("STAT: %9d bytes: wchmm->startnode[]\n", wchmm->startnum * sizeof(int));    if (wchmm->category_tree) {      jlog("STAT: %9d bytes: wchmm->start2wid[]\n", wchmm->startnum * sizeof(WORD_ID));    }#ifdef UNIGRAM_FACTORING    if (wchmm->lmtype == LM_PROB) {      jlog("STAT: %9d bytes: wchmm->start2isolate[]\n", wchmm->isolatenum * sizeof(int));    }#endif    if (!wchmm->hmminfo->multipath) {      jlog("STAT: %9d bytes: wchmm->wordend_a[]\n", wchmm->winfo->num * sizeof(LOGPROB));    }#ifdef PASS1_IWCD    jlog("STAT: %9d bytes: wchmm->outstyle[]\n", wchmm->n * sizeof(unsigned char));    {
上一页 1 2 3 4 5
💿 文件大小 1116 K
👤 上传用户 a415834839
📂 所属分类 网络
🏷️ 相关标签

#recognition #version #julius #about
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -