📄 wchmm.c

📁 julius version 4.12.about sound recognition.
💻 C
📖 第 1 页 / 共 5 页
字号:
	  out_num_prev = out_num_next;	}	/* end of phone loop */      }	/* end of multipath block */        } /* new phone node creation loop for this word */  /*************************************/  /* Short Pause appending (multipath) */  /*************************************/    /* if -iwsp, add noise model to the end of word at ntmp */  if (wchmm->hmminfo->multipath && enable_iwsp && add_tail - add_head + 1 > 0) { /* there are new phones to be created */    int ntmp_bak;        /* set short pause state info */    ntmp_bak = ntmp;    if (wchmm->hmminfo->sp->is_pseudo) {      for(k = 1;k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {	wchmm->outstyle[ntmp] = AS_LSET;	wchmm->state[ntmp].out.lset = &(wchmm->hmminfo->sp->body.pseudo->stateset[k]);	acc_init(wchmm, ntmp);	wchmm->stend[ntmp] = WORD_INVALID;	ntmp++;	if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm);      }    } else {      for(k = 1;k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {	wchmm->outstyle[ntmp] = AS_STATE;	wchmm->state[ntmp].out.state = wchmm->hmminfo->sp->body.defined->s[k];	acc_init(wchmm, ntmp);	wchmm->stend[ntmp] = WORD_INVALID;	ntmp++;	if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm);      }    }    ntmp = ntmp_bak;    /* connect incoming arcs from previous phone */    out_num_next = 0;    for (ato = 1; ato < hmm_logical_state_num(wchmm->hmminfo->sp); ato++) {      prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[0][ato];      if (prob != LOG_ZERO) {	/* to control short pause insertion, transition probability toward	 the word-end short pause will be given a penalty */	prob += wchmm->hmminfo->iwsp_penalty;	if (ato == hmm_logical_state_num(wchmm->hmminfo->sp) - 1) {	  /* model has a model skip transition, just inherit them to next */	  for(kkk=0; kkk<out_num_prev; kkk++) {	    out_from_next[out_num_next] = out_from[kkk];	    out_a_next[out_num_next] = out_a[kkk] + prob;	    out_num_next++;	  }	} else {	  /* connect incoming arcs from previous phone to this phone */	  for(kkk=0; kkk<out_num_prev; kkk++) {	    add_wacc(wchmm, out_from[kkk], out_a[kkk] + prob, ntmp + ato - 1);	  }	}      }    }    /* if short pause model doesn't have a model skip transition, also add it */    if (hmm_logical_trans(wchmm->hmminfo->sp)->a[0][hmm_logical_state_num(wchmm->hmminfo->sp)-1] == LOG_ZERO) {      /* to make insertion sp model to have no effect on the original path,	 the skip transition probability should be 0.0 (=100%) */      prob = 0.0;      for(kkk=0; kkk<out_num_prev; kkk++) {	out_from_next[out_num_next] = out_from[kkk];	out_a_next[out_num_next] = out_a[kkk] + prob;	out_num_next++;      }    }    /* connect arcs within model, and store new outgoing arcs for wordend node */    for (k = 1; k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {      for (ato = 1; ato < hmm_logical_state_num(wchmm->hmminfo->sp); ato++) {	prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[k][ato];	if (prob != LOG_ZERO) {	  if (ato == hmm_logical_state_num(wchmm->hmminfo->sp) - 1) {	    out_from_next[out_num_next] = ntmp;	    out_a_next[out_num_next] = prob;	    out_num_next++;	  } else {	    add_wacc(wchmm, ntmp, prob, ntmp + ato - k);	  }	}      }      ntmp++;    }    /* swap work area for next */    for(kkk=0;kkk<out_num_next;kkk++) {      out_from[kkk] = out_from_next[kkk];      out_a[kkk] = out_a_next[kkk];    }    out_num_prev = out_num_next;  } /* end of inter-word short pause appending block */  /* make mapping: word <-> node on wchmm */  for (j=0;j<word_len;j++) {    if (j < add_head) {	/* shared part */      wchmm->offset[word][j] = wchmm->offset[matchword][j];    } else if (add_tail < j) { /* shared tail part (should not happen..) */      wchmm->offset[word][j] = wchmm->offset[matchword][j+(matchword_len-word_len)];    } else {			/* newly created part */      wchmm->offset[word][j] = n;      n += hmm_logical_state_num(wchmm->winfo->wseq[word][j]) - 2;    }  }  if (wchmm->hmminfo->multipath) {    /* create word-end node */    /* paranoia check if the short-pause addition has been done well */    if (enable_iwsp && add_tail - add_head + 1 > 0) {      n += hmm_logical_state_num(wchmm->hmminfo->sp) - 2;      if (n != ntmp) j_internal_error("wchmm_add_word: cannot match\n");    }        /* create word-end node */    wchmm->wordend[word] = n;	/* tail node of 'word' is 'n' */    wchmm->stend[n] = word;	/* node 'k' is a tail node of 'word' */    acc_init(wchmm, n);    wchmm->state[n].out.state = NULL;        /* connect the final outgoing arcs in out_from[] to the word end node */    for(k = 0; k < out_num_prev; k++) {      add_wacc(wchmm, out_from[k], out_a[k], n);    }    n++;    if (n >= wchmm->maxwcn) wchmm_extend(wchmm);        if (matchlen == 0) {      /* check if the new word has whole word-skipping transition */      /* (use out_from and out_num_prev temporary) */      out_num_prev = 0;      get_outtrans_list(wchmm, word, word_len-1, out_from, out_a, &out_num_prev, wchmm->winfo->maxwn, enable_iwsp);      for(k=0;k<out_num_prev;k++) {	if (out_from[k] == wchmm->wordbegin[word]) {	  jlog("ERROR: *** ERROR: WORD SKIPPING TRANSITION NOT ALLOWED ***\n");	  jlog("ERROR:   Word id=%d (%s[%s]) has \"word skipping transition\".\n", word, wchmm->winfo->wname[word], wchmm->winfo->woutput[word]);	  jlog("ERROR:   All HMMs in the word:\n    ");	  for(kkk=0;kkk<wchmm->winfo->wlen[word];kkk++) {	    jlog("%s ", wchmm->winfo->wseq[word][kkk]->name);	  }	  jlog("\n");	  jlog("ERROR:  has transitions from initial state to final state.\n");	  jlog("ERROR:  This type of word skipping is not supported.\n");	  ok_p = FALSE;	}      }    }    wchmm->n = n;  } else {    wchmm->n = n;    k = wchmm->offset[word][word_len-1] + hmm_logical_state_num(wchmm->winfo->wseq[word][word_len-1])-2 -1;    wchmm->wordend[word] = k;	/* tail node of 'word' is 'k' */    wchmm->stend[k] = word;	/* node 'k' is a tail node of 'word' */        if (matchlen != 0 && add_tail - add_head + 1 > 0) {      /* new part has been created in the above procedure: */      /* now make link from shared part to the new part */      wchmm_link_subword(wchmm, matchword,add_to,word,add_head);	    }  }  return(ok_p);  }/*************************************************************//**** parse whole structure (after wchmm has been built) *****//*************************************************************//**  * <JA> * 腾菇陇步辑今を瘤汉し·帽胳の姜眉觉轮から嘲への肌莲败澄唯のリストを侯喇する.  * (non multipath) *  * @param wchmm [i/o] 腾菇陇步辑今 * </JA> * <EN> * Scan the lexicon tree to make list of emission probability from the word end * state. (non multipath) *  * @param wchmm [i/o] tree lexicon * </EN> */static voidwchmm_calc_wordend_arc(WCHMM_INFO *wchmm){  WORD_ID w;  HTK_HMM_Trans *tr;  LOGPROB a;  for (w=0;w<wchmm->winfo->num;w++) {    tr = hmm_logical_trans(wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1]);    a = tr->a[tr->statenum-2][tr->statenum-1];    wchmm->wordend_a[w] = a;  }}#ifdef SEPARATE_BY_UNIGRAM/********************************************************************//****** for separation (linearization) of high-frequent words *******//********************************************************************//**  * <JA> * unigram澄唯でソ〖トするための qsort コ〖ルバック簇眶.  *  * @param a [in] 妥燎1 * @param b [in] 妥燎2 *  * @return 遍换の冯蔡の射圭を手す.  * </JA> * <EN> * qsort callback function to sort unigram values. *  * @param a [in] element #1 * @param b [in] element #2 *  * @return the result of comparison. * </EN> */static intcompare_prob(LOGPROB *a, LOGPROB *b){  if (*a < *b)  return (1);  if (*a > *b)  return (-1);  return(0);}/**  * <JA> * 1-gramスコアの惧疤 N 戎誊の猛を滇める.  *  * @param winfo [in] 帽胳辑今 * @param n [in] 滇める界疤 *  * @return 惧疤 N 戎誊の uni-gram 澄唯の猛を手す.  * </JA> * <EN> * Get the Nth-best unigram probability from all words. *  * @param winfo [in] word dictionary * @param n [in] required rank *  * @return the Nth-best unigram probability. * </EN> */static LOGPROBget_nbest_uniprob(WCHMM_INFO *wchmm, int n){  LOGPROB *u_p;  WORD_ID w;  LOGPROB x;  WORD_INFO *winfo;  NGRAM_INFO *ngram;  winfo = wchmm->winfo;  ngram = wchmm->ngram;  if (n < 1) n = 1;  if (n > winfo->num) n = winfo->num;  /* store all unigram probability to u_p[] */  u_p = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);  for(w=0;w<winfo->num;w++) {    if (ngram) {      x = uni_prob(ngram, winfo->wton[w])#ifdef CLASS_NGRAM	+ winfo->cprob[w]#endif	;    } else {      x = LOG_ZERO;    }    if (wchmm->lmvar == LM_NGRAM_USER) {      x = (*(wchmm->uni_prob_user))(wchmm->winfo, w, x);    }    u_p[w] = x;  }  /* sort them downward */  qsort(u_p, winfo->num, sizeof(LOGPROB),	(int (*)(const void *,const void *))compare_prob);  /* return the Nth value */  x = u_p[n-1];  free(u_p);  return(x);}#endif/**********************************************************//****** MAKE WCHMM (LEXICON TREE) --- main function *******//**********************************************************/#define COUNT_STEP 500         ///< Word count step for debug progress output/**  * <JA> * 涂えられた帽胳辑今と咐胳モデルから腾菇陇步辑今を菇蜜する. この簇眶は * 借妄が觅く·Julianで"-oldtree"オプション回年箕のみ蝗脱されます. オプション * 润回年箕およびJuliusでは洛わりに build_wchmm2() が脱いられます.  *  * @param wchmm [i/o] 腾菇陇步辑今 * @param lmconf [in] 咐胳モデル(LM)肋年パラメ〖タ * </JA> * <EN> * Build a tree lexicon from given word dictionary and language model. * This function is slow and only used when "-oldtree" option is specified * in Julian.  Julian without that option and Julius uses build_wchmm2() * instead of this. *  * @param wchmm [i/o] lexicon tree * @param lmconf [in] language model (LM) configuration parameters * </EN> * @callgraph * @callergraph */booleanbuild_wchmm(WCHMM_INFO *wchmm, JCONF_LM *lmconf){  int i,j;  int matchword=0, sharelen=0, maxsharelen=0;  int num_duplicated;#ifdef SEPARATE_BY_UNIGRAM  LOGPROB separate_thres;  LOGPROB p;#endif  boolean ok_p;  /* lingustic infos must be set before build_wchmm() is called */  /* check if necessary lingustic info is already assigned (for debug) */  if (wchmm->winfo == NULL      || (wchmm->lmvar == LM_NGRAM && wchmm->ngram == NULL)      || (wchmm->lmvar == LM_DFA_GRAMMAR && wchmm->dfa == NULL)      ) {    jlog("ERROR: wchmm: linguistic info not available!!\n");    return FALSE;  }  ok_p = TRUE;  #ifdef SEPARATE_BY_UNIGRAM  /* 惧疤[separate_wnum]戎誊の1-gramスコアを滇める */  /* 1-gramスコアがこの猛笆惧のものは腾から尸ける */  separate_thres = get_nbest_uniprob(wchmm, lmconf->separate_wnum);#endif#ifdef PASS1_IWCD#ifndef USE_OLD_IWCD  if (wchmm->category_tree) {    if (wchmm->ccd_flag) {      /* 链てのカテゴリID烧き lcd_set を侯喇 */      lcdset_register_with_category_all(wchmm);    }  }#endif#endif /* PASS1_IWCD */    /* wchmmを介袋步 */  wchmm_init(wchmm);  /* カウンタリセット */  wchmm->separated_word_count=0;  jlog("STAT: wchmm: Building HMM lexicon tree (left-to-right)\n");  for (i=0;i<wchmm->winfo->num;i++) {    if (wchmm->lmtype == LM_PROB) {      if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) {	/* 黎片/琐萨の痰不モデルは腾菇陇步せず·	 * 黎片の痰不帽胳の黎片への莲败·琐萨帽胳の琐萨からの莲败は侯らない*/	/* sharelen=0でそのまま */	if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {	  jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");	  ok_p = FALSE;	}	continue;      }#ifndef NO_SEPARATE_SHORT_WORD      if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) {	/* 墓さの没い帽胳を腾菇陇步しない(ここでは1不泪) */	/* sharelen=0でそのまま */	if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {	  jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");	  ok_p = FALSE;	}	wchmm->separated_word_count++;	continue;      }#endif#ifdef SEPARATE_BY_UNIGRAM      if (wchmm->ngram) {	p = uni_prob(wchmm->ngram, wchmm->winfo->wton[i])#ifdef CLASS_NGRAM	  + wchmm->winfo->cprob[i]#endif	  ;
💿 文件大小 1116 K
👤 上传用户 a415834839
📂 所属分类网络
🏷️ 相关标签

#recognition #version #julius #about
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -