📄 word_align.c

📁 julius version 4.12.about sound recognition.
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/**
 * @file   word_align.c
 *
 * <EN>
 * @brief  Forced alignment by word / phoneme / state unit.
 *
 * This file defines functions for performing forced alignment of
 * recognized words.  The forced alignment is implemented in Julius/Julian
 * to get the best matching segmentation of recognized word sequence
 * upon input speech.  Word-level, phoneme-level and HMM state-level
 * alignment can be obtained.
 *
 * Julius/Julian performs the forced alignment as a post-processing of
 * recognition process.  Recomputation of Viterbi path on the recognized
 * word sequence toward input speech will be done after the recognition
 * to get better alignment.
 *
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Sat Sep 24 16:09:46 2005
 *
 * $Revision: 1.5 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/**
 * <EN>
 * Make the whole sentence HMM from given word sequence by connecting
 * each phoneme HMM.
 *
 * Besides building the phoneme sequence, this function fills the
 * caller-provided array @a *end_ret with the state index that ends each
 * alignment unit.  The array itself is not allocated here; the caller
 * must supply one large enough for the chosen @a per_what.
 *
 * @param wseq [in] word sequence to align
 * @param num [in] number of @a wseq
 * @param has_sp_ret [out] unit information of whether it can be followed
 *        by a short-pause (set to NULL unless the model is multipath and
 *        inter-word short pause is enabled)
 * @param num_ret [out] number of HMM contained in the generated sentence HMM
 * @param end_ret [out] sequence of state location as alignment unit
 * @param per_what [in] specify the alignment unit (word / phoneme / state)
 * @param r [in] recognition process instance
 *
 * @return newly malloced HMM sequences.
 * </EN>
 */
static HMM_Logical **
make_phseq(WORD_ID *wseq, short num, boolean **has_sp_ret, int *num_ret,
           int **end_ret, int per_what, RecogProcess *r)
{
  WORD_INFO *winfo = r->lm->winfo;
  HTK_HMM_INFO *hmminfo = r->am->hmminfo;
  /* the inter-word short pause setting is consulted only for multipath models */
  boolean use_iwsp = (hmminfo->multipath && r->lm->config->enable_iwsp) ? TRUE : FALSE;
  HMM_Logical **phones;         /* resulting phoneme sequence */
  boolean *sp_flags;            /* per-phoneme "short pause may follow" flags */
  int total;                    /* total number of phonemes in the sentence */
  int filled;                   /* number of phonemes stored so far */
  int state;                    /* running state index in the sentence HMM */
  int nend;                     /* number of entries written to *end_ret */
  int wi, pi;

  /* 1. count the phonemes of all words and allocate the output buffers */
  total = 0;
  for (wi = 0; wi < num; wi++) {
    total += winfo->wlen[wseq[wi]];
  }
  phones = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * total);
  sp_flags = use_iwsp ? (boolean *)mymalloc(sizeof(boolean) * total) : NULL;

  /* 2. walk the words and build the phoneme sequence */
  /* state numbering starts at 1 instead of 0 for multipath models */
  state = hmminfo->multipath ? 1 : 0;
  filled = 0;
  nend = 0;
  for (wi = 0; wi < num; wi++) {
    WORD_ID wid = wseq[wi];
    int wlen = winfo->wlen[wid];

    for (pi = 0; pi < wlen; pi++) {
      HMM_Logical *cur = winfo->wseq[wid][pi];
      HMM_Logical *ctx;
      boolean sp_follows = FALSE;
      int nbody;                /* state count of this phoneme minus 2
                                   (presumably the entry/exit states
                                   -- TODO confirm) */
      int nsp = 0;              /* same for the short pause model, if appended */

      /* handle cross-word context dependency */
      if (r->ccd_flag) {
        if (wi > 0 && pi == 0) {
          /* word head: apply the last phoneme of the previous word as
             left context; when no such model is found, the original
             phoneme is kept as is */
          ctx = get_left_context_HMM(cur, phones[filled - 1]->name, hmminfo);
          if (ctx != NULL) {
            cur = ctx;
          }
        }
        if (wi < num - 1 && pi == wlen - 1) {
          /* word tail: apply the first phoneme of the next word as
             right context */
          ctx = get_right_context_HMM(cur, winfo->wseq[wseq[wi + 1]][0]->name, hmminfo);
          if (ctx != NULL) {
            cur = ctx;
          }
        }
      }
      phones[filled] = cur;

      /* only the last phoneme of a word may be followed by a short pause */
      if (use_iwsp) {
        sp_flags[filled] = (pi == wlen - 1) ? TRUE : FALSE;
        sp_follows = sp_flags[filled];
      }

      nbody = hmm_logical_state_num(cur) - 2;
      if (sp_follows) {
        nsp = hmm_logical_state_num(hmminfo->sp) - 2;
      }

      if (per_what == PER_STATE) {
        /* every state is its own alignment unit: first the states of
           the phoneme, then those of the trailing short pause */
        int off = 0;
        int s;

        for (s = 0; s < nbody; s++) {
          (*end_ret)[nend++] = state + off++;
        }
        if (sp_follows) {
          for (s = 0; s < nsp; s++) {
            (*end_ret)[nend++] = state + off++;
          }
        }
      }

      /* advance past this phoneme (and its short pause, if any) */
      state += nbody;
      if (sp_follows) {
        state += nsp;
      }

      if (per_what == PER_PHONEME) {
        (*end_ret)[nend++] = state - 1;
      }
      filled++;
    }

    if (per_what == PER_WORD) {
      (*end_ret)[nend++] = state - 1;
    }
  }

  *num_ret = total;
  *has_sp_ret = sp_flags;
  return phones;
}

/**
 * <EN>
 * Build sentence HMM, call viterbi_segment() and output result.
*  * @param words [in] word sequence of the sentence * @param wnum [in] number of words in @a words * @param param [in] input parameter vector * @param per_what [in] specify the alignment unit (word / phoneme / state) * @param s [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> */static voiddo_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, SentenceAlign *align, RecogProcess *r){  HMM_Logical **phones;		/* phoneme sequence */  boolean *has_sp;		/* whether phone can follow short pause */  int k;  int phonenum;			/* num of above */  HMM *shmm;			/* sentence HMM */  int *end_state;		/* state number of word ends */  int *end_frame;		/* segmented last frame of words */  LOGPROB *end_score;		/* normalized score of each words */  LOGPROB allscore;		/* total score of this word sequence */  WORD_ID w;  int i, rlen;  int end_num = 0;  int *id_seq, *phloc = NULL, *stloc = NULL;  int j,n,p;  WORD_INFO *winfo;  HTK_HMM_INFO *hmminfo;  boolean enable_iwsp;		/* for multipath */  winfo = r->lm->winfo;  hmminfo = r->am->hmminfo;  if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp;  /* initialize result storage buffer */  switch(per_what) {  case PER_WORD:    jlog("ALIGN: === word alignment begin ===\n");    end_num = wnum;    phloc = (int *)mymalloc(sizeof(int)*wnum);    i = 0;    for(w=0;w<wnum;w++) {      phloc[w] = i;      i += winfo->wlen[words[w]];    }    break;  case PER_PHONEME:    jlog("ALIGN: === phoneme alignment begin ===\n");    end_num = 0;    for(w=0;w<wnum;w++) end_num += winfo->wlen[words[w]];    break;  case PER_STATE:    jlog("ALIGN: === state alignment begin ===\n");    end_num = 0;    for(w=0;w<wnum;w++) {      for (i=0;i<winfo->wlen[words[w]]; i++) {	end_num += hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2;      }      if (hmminfo->multipath && enable_iwsp) {	end_num += hmm_logical_state_num(hmminfo->sp) - 2;      }    }    phloc = (int 
*)mymalloc(sizeof(int)*end_num);    stloc = (int *)mymalloc(sizeof(int)*end_num);    {      n = 0;      p = 0;      for(w=0;w<wnum;w++) {	for(i=0;i<winfo->wlen[words[w]]; i++) {	  for(j=0; j<hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; j++) {	    phloc[n] = p;	    stloc[n] = j + 1;	    n++;	  }	  if (hmminfo->multipath && enable_iwsp && i == winfo->wlen[words[w]] - 1) {	    for(k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {	      phloc[n] = p;	      stloc[n] = j + 1 + k + end_num;	      n++;	    }	  }	  p++;	}      }    }        break;  }  end_state = (int *)mymalloc(sizeof(int) * end_num);  /* make phoneme sequence word sequence */  phones = make_phseq(words, wnum, &has_sp, &phonenum, &end_state, per_what, r);  /* build the sentence HMMs */  shmm = new_make_word_hmm(hmminfo, phones, phonenum, has_sp);  if (shmm == NULL) {    j_internal_error("Error: failed to make word hmm for alignment\n");  }  /* call viterbi segmentation function */  allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen);  /* store result to s */  align->num = rlen;  align->unittype = per_what;  align->begin_frame = (int *)mymalloc(sizeof(int) * rlen);  align->end_frame   = (int *)mymalloc(sizeof(int) * rlen);  align->avgscore    = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen);  for(i=0;i<rlen;i++) {    align->begin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1;    align->end_frame[i]   = end_frame[i];    align->avgscore[i]    = end_score[i];  }  switch(per_what) {  case PER_WORD:
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -