📄 spsegment.c

📁 julius version 4.12.about sound recognition.
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/**
 * @file   spsegment.c
 *
 * <EN>
 * @brief  Short-pause segmentation and decoder-based VAD
 *
 * In short-pause segmentation mode, Julius tries to find "pause
 * frames" by watching the word hypotheses at each frame.  Julius treats
 * a word that consists of only a silence model as a "pause word", and
 * judges an input frame to be a "pause frame" when one of the pause
 * words gets the maximum score at that frame.  The input is segmented
 * when the duration of the pause frames reaches a limit.
 *
 * A "pause word" is a dictionary word whose pronunciation consists of a
 * single model that corresponds to silence.  The silence models are the
 * model given by -spmodel and, when an N-gram is used, the head and tail
 * silence models.  They can be specified explicitly with the
 * -pausemodels option.
 *
 * On normal short-pause segmentation (as in ver.3.x and earlier), the
 * pause frames are not eliminated.  The input is segmented at the frame
 * where speech begins again after the pause frames, and the next input
 * is processed from the beginning of those pause frames.  In other
 * words, the detected area of pause frames is processed twice: as the
 * end-of-segment silence of the former input segment and as the
 * beginning-of-segment silence of the latter input segment.
 *
 * When SPSEGMENT_NAIST is defined, a long pause area is dropped from
 * recognition.  When the detected pause frames get longer than a
 * threshold, the input is segmented at that point and the following
 * pause is skipped until a speech frame comes.  While in the pause
 * segment, the recognition process is kept alive in a special status
 * that generates no hypotheses.  This scheme works as a decoder-driven
 * VAD.
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Wed Oct 17 12:47:29 2007
 *
 * $Revision: 1.2 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/**
 * <EN>
 * @brief  Check if the given word is a short-pause word.
 *
 * A word is a short-pause word only when it consists of exactly one
 * phone and that phone is a pause model:
 *  - if a pause model list exists in r->pass1.pausemodel, the name of
 *    the phone must match one of its entries;
 *  - otherwise the phone must be the short-pause model (hmm->sp), or,
 *    when r->lmtype is LM_PROB, the word must be the head or tail
 *    silence word of the dictionary.
 *
 * @param w [in] word id
 * @param r [in] recognition process instance
 *
 * @return TRUE if it is a short-pause word, FALSE if not.
 * </EN>
 *
 * @callgraph
 * @callergraph
 */
boolean
is_sil(WORD_ID w, RecogProcess *r)
{
  WORD_INFO *winfo = r->lm->winfo;
  HTK_HMM_INFO *hmm = r->am->hmminfo;
  int i;

  /* The number of phones must be exactly 1.  Rejecting every other
     length (not only lengths above 1) also keeps wseq[w][0] from being
     read for a word that has no phone at all. */
  if (winfo->wlen[w] != 1) return FALSE;

  if (r->pass1.pausemodel) {
    /* a pause model list is given: compare the phone name with each entry */
    for (i = 0; i < r->pass1.pausemodelnum; i++) {
      if (strmatch(winfo->wseq[w][0]->name, r->pass1.pausemodel[i])) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /* short pause (specified by "-spmodel") */
  if (winfo->wseq[w][0] == hmm->sp) return TRUE;

  /* head/tail silence words, checked only for LM_PROB */
  if (r->lmtype == LM_PROB
      && (w == winfo->head_silwid || w == winfo->tail_silwid)) {
    return TRUE;
  }

  return FALSE;
}

/**
 * <EN>
 * @brief  Split input parameter for segmentation.
 *
 * Copy the remaining samples in param to rest_param, and shrink the
 * param in the mfcc instance.  [start...param->samplenum] will be
 * copied to rest_param, and [0...end] will be left in param.
 * </EN>
 * <JA>
 * @brief  Split the input parameter at segmentation.
 *
 * Copy the remaining samples (from the current frame to the end) to
 * rest_param and shorten the original param.
[start...param->samplenum] が * rest_param にコピ〖され·傅の param には [0...end] が荒る.  * </JA> *  * @param mfcc [i/o] MFCC calculation instance * @param start [in] copy start frame * @param end [in] original end frame *  * @callgraph * @callergraph */voidmfcc_copy_to_rest_and_shrink(MFCCCalc *mfcc, int start, int end){  int t;  /* copy rest parameters for next process */  mfcc->rest_param = new_param();  memcpy(&(mfcc->rest_param->header), &(mfcc->param->header), sizeof(HTK_Param_Header));  mfcc->rest_param->samplenum = mfcc->param->samplenum - start;  mfcc->rest_param->header.samplenum = mfcc->rest_param->samplenum;  mfcc->rest_param->veclen = mfcc->param->veclen;  if (param_alloc(mfcc->rest_param, mfcc->rest_param->samplenum, mfcc->rest_param->veclen) == FALSE) {    j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n");  }  /* copy data */  for(t=start;t<mfcc->param->samplenum;t++) {    memcpy(mfcc->rest_param->parvec[t-start], mfcc->param->parvec[t], sizeof(VECT) * mfcc->rest_param->veclen);  }    /* shrink original param */  /* just shrink the length */  mfcc->param->samplenum = end;}/**  * <EN> * Shrink the parameter sequence.  Drop the first (p-1) frames and * move [p..samplenum] to 0. * </EN> * <JA> * パラメ〖タを没くする. 呵介の (p-1) フレ〖ムを久して·[p..samplenum] * のサンプルを呵介に低める.  * </JA> *  * @param mfcc [i/o] MFCC Calculation instance * @param p [in] frame point to remain *  * @callgraph * @callergraph */voidmfcc_shrink(MFCCCalc *mfcc, int p){  int t;  int len;  if (p > 0) {    /* copy data */    for(t=p;t<mfcc->param->samplenum;t++) {      memcpy(mfcc->param->parvec[t-p], mfcc->param->parvec[t], sizeof(VECT) * mfcc->param->veclen);    }    /* shrink original param */    /* just shrink the length */    len = mfcc->param->samplenum - p;    mfcc->param->samplenum = len;    mfcc->param->header.samplenum = len;  }}/**  * <JA> * @brief  券厦惰粗姜位の浮梦 *  * ショ〖トポ〖ズセグメンテ〖ション回年箕· * 券厦惰粗の姜位を浮叫する. 痰不帽胳が息鲁して呵锑铬输となるフレ〖ム眶を * カウントし·办年箕粗积鲁稿にふたたび不兰がトリガした箕爬で掐蜗を * 惰磊る.  
* * SPSEGMENT_NAIST 年盗箕は·よりセグメント涟稿ˇ粗の痰不箕粗が墓い眷圭を * 鳞年したデコ〖ダベ〖スの VAD に磊り仑わる. この眷圭·不兰トリガ浮叫涟 * (r->pass1.after_triger == FALSE)では·簿棱を栏喇しない觉轮で千急借妄を * 鲁ける. 不兰倡幌を浮叫したら泼魔翁を办年墓 (r->config->successive.sp_margin) * 尸だけ船き提して·奶撅の千急を倡幌する(r->pass1.after_trigger == TRUE).  * 奶撅の千急面に痰不惰粗が墓く (r->config->successive.sp_frame_duration 笆惧) * 鲁いたら·そこで掐蜗を惰磊る.  *  * @param r [i/o] 不兰千急借妄インスタンス * @param time [in] 附哼の掐蜗フレ〖ム *  * @return TRUE (このフレ〖ムでの姜位を浮叫したら), FALSE (姜位でない眷圭) * </JA> * <EN> * @brief  Speech end point detection. *  * Detect end-of-input by duration of short-pause words when short-pause * segmentation is enabled.  When a pause word gets maximum score for a * successive frames, the segment will be treated as a pause frames. * When speech re-triggers, the current input will be segmented at that point. * * When SPSEGMENT_NAIST is defined, this function performs extended version * of the short pause segmentation, called "decoder-based VAD".  When before * speech trigger (r->pass1.after_trigger == FALSE), it tells the recognition * functions not to generate word trellis and continue calculation.  If a * speech trigger is found (not a pause word gets maximum score), the * input frames are 'rewinded' for a certain frame * (r->config->successive.sp_margin) and start the normal recognition * process from the rewinded frames (r->pass1.after_trigger = TRUE). * When a pause frame duration reaches a limit * (r->config->successive.sp_frame_duration), it terminate the search. *  * @param r [i/o] recognition process instance * @param time [in] current input frame *  * @return TRUE if end-of-input detected at this frame, FALSE if not. 
* </EN> * @callgraph * @callergraph */booleandetect_end_of_segment(RecogProcess *r, int time){  FSBeam *d;  TRELLIS_ATOM *tre;  LOGPROB maxscore = LOG_ZERO;  TRELLIS_ATOM *tremax = NULL;  int count = 0;  boolean detected = FALSE;#ifdef SPSEGMENT_NAIST  MFCCCalc *mfcc;  WORD_ID wid;  int j;  TOKEN2 *tk;  int startframe;#endif  d = &(r->pass1);#ifdef SPSEGMENT_NAIST  if (! d->after_trigger) {    /* we are in the first long pause segment before trigger */    /* find word end of maximum score from beam status */    for (j = d->n_start; j <= d->n_end; j++) {      tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);      if (r->wchmm->stend[tk->node] != WORD_INVALID) {        if (maxscore < tk->score) {          maxscore = tk->score;          wid = r->wchmm->stend[tk->node];        }      }    }    if (maxscore == LOG_ZERO) detected = TRUE;    else if (is_sil(wid, r)) detected = TRUE;     if (detected) {      /***********************/      /* this is noise frame */      /***********************/      /* reset trigger duration */      d->trigger_duration = 0;            /* if noise goes more than a certain frame, shrink the noise area         to avoid unlimited memory usage */      if (r->am->mfcc->f > SPSEGMENT_NAIST_AUTOSHRINK_LIMIT) {        d->want_rewind = TRUE;        d->rewind_frame = r->am->mfcc->f - r->config->successive.sp_margin;        d->want_rewind_reprocess = FALSE;
12 3 下一页
💿 文件大小 1116 K
👤 上传用户 a415834839
📂 所属分类网络
🏷️ 相关标签

#recognition #version #julius #about
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -