📄 realtime-1stpass.c

📁 julius version 4.12.about sound recognition.
💻 C
📖 第 1 页 / 共 4 页
字号:
    } else {      init_param(mfcc);    }    /* フレ〖ムごとのパラメ〖タベクトル瘦赂の挝拌を澄瘦 */    /* あとで涩妥に炳じて凯墓される */    if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) {      j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n");    }    /* フレ〖ム眶をリセット */    /* reset frame count */    mfcc->f = 0;  }  /* 洁洒した param 菇陇挛のデ〖タのパラメ〖タ房を不读モデルとチェックする */  /* check type coherence between param and hmminfo here */  if (recog->jconf->input.paramtype_check_flag) {    for(am=recog->amlist;am;am=am->next) {      if (!check_param_coherence(am->hmminfo, am->mfcc->param)) {	jlog("ERROR: input parameter type does not match AM\n");	return FALSE;      }    }  }  /* 纷换脱のワ〖クエリアを洁洒 */  /* prepare work area for calculation */  if (recog->jconf->input.type == INPUT_WAVEFORM) {    reset_mfcc(recog);  }  /* 不读锑刨纷换脱キャッシュを洁洒 */  /* prepare cache area for acoustic computation of HMM states and mixtures */  for(am=recog->amlist;am;am=am->next) {    outprob_prepare(&(am->hmmwrk), r->maxframelen);  }#ifdef BACKEND_VAD  if (recog->jconf->decodeopt.segment) {    /* initialize segmentation parameters */    spsegment_init(recog);  }#else  recog->triggered = FALSE;#endif#ifdef DEBUG_VTLN_ALPHA_TEST  /* store speech */  recog->speechlen = 0;#endif  return TRUE;}/**  * <JA> * @brief  不兰侨妨からパラメ〖タベクトルを纷换する. *  * 岭帽疤で艰り叫された不兰侨妨からMFCCベクトルを纷换する. * 纷换冯蔡は mfcc->tmpmfcc に瘦赂される.  *  * @param mfcc [i/o] MFCC纷换インスタンス * @param window [in] 岭帽疤で艰り叫された不兰侨妨デ〖タ * @param windowlen [in] @a window の墓さ *  * @return 纷换喇根箕·TRUE を手す. デルタ纷换において掐蜗フレ〖ムが * 警ないなど·まだ评られていない眷圭は FALSE を手す.  * </JA> * <EN> * @brief  Compute a parameter vector from a speech window. * * This function calculates an MFCC vector from speech data windowed from * input speech.  The obtained MFCC vector will be stored to mfcc->tmpmfcc. *  * @param mfcc [i/o] MFCC calculation instance * @param window [in] speech input (windowed from input stream) * @param windowlen [in] length of @a window *  * @return TRUE on success (an vector obtained).  
Returns FALSE if no * parameter vector obtained yet (due to delta delay). * </EN> * * @callgraph * @callergraph *  */booleanRealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen){  int i;  boolean ret;  VECT *tmpmfcc;  Value *para;  tmpmfcc = mfcc->tmpmfcc;  para = mfcc->para;  /* 不兰侨妨から base MFCC を纷换 (recog->mfccwrk を网脱) */  /* calculate base MFCC from waveform (use recog->mfccwrk) */  for (i=0; i < windowlen; i++) {    mfcc->wrk->bf[i+1] = (float) window[i];  }  WMP_calc(mfcc->wrk, tmpmfcc, para);  if (para->energy && para->enormal) {    /* 滦眶エネルギ〖灌を赖惮步する */    /* normalize log energy */    /* リアルタイム掐蜗では券厦ごとの呵络エネルギ〖が评られないので       木涟の券厦のパワ〖で洛脱する */    /* Since the maximum power of the whole input utterance cannot be       obtained at real-time input, the maximum of last input will be       used to normalize.    */    tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para);  }  if (para->delta) {    /* デルタを纷换する */    /* calc delta coefficients */    ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc);#ifdef RDEBUG    printf("DeltaBuf: ret=%d, status=", ret);    for(i=0;i<mfcc->db->len;i++) {      printf("%d", mfcc->db->is_on[i]);    }    printf(", nextstore=%d\n", mfcc->db->store);#endif    /* ret == FALSE のときはまだディレイ面なので千急借妄せず肌掐蜗へ */    /* if ret == FALSE, there is no available frame.  So just wait for       next input */    if (! 
ret) {      return FALSE;    }    /* db->vec に附哼の傅デ〖タとデルタ犯眶が掐っているので tmpmfcc にコピ〖 */    /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */    memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);  }  if (para->acc) {    /* Accelerationを纷换する */    /* calc acceleration coefficients */    /* base+delta をそのまま掐れる */    /* send the whole base+delta to the cycle buffer */    ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc);#ifdef RDEBUG    printf("AccelBuf: ret=%d, status=", ret);    for(i=0;i<mfcc->ab->len;i++) {      printf("%d", mfcc->ab->is_on[i]);    }    printf(", nextstore=%d\n", mfcc->ab->store);#endif    /* ret == FALSE のときはまだディレイ面なので千急借妄せず肌掐蜗へ */    /* if ret == FALSE, there is no available frame.  So just wait for       next input */    if (! ret) {      return FALSE;    }    /* ab->vec には·(base+delta) とその汗尸犯眶が掐っている.        [base] [delta] [delta] [acc] の界で掐っているので,       [base] [delta] [acc] を tmpmfcc にコピ〖する. */    /* now ab->vec holds the current (base+delta) and their delta coef.        it holds a vector in the order of [base] [delta] [delta] [acc],        so copy the [base], [delta] and [acc] to tmpmfcc.  
*/    memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2);    memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen);  }#ifdef POWER_REJECT  if (para->energy || para->c0) {    mfcc->avg_power += tmpmfcc[para->baselen-1];  }#endif  if (para->delta && (para->energy || para->c0) && para->absesup) {    /* 冷滦猛パワ〖を近殿 */    /* suppress absolute power */    memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen));  }  /* この箕爬で tmpmfcc に附箕爬での呵糠の泼魔ベクトルが呈羌されている */  /* tmpmfcc[] now holds the latest parameter vector */  /* CMN を纷换 */  /* perform CMN */  if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc);  return TRUE;}static intproceed_one_frame(Recog *recog){  MFCCCalc *mfcc;  RealBeam *r;  int maxf;  PROCESS_AM *am;  int rewind_frame;  boolean reprocess;  boolean ok_p;  r = &(recog->real);  /* call recognition start callback */  ok_p = FALSE;  maxf = 0;  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {    if (!mfcc->valid) continue;    if (maxf < mfcc->f) maxf = mfcc->f;    if (mfcc->f == 0) {      ok_p = TRUE;    }  }  if (ok_p && maxf == 0) {    /* call callback when at least one of MFCC has initial frame */    if (recog->jconf->decodeopt.segment) {#ifdef BACKEND_VAD      /* not exec pass1 begin callback here */#else      if (!recog->process_segment) {	callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);      }      callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);      callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);      recog->triggered = TRUE;#endif    } else {      callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);      callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);      recog->triggered = TRUE;    }  }  /* 称インスタンスについて mfcc->f の千急借妄を1フレ〖ム渴める */  switch (decode_proceed(recog)) {  case -1: /* error */    return -1;    break;  case 0:			/* success */    break;  case 1:			/* segmented */    /* 千急借妄のセグメント妥滇で姜わったことをフラグにセット */ 
   /* set flag which indicates that the input has ended with segmentation request */    r->last_is_segmented = TRUE;    /* tell the caller to be segmented by this function */    /* 钙び叫し傅に·ここで掐蜗を磊るよう帕える */    return 1;  }#ifdef BACKEND_VAD  /* check up trigger in case of VAD segmentation */  if (recog->jconf->decodeopt.segment) {    if (recog->triggered == FALSE) {      if (spsegment_trigger_sync(recog)) {	if (!recog->process_segment) {	  callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);	}	callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);	callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);	recog->triggered = TRUE;      }    }  }#endif    if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {    /* set total length to the current frame */    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {      if (!mfcc->valid) continue;      mfcc->param->header.samplenum = mfcc->f + 1;      mfcc->param->samplenum = mfcc->f + 1;    }    /* do rewind for all mfcc here */    spsegment_restart_mfccs(recog, rewind_frame, reprocess);    /* also tell adin module to rehash the concurrent audio input */    recog->adin->rehash = TRUE;    /* reset outprob cache for all AM */    for(am=recog->amlist;am;am=am->next) {      outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);    }    if (reprocess) {      /* process the backstep MFCCs here */      while(1) {	ok_p = TRUE;	for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {	  if (! mfcc->valid) continue;	  mfcc->f++;	  if (mfcc->f < mfcc->param->samplenum) {	    mfcc->valid = TRUE;	    ok_p = FALSE;	  } else {	    mfcc->valid = FALSE;	  }	}	if (ok_p) {	  /* すべての MFCC が姜わりに茫したのでル〖プ姜位 */	  /* all MFCC has been processed, end of loop  */	  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {	    if (! 
mfcc->valid) continue;	    mfcc->f--;	  }	  break;	}	/* 称インスタンスについて mfcc->f の千急借妄を1フレ〖ム渴める */	switch (decode_proceed(recog)) {	case -1: /* error */	  return -1;	  break;	case 0:			/* success */	  break;	case 1:			/* segmented */	  /* ignore segmentation while in the backstep segment */	  break;	}	/* call frame-wise callback */	callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);      }    }  }  /* call frame-wise callback if at least one of MFCC is valid at this frame */  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {    if (mfcc->valid) {      callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);      break;    }  }    return 0;}/**  * <JA> * @brief  妈1パス士乖不兰千急借妄のメイン * * この簇眶柒では·敛肌弄な泼魔翁藐叫および妈1パスの千急が乖われる.  * 掐蜗デ〖タに滦して岭齿けˇシフトを乖いMFCC纷换を乖いながら· * 不兰千急を1フレ〖ムずつ事误悸乖する.  * * 千急借妄∈decode_proceed()∷において·不兰惰粗姜位が妥滇される * ことがある. この眷圭·踏借妄の不兰を瘦赂して妈1パスを姜位する * よう钙叫傅に妥滇する.  * * SPSEGMENT_NAIST あるいは GMM_VAD などのバックエンドVAD年盗箕は·デコ〖ダベ〖スの * VAD ∈不兰惰粗倡幌浮叫∷に燃うデコ〖ディング扩告が乖われる.  * トリガ涟は·千急借妄が钙ばれるが·悸狠には称簇眶柒で千急借妄は * 乖われていない. 倡幌を浮叫した箕·この簇眶はそこまでに评られた * MFCC误を办年フレ〖ム墓尸船提し·その船提し黎から奶撅の千急借妄を * 浩倡する. なお·剩眶借妄インスタンス粗がある眷圭·倡幌トリガは * どれかのインスタンスが浮叫した箕爬で链ての倡幌が票袋される.  *  * この簇眶は·不兰掐蜗ル〖チンのコ〖ルバックとして钙ばれる. * 不兰デ〖タの眶篱サンプル峡不ごとにこの簇眶が钙び叫される.  *  * @param Speech [in] 不兰デ〖タへのバッファへのポインタ * @param nowlen [in] 不兰デ〖タの墓さ * @param recog [i/o] engine instance *  * @return エラ〖箕に -1 を·赖撅箕に 0 を手す. また·妈1パスを * 姜位するよう钙叫傅に妥滇するときは 1 を手す.  * </JA> * <EN> * @brief  Main function of the on-the-fly 1st pass decoding * * This function performs sucessive MFCC calculation and 1st pass decoding. * The given input data are windowed to a certain length, then converted * to MFCC, and decoding for the input frame will be performed in one * process cycle.  The loop cycle will continue with window shift, until * the whole given input has been processed. 
 *
 * In case of an input segment request from the decoding process (in
 * decode_proceed()), this function keeps the rest of the un-processed speech
 * in a buffer and tells the caller to stop input and end the 1st pass.
 *
 * When back-end VAD such as SPSEGMENT_NAIST or GMM_VAD is defined, decoder-based
 * VAD is enabled and its decoding control will be managed here.
 * In decoder-based VAD mode, the recognition will be processed but
 * no output will be done at the first un-triggering input area.
 * When speech input start is detected, this function will rewind the
 * already obtained MFCC sequence by a certain number of frames, and re-start
 * normal recognition at that point.  When multiple recognition process
 * instances are running, their segmentation will be synchronized.
 *
 * This function will be called each time a new speech sample comes,
 * as a callback from the A/D-in routine.
 *
 * @param Speech [in] pointer to the speech sample segments
 * @param nowlen [in] length of above
 * @param recog [i/o] engine instance
 *
 * @return -1 on error (tell caller to terminate), 0 on success (allow caller
 * to call me for the next segment).  It returns 1 when telling the caller to
 * terminate input and go on to the next pass.
 * </EN>
 *
 * @callgraph
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -