📄 realtime-1stpass.c
字号:
} else { init_param(mfcc); } /* フレ〖ムごとのパラメ〖タベクトル瘦赂の挝拌を澄瘦 */ /* あとで涩妥に炳じて凯墓される */ if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) { j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n"); } /* フレ〖ム眶をリセット */ /* reset frame count */ mfcc->f = 0; } /* 洁洒した param 菇陇挛のデ〖タのパラメ〖タ房を不读モデルとチェックする */ /* check type coherence between param and hmminfo here */ if (recog->jconf->input.paramtype_check_flag) { for(am=recog->amlist;am;am=am->next) { if (!check_param_coherence(am->hmminfo, am->mfcc->param)) { jlog("ERROR: input parameter type does not match AM\n"); return FALSE; } } } /* 纷换脱のワ〖クエリアを洁洒 */ /* prepare work area for calculation */ if (recog->jconf->input.type == INPUT_WAVEFORM) { reset_mfcc(recog); } /* 不读锑刨纷换脱キャッシュを洁洒 */ /* prepare cache area for acoustic computation of HMM states and mixtures */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), r->maxframelen); }#ifdef BACKEND_VAD if (recog->jconf->decodeopt.segment) { /* initialize segmentation parameters */ spsegment_init(recog); }#else recog->triggered = FALSE;#endif#ifdef DEBUG_VTLN_ALPHA_TEST /* store speech */ recog->speechlen = 0;#endif return TRUE;}/** * <JA> * @brief 不兰侨妨からパラメ〖タベクトルを纷换する. * * 岭帽疤で艰り叫された不兰侨妨からMFCCベクトルを纷换する. * 纷换冯蔡は mfcc->tmpmfcc に瘦赂される. * * @param mfcc [i/o] MFCC纷换インスタンス * @param window [in] 岭帽疤で艰り叫された不兰侨妨デ〖タ * @param windowlen [in] @a window の墓さ * * @return 纷换喇根箕·TRUE を手す. デルタ纷换において掐蜗フレ〖ムが * 警ないなど·まだ评られていない眷圭は FALSE を手す. * </JA> * <EN> * @brief Compute a parameter vector from a speech window. * * This function calculates an MFCC vector from speech data windowed from * input speech. The obtained MFCC vector will be stored to mfcc->tmpmfcc. * * @param mfcc [i/o] MFCC calculation instance * @param window [in] speech input (windowed from input stream) * @param windowlen [in] length of @a window * * @return TRUE on success (an vector obtained). Returns FALSE if no * parameter vector obtained yet (due to delta delay). * </EN> * * @callgraph * @callergraph * */booleanRealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen){ int i; boolean ret; VECT *tmpmfcc; Value *para; tmpmfcc = mfcc->tmpmfcc; para = mfcc->para; /* 不兰侨妨から base MFCC を纷换 (recog->mfccwrk を网脱) */ /* calculate base MFCC from waveform (use recog->mfccwrk) */ for (i=0; i < windowlen; i++) { mfcc->wrk->bf[i+1] = (float) window[i]; } WMP_calc(mfcc->wrk, tmpmfcc, para); if (para->energy && para->enormal) { /* 滦眶エネルギ〖灌を赖惮步する */ /* normalize log energy */ /* リアルタイム掐蜗では券厦ごとの呵络エネルギ〖が评られないので 木涟の券厦のパワ〖で洛脱する */ /* Since the maximum power of the whole input utterance cannot be obtained at real-time input, the maximum of last input will be used to normalize. */ tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para); } if (para->delta) { /* デルタを纷换する */ /* calc delta coefficients */ ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc);#ifdef RDEBUG printf("DeltaBuf: ret=%d, status=", ret); for(i=0;i<mfcc->db->len;i++) { printf("%d", mfcc->db->is_on[i]); } printf(", nextstore=%d\n", mfcc->db->store);#endif /* ret == FALSE のときはまだディレイ面なので千急借妄せず肌掐蜗へ */ /* if ret == FALSE, there is no available frame. So just wait for next input */ if (! ret) { return FALSE; } /* db->vec に附哼の傅デ〖タとデルタ犯眶が掐っているので tmpmfcc にコピ〖 */ /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */ memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); } if (para->acc) { /* Accelerationを纷换する */ /* calc acceleration coefficients */ /* base+delta をそのまま掐れる */ /* send the whole base+delta to the cycle buffer */ ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc);#ifdef RDEBUG printf("AccelBuf: ret=%d, status=", ret); for(i=0;i<mfcc->ab->len;i++) { printf("%d", mfcc->ab->is_on[i]); } printf(", nextstore=%d\n", mfcc->ab->store);#endif /* ret == FALSE のときはまだディレイ面なので千急借妄せず肌掐蜗へ */ /* if ret == FALSE, there is no available frame. So just wait for next input */ if (! ret) { return FALSE; } /* ab->vec には·(base+delta) とその汗尸犯眶が掐っている. [base] [delta] [delta] [acc] の界で掐っているので, [base] [delta] [acc] を tmpmfcc にコピ〖する. */ /* now ab->vec holds the current (base+delta) and their delta coef. it holds a vector in the order of [base] [delta] [delta] [acc], so copy the [base], [delta] and [acc] to tmpmfcc. */ memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2); memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen); }#ifdef POWER_REJECT if (para->energy || para->c0) { mfcc->avg_power += tmpmfcc[para->baselen-1]; }#endif if (para->delta && (para->energy || para->c0) && para->absesup) { /* 冷滦猛パワ〖を近殿 */ /* suppress absolute power */ memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen)); } /* この箕爬で tmpmfcc に附箕爬での呵糠の泼魔ベクトルが呈羌されている */ /* tmpmfcc[] now holds the latest parameter vector */ /* CMN を纷换 */ /* perform CMN */ if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc); return TRUE;}static intproceed_one_frame(Recog *recog){ MFCCCalc *mfcc; RealBeam *r; int maxf; PROCESS_AM *am; int rewind_frame; boolean reprocess; boolean ok_p; r = &(recog->real); /* call recognition start callback */ ok_p = FALSE; maxf = 0; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; if (maxf < mfcc->f) maxf = mfcc->f; if (mfcc->f == 0) { ok_p = TRUE; } } if (ok_p && maxf == 0) { /* call callback when at least one of MFCC has initial frame */ if (recog->jconf->decodeopt.segment) {#ifdef BACKEND_VAD /* not exec pass1 begin callback here */#else if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE;#endif } else { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } /* 称インスタンスについて mfcc->f の千急借妄を1フレ〖ム渴める */ switch (decode_proceed(recog)) { case -1: /* error */ return -1; break; case 0: /* success */ break; case 1: /* segmented */ /* 千急借妄のセグメント妥滇で姜わったことをフラグにセット */ /* set flag which indicates that the input has ended with segmentation request */ r->last_is_segmented = TRUE; /* tell the caller to be segmented by this function */ /* 钙び叫し傅に·ここで掐蜗を磊るよう帕える */ return 1; }#ifdef BACKEND_VAD /* check up trigger in case of VAD segmentation */ if (recog->jconf->decodeopt.segment) { if (recog->triggered == FALSE) { if (spsegment_trigger_sync(recog)) { if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } }#endif if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) { /* set total length to the current frame */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; mfcc->param->header.samplenum = mfcc->f + 1; mfcc->param->samplenum = mfcc->f + 1; } /* do rewind for all mfcc here */ spsegment_restart_mfccs(recog, rewind_frame, reprocess); /* also tell adin module to rehash the concurrent audio input */ recog->adin->rehash = TRUE; /* reset outprob cache for all AM */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); } if (reprocess) { /* process the backstep MFCCs here */ while(1) { ok_p = TRUE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! mfcc->valid) continue; mfcc->f++; if (mfcc->f < mfcc->param->samplenum) { mfcc->valid = TRUE; ok_p = FALSE; } else { mfcc->valid = FALSE; } } if (ok_p) { /* すべての MFCC が姜わりに茫したのでル〖プ姜位 */ /* all MFCC has been processed, end of loop */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! mfcc->valid) continue; mfcc->f--; } break; } /* 称インスタンスについて mfcc->f の千急借妄を1フレ〖ム渴める */ switch (decode_proceed(recog)) { case -1: /* error */ return -1; break; case 0: /* success */ break; case 1: /* segmented */ /* ignore segmentation while in the backstep segment */ break; } /* call frame-wise callback */ callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); } } } /* call frame-wise callback if at least one of MFCC is valid at this frame */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->valid) { callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); break; } } return 0;}/** * <JA> * @brief 妈1パス士乖不兰千急借妄のメイン * * この簇眶柒では·敛肌弄な泼魔翁藐叫および妈1パスの千急が乖われる. * 掐蜗デ〖タに滦して岭齿けˇシフトを乖いMFCC纷换を乖いながら· * 不兰千急を1フレ〖ムずつ事误悸乖する. * * 千急借妄∈decode_proceed()∷において·不兰惰粗姜位が妥滇される * ことがある. この眷圭·踏借妄の不兰を瘦赂して妈1パスを姜位する * よう钙叫傅に妥滇する. * * SPSEGMENT_NAIST あるいは GMM_VAD などのバックエンドVAD年盗箕は·デコ〖ダベ〖スの * VAD ∈不兰惰粗倡幌浮叫∷に燃うデコ〖ディング扩告が乖われる. * トリガ涟は·千急借妄が钙ばれるが·悸狠には称簇眶柒で千急借妄は * 乖われていない. 倡幌を浮叫した箕·この簇眶はそこまでに评られた * MFCC误を办年フレ〖ム墓尸船提し·その船提し黎から奶撅の千急借妄を * 浩倡する. なお·剩眶借妄インスタンス粗がある眷圭·倡幌トリガは * どれかのインスタンスが浮叫した箕爬で链ての倡幌が票袋される. * * この簇眶は·不兰掐蜗ル〖チンのコ〖ルバックとして钙ばれる. * 不兰デ〖タの眶篱サンプル峡不ごとにこの簇眶が钙び叫される. * * @param Speech [in] 不兰デ〖タへのバッファへのポインタ * @param nowlen [in] 不兰デ〖タの墓さ * @param recog [i/o] engine instance * * @return エラ〖箕に -1 を·赖撅箕に 0 を手す. また·妈1パスを * 姜位するよう钙叫傅に妥滇するときは 1 を手す. * </JA> * <EN> * @brief Main function of the on-the-fly 1st pass decoding * * This function performs sucessive MFCC calculation and 1st pass decoding. * The given input data are windowed to a certain length, then converted * to MFCC, and decoding for the input frame will be performed in one * process cycle. The loop cycle will continue with window shift, until * the whole given input has been processed. * * In case of input segment request from decoding process (in * decode_proceed()), this function keeps the rest un-processed speech * to a buffer and tell the caller to stop input and end the 1st pass. * * When back-end VAD such as SPSEGMENT_NAIST or GMM_VAD is defined, Decoder-based * VAD is enabled and its decoding control will be managed here. * In decoder-based VAD mode, the recognition will be processed but * no output will be done at the first un-triggering input area. * when speech input start is detected, this function will rewind the * already obtained MFCC sequence to a certain frames, and re-start * normal recognition at that point. When multiple recognition process * instance is running, their segmentation will be synchronized. * * This function will be called each time a new speech sample comes as * as callback from A/D-in routine. * * @param Speech [in] pointer to the speech sample segments * @param nowlen [in] length of above * @param recog [i/o] engine instance * * @return -1 on error (tell caller to terminate), 0 on success (allow caller * to call me for the next segment). It returns 1 when telling the caller to * terminate input and go on to the next pass. * </EN> * * @callgraph
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -