📄 gmm.c
字号:
for(d=gmm->start;d;d=d->next) { if (d->state_num > 3) { jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name); return FALSE; } } /* check if CMN needed */ /* allocate work area */ if (recog->gc == NULL) { gc = (GMMCalc *)mymalloc(sizeof(GMMCalc)); recog->gc = gc; } else { gc = recog->gc; } /* allocate buffers */ gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);#ifdef GMM_VAD gc->nframe = recog->jconf->detect.gmm_margin; gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);#endif gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum); i = 0; if (recog->jconf->reject.gmm_reject_cmn_string) { for(d=recog->gmm->start;d;d=d->next) { if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) { gc->is_voice[i] = FALSE; } else { gc->is_voice[i] = TRUE; } i++; } } else { for(d=recog->gmm->start;d;d=d->next) { gc->is_voice[i] = TRUE; i++; } } /* initialize work area */ gc->OP_nstream = gmm->opt.stream_info.num; for(i=0;i<gc->OP_nstream;i++) { gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i]; } gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num); /* check if variances are inversed */ if (!gmm->variance_inversed) { /* here, inverse all variance values for faster computation */ htk_hmm_inverse_variances(gmm); gmm->variance_inversed = TRUE; } return TRUE;}/** * <JA> * GMM纷换のための洁洒を乖なう. 1掐蜗倡幌ごとに钙ばれる. * * @param recog [i/o] エンジンインスタンス * </JA> * <EN> * Prepare for the next GMM computation. This will be called just before * an input begins. * * @param recog [i/o] engine instance * </EN> * * @callgraph * @callergraph */voidgmm_prepare(Recog *recog){ HTK_HMM_Data *d; int i; /* initialize score buffer and frame count */ i = 0; for(d=recog->gmm->start;d;d=d->next) { recog->gc->gmm_score[i] = 0.0; i++; }#ifdef GMM_VAD for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0; recog->gc->framep = 0; recog->gc->filled = FALSE; recog->gc->in_voice = FALSE;#endif recog->gc->framecount = 0;#ifdef GMM_VAD_DEBUG printf("GMM_VAD: init\n");#endif}/** * <JA> * 涂えられた掐蜗ベクトル误惧のあるフレ〖ムについて·链GMMのスコアを纷换し· * 纷换冯蔡を gmm_score に姥换する. * * GMM_VAD 年盗箕は·稿で VAD 冉年するために·册殿 jconf->detect.gmm_margin * フレ〖ム尸の VAD スコア ∈不兰GMMの呵络スコア - 花不GMMの呵络スコア∷が * 瘦赂される. * * @param recog [i/o] エンジンインスタンス * </JA> * <EN> * Compute output probabilities of all GMM for a given input vector, and * accumulate the results to the gmm_score buffer. * * When GMM_VAD is defined, VAD scores, * "(maximum score of speech GMMs) - (maximum score of noise GMMs)" of * last frames (jconf->detect.gmm_margin) will be stored for later VAD * decision. * * @param recog [i/o] engine instance * </EN> * * @callgraph * @callergraph */voidgmm_proceed(Recog *recog){ HTK_HMM_Data *d; GMMCalc *gc; int i; MFCCCalc *mfcc; LOGPROB score;#ifdef GMM_VAD LOGPROB max_n; LOGPROB max_v;#endif mfcc = recog->gmmmfcc; gc = recog->gc; if (!mfcc->valid) return; gc->framecount++;#ifdef GMM_VAD max_n = max_v = LOG_ZERO;#endif i = 0; for(d=recog->gmm->start;d;d=d->next) { score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param); gc->gmm_score[i] += score;#ifdef GMM_VAD if (gc->is_voice[i]) { if (max_v < score) max_v = score; } else { if (max_n < score) max_n = score; }#endif#ifdef MES jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);#endif i++; }#ifdef GMM_VAD#ifdef GMM_VAD_DEBUG //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep);#endif /* set rate of this frame */ gc->rates[gc->framep] = max_v - max_n;#ifdef GMM_VAD_DEBUG printf("GMM_VAD: %f\n", max_v - max_n);#endif /* increment current frame pointer */ gc->framep++; /* if reached end, go to start point */ if (gc->framep >= gc->nframe) { gc->filled = TRUE; gc->framep = 0; }#endif}/** * <JA> * @brief GMMの纷换を姜位し·冯蔡を叫蜗する. * * gmm_proceed() によって芜姥された称フレ〖ムごとのスコアから· * 呵络スコアのGMMを疯年する. その祸稿澄唯に答づく慨完刨を纷换し * 呵姜弄な冯蔡を result_gmm() によって叫蜗する. * * @param recog [i/o] エンジンインスタンス * </JA> * <EN> * @brief Finish the GMM computation for an input, and output the result. * * The GMM of the maximum score is finally determined from the accumulated * scores computed by gmm_proceed(), and compute the confidence score of the * maximum GMM using posterior probability. Then the result will be output * using result_gmm(). * * @param recog [i/o] engine instance * </EN> * * @callgraph * @callergraph */voidgmm_end(Recog *recog){ HTK_HMM_INFO *gmm; LOGPROB *score; HTK_HMM_Data *d; LOGPROB maxprob; HTK_HMM_Data *dmax;#ifdef CONFIDENCE_MEASURE LOGPROB sum;#endif int i; int maxid; if (recog->gc->framecount == 0) return; gmm = recog->gmm; score = recog->gc->gmm_score; /* get max score */ i = 0; maxprob = LOG_ZERO; dmax = NULL; maxid = 0; for(d=gmm->start;d;d=d->next) { if (maxprob < score[i]) { dmax = d; maxprob = score[i]; maxid = i; } i++; } recog->gc->max_d = dmax; recog->gc->max_i = maxid;#ifdef CONFIDENCE_MEASURE /* compute CM */ sum = 0.0; i = 0; for(d=gmm->start;d;d=d->next) { //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob)); sum += pow(10, 0.05 * (score[i] - maxprob)); i++; } recog->gc->gmm_max_cm = 1.0 / sum;#endif /* output result */ callback_exec(CALLBACK_RESULT_GMM, recog);}/** * <JA> * GMMの急侍冯蔡·呵稿の掐蜗が不兰掐蜗として铜跟であったか * 痰跟であったかを手す. * * @param recog [i/o] エンジンインスタンス * * @return 办疤のGMMの叹涟が gmm_reject_cmn_string 柒に痰ければ valid として * TRUE, あれば invalid として FALSE を手す. * </JA> * <EN> * Return whether the last input was valid or invalid, from the result of * GMM computation. * * @param recog [i/o] engine instance * * @return TRUE if input is valid, i.e. the name of maximum GMM is not included * in gmm_reject_cmn_string, or FALSE if input is invalid, i.e. the name is * included in that string. * </EN> * * @callgraph * @callergraph */booleangmm_valid_input(Recog *recog){ if (recog->gc->max_d == NULL) return FALSE; if (recog->gc->is_voice[recog->gc->max_i]) { return TRUE; } return FALSE;}/** * <EN> * Free work area used for GMM calculation. * </EN> * <JA> * GMM纷换に脱いたワ〖クエリアを倡庶する. * </JA> * * @param recog [i/o] engine instance * * @callgraph * @callergraph * */voidgmm_free(Recog *recog){ if (recog->gc) { free(recog->gc->OP_calced_score); free(recog->gc->OP_calced_id); free(recog->gc->is_voice);#ifdef GMM_VAD free(recog->gc->rates);#endif free(recog->gc->gmm_score); free(recog->gc); recog->gc = NULL; }}#ifdef GMM_VAD/** * <EN> * Compute score of voice activity from the last (jconf->detect.gmm_margin) * frames. Positive value designates speech, and negative means noise. * </EN> * <JA> * 木涟の (jconf->detect.gmm_margin) フレ〖ム尸のスコアから * voice activity のスコアを纷换する. 赖の猛は不兰·砷の猛は花不を山す. * </JA> * * @param gc [i/o] work area for GMM calculation * @param mean_ret [out] mean value of last (jconf->detect.gmm_margin) frames * @param var_ret [out] variance of last (jconf->detect.gmm_margin) frames * @param count_ret [out] count of speech frames in last (jconf->detect.gmm_margin) frames * */static voidvoice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret){ int i, len; LOGPROB mean; LOGPROB var; LOGPROB x; int count; if (!gc->filled) { /* cycle buffer not filled yet */ *mean_ret = 0.0; *var_ret = 0.0; *count_ret = 0; return; } if (gc->filled) { len = gc->nframe; } else { len = gc->framep; } mean = 0; count = 0; for(i=0;i<len;i++) { mean += gc->rates[i]; if (gc->rates[i] > 0.0) count++; } mean /= (float)len; var = 0.0; for(i=0;i<len;i++) { x = mean - gc->rates[i]; var += x * x; } var /= (float)len; *mean_ret = mean; *var_ret = var; *count_ret = count;}/** * <EN> * Check if trigger of speech / noise segment. If we are in noise segment * and some speech input begins at this frame, recog->gc->up_trigger will * be set to TRUE. If current is in speech segment and it ended at * this frame, recog->gc->down_trigger will be set to FALSE. * </EN> * <JA> * 不兰/润不兰惰粗の惰磊りを浮梦する. これまでが润不兰惰粗でこのフレ〖ムで * 不兰トリガを浮梦したとき·recog->gc->up_trigger を TRUE にセットする. 附哼 * 不兰惰粗で惰粗姜位を浮梦したとき·recog->gc->down_trigger を TRUE に * セットする. * </JA> * * @param recog [i/o] engine instance * * @callgraph * @callergraph */voidgmm_check_trigger(Recog *recog){ GMMCalc *gc; gc = recog->gc; float mean; float var; int count; gc->up_trigger = gc->down_trigger = FALSE; voice_activity_score(gc, &mean, &var, &count); if (gc->in_voice) { if (mean <= recog->jconf->detect.gmm_downtrigger_thres) { gc->down_trigger = TRUE; gc->in_voice = FALSE; } } else { if (mean >= recog->jconf->detect.gmm_uptrigger_thres) { gc->up_trigger = TRUE; gc->in_voice = TRUE; } }#ifdef GMM_VAD_DEBUG printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count); if (gc->up_trigger) printf(": BEGIN"); if (gc->down_trigger) printf(": END"); printf("\n");#endif}#endif /* GMM_VAD *//* end of file */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -