📄 recogmain.c
字号:
/* feature vector input */ /************************/ if (jconf->input.speech_input == SP_MFCFILE) { /************************/ /* parameter file input */ /************************/ /* parameter type check --- compare the type to that of HMM, and adjust them if necessary */ if (jconf->input.paramtype_check_flag) { for(am=recog->amlist;am;am=am->next) { /* return param itself or new malloced param */ if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) { /* failed */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } /* tell failure */ result_error(recog, J_RESULT_STATUS_FAIL); goto end_recog; } } } /* whole input is already read, so set input status to end of stream */ /* and jump to the start point of 1st pass */ ret = 0; } } else { /*************************/ /* buffered speech input */ /*************************/ if (!recog->process_segment) { /* no segment left */ /****************************************/ /* store raw speech samples to speech[] */ /****************************************/ recog->speechlen = 0; for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } /* tell module to start recording */ /* the "adin_cut_callback_store_buffer" simply stores the input speech to a buffer "speech[]" */ /* end of this input will be determined by either end of stream (in case of file input), or silence detection by adin_go(), or 'TERMINATE' command from module (if module mode) */ ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog); if (ret < 0) { /* error end in adin_go */ if (ret == -2 || recog->process_want_terminate) { /* terminated by module */ /* output fail */ result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } jlog("ERROR: an error occured while recording input\n"); return -1; } /* output recorded length */ seclen = (float)recog->speechlen / (float)jconf->input.sfreq; jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen); /* -rejectshort 回年箕, 掐蜗が回年箕粗笆布であれば ここで掐蜗を逮笛する */ /* when using "-rejectshort", and input was shorter than specified, reject the input here */ if (jconf->reject.rejectshortlen > 0) { if (seclen * 1000.0 < jconf->reject.rejectshortlen) { result_error(recog, J_RESULT_STATUS_REJECT_SHORT); goto end_recog; } } /**********************************************/ /* acoustic analysis and encoding of speech[] */ /**********************************************/ jlog("STAT: ### speech analysis (waveform -> MFCC)\n"); /* CMN will be computed for the whole buffered input */ if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) { /* error end, end stream */ ret = -1; /* tell failure */ result_error(recog, J_RESULT_STATUS_FAIL); goto end_recog; } /* if terminate signal has been received, cancel this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* output frame length */ callback_exec(CALLBACK_STATUS_PARAM, recog); } }#ifdef ENABLE_PLUGIN /* call post-process plugin if exist */ plugin_exec_vector_postprocess_all(recog->mfcclist->param);#endif /******************************************************/ /* 1st-pass --- backward search to compute heuristics */ /******************************************************/ if (!jconf->decodeopt.realtime_flag) { /* prepare for outprob cache for each HMM state and time frame */ /* assume all MFCCCalc has params of the same sample num */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); } } /* if terminate signal has been received, cancel this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* execute computation of left-to-right backtrellis */ if (get_back_trellis(recog) == FALSE) { jlog("ERROR: fatal error occured, program terminates now\n"); return -1; }#ifdef BACKEND_VAD /* if not triggered, skip this segment */ if (recog->jconf->decodeopt.segment && ! recog->triggered) { goto end_recog; }#endif /* execute callback for 1st pass result */ /* result.status <0 must be skipped inside callback */ callback_exec(CALLBACK_RESULT_PASS1, recog);#ifdef WORD_GRAPH /* result.wg1 == NULL should be skipped inside callback */ callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);#endif /* execute callback at end of pass1 */ if (recog->triggered) { callback_exec(CALLBACK_EVENT_PASS1_END, recog); } /* END OF BUFFERED 1ST PASS */ } /**********************************/ /* end processing of the 1st-pass */ /**********************************/ /* on-the-fly 1st pass processing will join here */ /* -rejectshort 回年箕, 掐蜗が回年箕粗笆布であれば玫瑚己窃として */ /* 妈2パスを悸乖せずにここで姜位する */ /* when using "-rejectshort", and input was shorter than the specified length, terminate search here and output recognition failure */ if (jconf->reject.rejectshortlen > 0) { mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0; if (mseclen < jconf->reject.rejectshortlen) { result_error(recog, J_RESULT_STATUS_REJECT_SHORT); goto end_recog; } }#ifdef POWER_REJECT if (power_reject(recog)) { result_error(recog, J_RESULT_STATUS_REJECT_POWER); goto end_recog; }#endif /* if terminate signal has been received, cancel this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* if GMM is specified and result are to be rejected, terminate search here */ if (jconf->reject.gmm_reject_cmn_string != NULL) { if (! gmm_valid_input(recog)) { result_error(recog, J_RESULT_STATUS_REJECT_GMM); goto end_recog; } } /* for instances with "-1pass", copy 1st pass result as final */ /* execute stack-decoding search */ /* they will be skipepd in the next pass */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* skip if 1st pass was failed */ if (r->result.status < 0) continue; /* already stored on word recognition, so skip this */ if (r->lmvar == LM_DFA_WORD) continue; if (r->config->compute_only_1pass) { if (verbose_flag) { jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name); } /* prepare result storage */ result_sentence_malloc(r, 1); /* finalize result when no hypothesis was obtained */ pass2_finalize_on_no_result(r, TRUE); } } /***********************************************/ /* 2nd-pass --- forward search with heuristics */ /***********************************************/ pass2_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if [-1pass] is specified, skip 2nd pass */ if (r->config->compute_only_1pass) continue; /* if search already failed on 1st pass, skip 2nd pass */ if (r->result.status < 0) continue; pass2_p = TRUE; } if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog);#if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE) /* adjust trellis score not to contain outprob of the last frames */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if [-1pass] is specified, skip 2nd pass */ if (r->config->compute_only_1pass) continue; /* if search already failed on 1st pass, skip 2nd pass */ if (r->result.status < 0) continue; if (! r->am->hmminfo->multipath) { bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param); }#ifdef LM_FIX_DOUBLE_SCORING if (r->lmtype == LM_PROB) { bt_discount_lm(r->backtrellis); }#endif }#endif /* execute stack-decoding search */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if [-1pass] is specified, just copy from 1st pass result */ if (r->config->compute_only_1pass) continue; /* if search already failed on 1st pass, skip 2nd pass */ if (r->result.status < 0) continue; /* prepare result storage */ if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) { result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm)); } else { result_sentence_malloc(r, r->config->output.output_hypo_maxnum); } /* do 2nd pass */ if (r->lmtype == LM_PROB) { wchmm_fbs(r->am->mfcc->param, r, 0, 0); } else if (r->lmtype == LM_DFA) { if (r->config->output.multigramout_flag) { /* execute 2nd pass multiple times for each grammar sequencially */ /* to output result for each grammar */ MULTIGRAM *m; boolean has_success = FALSE; for(m = r->lm->grammars; m; m = m->next) { if (m->active) { jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id); wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num); if (r->result.status == J_RESULT_STATUS_SUCCESS) { has_success = TRUE; } } } r->result.status = (has_success == TRUE) ? J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL; } else { /* only the best among all grammar will be output */ wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num); } } } /* do forced alignment if needed */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if search failed on 2nd pass, skip this */ if (r->result.status < 0) continue; /* do needed alignment */ do_alignment_all(r, r->am->mfcc->param); } /* output result */ callback_exec(CALLBACK_RESULT, recog);#ifdef ENABLE_PLUGIN plugin_exec_process_result(recog);#endif /* output graph */ /* r->result.wg == NULL should be skipped inside the callback */ ok_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->config->compute_only_1pass) continue; if (r->result.status < 0) continue; if (r->config->graph.lattice) ok_p = TRUE; } if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog); /* output confnet */ /* r->result.confnet == NULL should be skipped inside the callback */ ok_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->config->compute_only_1pass) continue; if (r->result.status < 0) continue; if (r->config->graph.confnet) ok_p = TRUE; } if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog); /* clear work area for output */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; clear_result(r); } /* output end of 2nd pass */ if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog);#ifdef DEBUG_VTLN_ALPHA_TEST if (r->am->mfcc->para->vtln_alpha == 1.0) { /* if vtln parameter remains default, search for VTLN parameter */ vtln_alpha(recog, r); }#endif end_recog: /**********************/ /* end of recognition */ /**********************/ /* update CMN info for next input (in case of realtime wave input) */ if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) { for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->param->samplenum > 0) { RealTimeCMNUpdate(mfcc, recog); } } } process_segment_last = recog->process_segment; if (jconf->decodeopt.segment) { /* sp-segment mode */ /* param is now shrinked to hold only the processed input, and */ /* the rests are holded in (newly allocated) "rest_param" */ /* if this is the last segment, rest_param is NULL */ /* assume all segmentation are synchronized */ recog->process_segment = FALSE; for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->rest_param != NULL) { /* process the rest parameters in the next loop */ recog->process_segment = TRUE; free_param(mfcc->param); mfcc->param = mfcc->rest_param; mfcc->rest_param = NULL; } } } /* callback of recognition end */ if (jconf->decodeopt.segment) {#ifdef BACKEND_VAD if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);#else callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);#endif } else { callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); } if (verbose_flag) jlog("\n"); jlog_flush(); if (jconf->decodeopt.segment) { /* sp-segment mode */ if (recog->process_segment == TRUE) { if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n"); } else { /* input has reached end of stream, terminate program */ if (ret <= 0 && ret != -2) break; } } else { /* not sp-segment mode */ /* input has reached end of stream, terminate program */ if (ret <= 0 && ret != -2) break; } /* recognition continues for next (silence-aparted) segment */ } /* END OF STREAM LOOP */ /* close the stream */ if (jconf->input.type == INPUT_WAVEFORM) { if (adin_end(recog->adin) == FALSE) return -1; } if (jconf->input.speech_input == SP_MFCMODULE) { if (mfc_module_end(recog->mfcclist) == FALSE) return -1; } /* return to the opening of input stream */ return(0);}/** * <EN> * @brief Recognize an input stream. * * This function repeat recognition process for the whole input stream, * using segmentation and detection if required. It ends when the * whole input has been processed. * * When a recognition stop is requested from application, the following * callbacks will be called in turn: CALLBACK_EVENT_PAUSE, * CALLBACK_PAUSE_FUNCTION, CALLBACK_EVENT_RESUME. After finishing executing * all functions in these callbacks, recognition will restart. * If you have something to be processed while recognition stops, * you should write the function as callback to CALLBACK_PAUSE_FUNCTION. * Note that recognition will restart immediately after all functions * registered in CALLBACK_PAUSE_FUNCTION has been finished. * * </EN> * <JA> * @brief 掐蜗ストリ〖ムの千急を乖う * * 掐蜗ストリ〖ムに滦して * ∈涩妥であれば∷惰粗浮叫やVADを乖いながら千急を帆り手し乖っていく. * 掐蜗が姜眉に茫するかあるいはエラ〖で姜位する. * * アプリケ〖ションから千急の面们をリクエストされたときは· * CALLBACK_EVENT_PAUSE·CALLBACK_PAUSE_FUNCTION, * CALLBACK_EVENT_RESUME の界に钙んだあと千急に提る. このため· * 千急を面们させている粗に乖う借妄は·CALLBACK_PAUSE_FUNCTION * に判峡しておく涩妥がある. CALLBACK_PAUSE_FUNCTION に * 判峡されている链ての借妄が姜位したら千急を极瓢弄に浩倡するので * 庙罢すること. * * </JA> * * @param recog [i/o] engine instance * * @return 0 when finished recognizing all the input stream to the end, * or -1 on error. * * @callgraph * @callergraph * @ingroup engine */intj_recognize_stream(Recog *recog){ int ret; do { ret = j_recognize_stream_core(recog); switch(ret) { case 1: /* paused by a callback (stream will continue) */ /* call pause event callbacks */ callback_exec(CALLBACK_EVENT_PAUSE, recog); /* call pause functions */ /* block until all pause functions exits */ if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) { jlog("WARNING: pause requested but no pause function specified\n"); jlog("WARNING: engine will resume now immediately\n"); } callback_exec(CALLBACK_PAUSE_FUNCTION, recog); /* after here, recognition will restart for the rest input */ /* call resume event callbacks */ callback_exec(CALLBACK_EVENT_RESUME, recog); break; case 0: /* end of stream */ /* go on to the next input */ break; case -1: /* error */ jlog("ERROR: an error occured while recognition, terminate stream\n"); return -1; } } while (ret == 1); /* loop when paused by callback */ return 0;}/* end of file */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -