📄 dec_decode_3.cc
字号:
// file: dec_decode_3.cc//// system include files//#include <unistd.h>// isip include files//#include "decoder.h"#include "decoder_constants.h"#include <time.h>// method: decode_cc//// arguments:// FILE* fin: (input) feature input file// FILE* fout: (input) decoder output file// FILE* flat: (input) decoder output lattice file// FILE* fnbest: (input) decoder output nbest file//// return: logical_1 indicating status//// this method decodes an utterance specified by the input file and// writes the output to the output file, and the generated lattice ---// if any --- to the lattice output file//logical_1 Decoder::decode_cc(FILE* fin_a, FILE* fout_a, FILE* flat_a, FILE* nbest_a) { // time counting variables // float_8 begin, end; // start the counter // decode_time_d = 0; begin = (float_8) clock(); // reset the decoder to prepare for decoding this utterance // reset_cc(); // print the initial list of word-hypotheses to file // if (demo_mode_d == ISIP_TRUE) { write_output_cc(fout_a); } num_steps_d = (int_4)0; // flag indicating the start of utterrance // logical_1 start = ISIP_TRUE; // read HTK header // if (input_feature_format_d == DEC_HTK_FORMAT) { read_htk_header_cc(fin_a); } // loop over all frames of input feature data // while (read_features_cc(fin_a, start) == ISIP_TRUE) { // increment the current time frame // frame_d++; // reset counts // for (int_4 i = 0; i < num_levels_d; i++) { num_traces_gen_d[i] = (int_4)0; num_traces_del_d[i] = (int_4)0; } // if this is the demo mode // if (demo_mode_d == ISIP_TRUE) { // read number of steps to advance frames // read_key_cc(); // if this is the abort number, quit loop and go to find // sentence end hyps // if (num_steps_d < (int_4)0) { break; } } // loop over all active word and phone-level traces in the // current time frame and grow the next phone // enter_models_cc(); // maximum active model instance pruning on all instances of // models propagated from previous frame // prune_inst_cc(); // loop over all active state-level traces // evaluate the state and update the score for each trace // then find all possible transitions and advance traces // project_states_cc(); // find the beam pruning score for state-level pruning // set_beam_cc(DEC_STATE_LEVEL); // prune away any traces that do not satisfy the new state beam // beam_prune_cc(); // find the phone level beam pruning threshold // set_beam_cc(DEC_MODEL_LEVEL); // create word-level traces for the word-end phone-level traces // make sure that the inactive traces are no longer kept // exit_word_cc(); // find the word level beam pruning threshold // set_beam_cc(DEC_WORD_LEVEL); // sort the word score list to get the cutoff score // word_cutoff_cc(); // store counts // for (int_4 i = 0; i < num_levels_d; i++) { total_gen_d[i] += num_traces_gen_d[i]; total_del_d[i] += num_traces_del_d[i]; num_traces_total_d[i] += (num_traces_gen_d[i] - num_traces_del_d[i]); } // output the frame status // if (frame_d % DEC_FRAMES_OUTNUM == (int_4)0) { // get the end counter after decoding 100 frames // 'else' counts on the time when wrapping around // end = (float_8) clock(); if (begin <= end) { decode_time_d += (double) (end - begin); } else { decode_time_d += (2147483647 + end) + (2147483647 - begin); } fprintf(stdout, "Processed %ld frames: decoding time = %10.1f seconds, %7.2f xRT\n", frame_d, decode_time_d/1000000, decode_time_d/(frame_d*10000)); fflush(stdout); // start the counter // begin = (float_8) clock(); } // dump the frame statistics to output // if ((demo_mode_d == ISIP_TRUE) && (num_steps_d == (int_4)0)) { // the word-level traces are used to determine the best word // hypotheses at the end of this frame // find_nbest_cc(); // print the n-best list of word-hypotheses to file // write_output_cc(fout_a); } // end if demo mode } // end while loop over all frames // get the end counter after decoding all the frames // 'else' counts on the time when wrapping around // end = (float_8) clock(); if (begin <= end) { decode_time_d += (double) (end - begin); } else { decode_time_d += (2147483647 + end) + (2147483647 - begin); } fprintf(stdout, "Processed %ld frames: decoding time = %10.1f seconds, %7.2f xRT\n", frame_d, decode_time_d/1000000, decode_time_d/(frame_d*10000)); fflush(stdout); // start the counter // begin = (float_8) clock(); // we have now finished the state sequences so we need to backtrace // i.e. find an emitting word level trace with the highest score and // follow backpointers to the start trace // backtrace_cc(); // if no sentence end is found print the best word end anyway // if (num_hyps_d == 0) { // print error message and find the best word end // fprintf(stdout, "No sentence-end hypotheses were found.\n"); find_nbest_cc(); } // print the n-best list of sentence-hypotheses to file // write_output_cc(fout_a); // if this is lattice generation mode build a lattice from this, // only if there are valid paths // if ((num_hyps_d > 0) && ((function_mode_d == DEC_LATTICE_GENERATE_FUNCTION) || (function_mode_d == DEC_LATTICE_LATTICE_FUNCTION))) { build_lattice_cc(flat_a); } // print the n-best list of sentence-hypotheses to file // else if ((num_nbest_d > 1) && (nbest_a != NULL)) { Lattice* lat = NULL; build_lattice_cc(lat); lattice_to_nbest_cc(lat, nbest_a, nbest_max_paths_d, nbest_beam_d); delete lat; } // clean up memory used for this utterance // clean_cc(); // get the end counter after decoding all the frames // 'else' counts on the time when wrapping around // end = (float_8) clock(); if (begin <= end) { decode_time_d += (float_8) (end - begin); } else { decode_time_d += (2147483647 + end) + (2147483647 - begin); } fprintf(stdout, "Decoded the utterance: %ld frames, %.1f seconds, %.2f xRT\n", frame_d, decode_time_d/1000000, decode_time_d/(frame_d*10000)); fflush(stdout); // update the overall counter // decode_time_d /= 1000000; // line break // fprintf(stdout, "\n"); // exit gracefully // return ISIP_TRUE;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -