hmm_05.cc
        }
        else {
          total_output_file.close();
        }
      }
    }

    // loop through the input utterances
    //
    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
      current_file_num++;

      // if input is streaming, set input_file_name to STREAM_FILE
      //
      if (stream_d) {
        input_file_name.assign(File::STREAM_FILE);
      }

      // otherwise get the file path
      //
      else {

        // get audio file path
        //
        identifier.assign(input_ID);
        if (!audio_db_d.getRecord(identifier, input_file_name)) {
          return Error::handle(name(),
                               L"initialize - unable to find audio file for identifier",
                               Error::ARG, __FILE__, __LINE__);
        }
      }

      // print utterance processing information
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nprocessing file ");
        output.concat(current_file_num);
        output.concat(L" (");
        output.concat(identifier);
        output.concat(L")");
        output.concat(L": ");
        output.concat(input_file_name);
        Console::put(output);
        Console::decreaseIndention();
      }

      // initialize the decoder
      //
      if (!stack_engine_d.initializeLinearDecoder()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }

      // we don't need a transcription database for segmented utterances
      //
      if (transcription_db_file_d.length() > 0) {
        initTranscription(identifier, current_file_num - 1);
      }

      // process the utterance file by the front end
      //
      fe_d.open(input_file_name);

      // retrieve all frames of data in advance
      //
      extractFeatures(data);

      // decode the utterance
      //
      if (!verify_d) {

        // use the stack search engine
        //
        stack_engine_d.decode(fe_d);
      }
      fe_d.close();

      // pick up the best hypothesis and its parameters
      //
      String hypotheses;
      double score = 0;
      long num_frames = 0;
      DoubleLinkedList<Trace> trace_path;

      if (!stack_engine_d.getHypotheses(hypotheses, alignment_level_d,
                                        score, num_frames, trace_path)) {

        // if no hypothesis is found, clear the outputs
        //
        hypotheses.clear();
        trace_path.clear();

        if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {

          // print the warning message
          //
          Console::increaseIndention();
          Console::put(L"\nno hypothesis found");
          Console::decreaseIndention();
        }
      }
      else {
        num_valid_files++;
        total_num_frames += num_frames;
      }

      // print the detailed info about the hypothesis
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        Console::increaseIndention();
        output.assign(L"\nhyp: ");
        output.concat(hypotheses);
        output.concat(L"\nscore: ");
        output.concat(score);
        output.concat(L" frames: ");
        output.concat(num_frames);
        Console::put(output);
        Console::decreaseIndention();
        Console::decreaseIndention();
      }

      if (output_mode_d == TRANSFORM) {

        // transform the input file and its path to the output file
        //
        sdb_a.transformName(output_file_name, input_file_name);

        // open the output file and write the best hypothesis
        //
        File output_file;
        if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
          return Error::handle(name(),
                               L"linearDecoder - error opening output file",
                               Error::ARG, __FILE__, __LINE__);
        }
        output_file.put(hypotheses);

        // close the output file
        //
        output_file.close();
      }

      if (output_mode_d == FILE) {

        // output the best hypothesis
        //
        output.assign(hypotheses);
        if ((long)alignment_level_d < 0) {
          output.concat(L" (");
          output.concat(input_ID);
          output.concat(L")\n");
        }

        if (output_file_d.length() != 0) {
          total_output_file.open(output_file_d, File::APPEND_PLUS);
          total_output_file.put(output);
          total_output_file.close();
        }
        else {
          if (verbosity_d < Integral::BRIEF) {
            Console::increaseIndention();
            Console::increaseIndention();
            Console::put(output);
            Console::decreaseIndention();
            Console::decreaseIndention();
          }
        }
      }
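      // note: output_mode_d selects one of three output strategies:
      // TRANSFORM (above) derives the output name from the input path,
      // FILE (above) appends every hypothesis to a single file or echoes
      // it to the console, and LIST (below) writes one file per utterance,
      // taken in order from the list named by output_list_d.
      //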
      if (output_mode_d == LIST) {

        // output the hypothesis to the corresponding file
        // from the output list
        //
        boolean more_files;

        if (current_file_num == 1) {

          // read the output file list into a signal database
          //
          Sof output_list_file;
          if (!output_list_file.open(output_list_d)) {
            return Error::handle(name(),
                                 L"linearDecoder - error opening output list",
                                 Error::ARG, __FILE__, __LINE__);
          }
          output_sdb.read(output_list_file, 0);
          output_list_file.close();
          more_files = output_sdb.gotoFirst();
        }
        else {

          // move to the next output file
          //
          more_files = output_sdb.gotoNext();
        }

        if (!more_files) {
          return Error::handle(name(),
                               L"linearDecoder - insufficient output files in the output list",
                               Error::ARG, __FILE__, __LINE__);
        }

        // open the next output file and write the best hypothesis
        //
        output_sdb.getName(output_file_name);
        File output_file;
        if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
          Console::increaseIndention();
          output.assign(L"\ncannot open output file: ");
          output.concat(output_file_name);
          Console::put(output);
          Console::decreaseIndention();
        }
        else {
          output_file.put(hypotheses);
          output_file.put(L"\n");
          output_file.close();
        }
      }
    } // end of looping through the input utterances

    // clean up all memory
    //
    stack_engine_d.clear();

    // close the audio database (optional)
    //
    if (audio_db_file_d.length() > 0) {
      if (!audio_db_d.close()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }
    }

    // close the transcription database (optional)
    //
    if (transcription_db_file_d.length() > 0) {
      if (!transcription_db_d.close()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }
    }

    // print the number of successfully processed files
    //
    if (verbosity_d >= Integral::BRIEF) {
      output.assign(L"\nprocessed ");
      output.concat(num_valid_files);
      output.concat(L" file(s) successfully, attempted ");
      output.concat(current_file_num);
      output.concat(L" file(s), ");
      output.concat(total_num_frames);
      output.concat(L" frame(s)\n");
      Console::put(output);
    }
  } // end of DECODE using STACK

  // branch on TRAIN using BAUM_WELCH and function UPDATE;
  // does not support streaming input
  //
  else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH &&
           function_mode_d == UPDATE && !stream_d) {

    // declare local variables
    //
    String output;

    // load the hmm models from the model file
    //
    if (!load()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }

    // set the search engine mode
    //
    search_engine_d.setInitialLevel(initial_level_d);
    search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);

    // load the accumulators from file
    //
    loadAccumulators();

    // update the models using the accumulated statistics
    //
    update();

    // write the trained models to file
    //
    if (!store()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }

    // clean up memory
    //
    search_engine_d.clear();
  } // end of TRAIN using BAUM_WELCH and UPDATE

  // branch on TRAIN using BAUM_WELCH and function ACCUMULATE
  //
  else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH &&
           function_mode_d == ACCUMULATE) {

    // declare local variables
    //
    long num_valid_files = 0;
    long current_file_num = 0;
    long total_num_frames = 0;
    String identifier;
    Filename input_file_name;
    Filename input_ID;
    String output;
    String train_path;
    String train_file;
    Filename output_file_name;
    Sdb output_sdb;
    Vector<VectorFloat> data;

    // load the hmm models from the model file
    //
    if (!load()) {
      return Error::handle(name(), L"run: load hmm models", Error::ARG,
                           __FILE__, __LINE__);
    }
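    // the utterance loop below is the accumulation (E-step) half of
    // Baum-Welch training: each utterance is decoded against its
    // transcription, a forward-backward pass is run over the resulting
    // trellis, and sufficient statistics are accumulated. as a sketch in
    // standard HMM notation (these symbols are not identifiers from this
    // file), the quantities being accumulated are the state occupancy and
    // transition probabilities
    //
    //   gamma_t(i) = P(q_t = i | O, lambda)
    //   xi_t(i,j)  = P(q_t = i, q_t+1 = j | O, lambda),
    //
    // which the UPDATE branch above later turns into re-estimated
    // parameters, e.g. a_ij = sum_t xi_t(i,j) / sum_t gamma_t(i).
    //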
    // set the search engine mode
    //
    search_engine_d.setInitialLevel(initial_level_d);
    search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);

    // loop through each input utterance
    //
    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
      current_file_num++;

      // get audio file path
      //
      identifier.assign(input_ID);
      if (!audio_db_d.getRecord(identifier, input_file_name)) {
        return Error::handle(name(),
                             L"initialize - unable to find audio file for identifier",
                             Error::ARG, __FILE__, __LINE__);
      }

      // print utterance processing information
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nprocessing file ");
        output.concat(current_file_num);
        output.concat(L" (");
        output.concat(identifier);
        output.concat(L")");
        output.concat(L": ");
        output.concat(input_file_name);
        Console::put(output);
        Console::decreaseIndention();
      }

      // clear all data structures needed to decode
      //
      if (!search_engine_d.initializeLinearDecoder()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }

      // initialize the top search level with the corresponding
      // transcription; we don't need a transcription database for
      // segmented utterances
      //
      if (transcription_db_file_d.length() > 0) {
        initTranscription(identifier, current_file_num - 1);
      }

      // process the utterance file by the front end
      //
      fe_d.open(input_file_name);

      // retrieve all frames of data in advance
      //
      extractFeatures(data);

      // decode the utterance
      //
      if (!verify_d) {
        search_engine_d.linearDecoder(fe_d);
      }

      // pick up the best hypothesis and determine the utterance probability
      //
      String hypotheses;
      double score = 0;
      long num_frames = 0;
      DoubleLinkedList<Trace> trace_path;

      if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d,
                                         score, num_frames, trace_path)) {

        // if no hypothesis is found, clear the outputs
        //
        hypotheses.clear();
        trace_path.clear();

        if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {

          // print the warning message
          //
          Console::increaseIndention();
          Console::put(L"\nno hypothesis found");
          Console::decreaseIndention();
        }
      }
      else {
        num_valid_files++;
        total_num_frames += num_frames;
      }

      // compute the forward-backward probabilities (alphas and betas)
      //
      trellis_d = (BiGraph<TrainNode>*)NULL;
      trellis_d = search_engine_d.computeForwardBackward(data,
                                                         beta_threshold_d);

      // determine the utterance probability here again
      //
      computeUtterProb(score);

      // print the utterance probability
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\naverage utterance probability: ");
        output.concat(score / num_frames);
        output.concat(L", number of frames: ");
        output.concat(num_frames);
        Console::put(output);
        Console::decreaseIndention();
      }

      // accumulate the statistics
      //
      accumulate(score, data);

      // close the front end processing
      //
      fe_d.close();
    } // end of looping through the input utterances

    // write the accumulators to file
    //
    storeAccumulators();
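    // note: because Baum-Welch sufficient statistics are additive across
    // utterances, writing them to an accumulator file here (and applying
    // them in the separate UPDATE branch) presumably allows accumulator
    // files from independent training jobs to be pooled before a single
    // model update.
    //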
output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of TRAIN using BAUM_WELCH and ACCUMULATE // branch on TRAIN using BAUM_WELCH and NONE // does not support streaming input. // else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == NONE && !stream_d) { // declare local variables // long num_valid_files = 0; long current_file_num = 0; long total_num_frames = 0;