📄 hmm_05.cc
字号:
} // loop over all search levels and set the alignment level // for (long level = 0; level < (long)num_levels_d; level++) { SearchLevel& search_level = search_engine_d.getSearchLevel(level); if (search_level.getLevelTag().eq(force_alignment_level_d)) { alignment_level_d = level; } } // loop over all search levels and verify that the context length // is not greater than one is skip symbols are used // for (long i = 0; i < (long)num_levels_d; i++) { // retrieve the search levels // Long curr_level_index(i); SearchLevel& curr_level = search_engine_d.getSearchLevel((long)curr_level_index); // are we using context at this level? // if (curr_level.useContext()) { // is the right context length greater than one? // if (curr_level.getRightContext() > 1) { // are we using skip symbols at this level // if (curr_level.getSkipSymbolTable().length() > 0) { curr_level_index.debug(L"current search level"); return Error::handle(name(), L"load - using skip symbols with a right context length greater than one is not supported", Error::ARG, __FILE__, __LINE__); } } } } // after reading the search level tags from language models, we may // set the output levels // if (output_levels_str_d.length() > 0) { parseLevels(output_levels_str_d, output_levels_d); } else { // set default output level as the top level // output_levels_d.setLength((long)num_levels_d); output_levels_d(0) = ON; } // after reading the search level tags from language models, we may // set the update levels // if (update_levels_str_d.length() > 0) { parseLevels(update_levels_str_d, update_levels_d); } else { // set default update level as the bottom level // update_levels_d.setLength((long)num_levels_d); update_levels_d((long)num_levels_d - 1) = ON; } // gracefully exit // return true;}// method: initialize//// arguments:// Sdb& sdb: (input) signal data base to run on//// return: a boolean value indicating status//// this method initializes each model with the global mean and covariance//boolean HiddenMarkovModel::initialize(Sdb& sdb_a) { // declare local variables // long num_feat = 0; long num_vect = 0; long num_adjacent = 0; long current_file_num = 0; String identifier; Filename input_file_name; Filename input_ID; Sof file_sof; VectorFloat mean; VectorFloat diagonal; VectorFloat feature; MatrixFloat covar; MatrixFloat temp_covar; String output; String train_path; String train_file; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // loop over all levels and update the ones specified // for (long i = 0; i < update_levels_d.length(); i++) { // determine if the mask for the current level is set // if (update_levels_d(i) == ON) { // retrieve the search level of the state level // SearchLevel& search_level = search_engine_d.getSearchLevel(i); // get the statistical models for the state level // Vector<StatisticalModel>& stat_models = search_level.getStatisticalModels(); // initialize the statistical models // if ((update_mode_d == OBSERVATIONS) || (update_mode_d == ALL)) { // check if we really need to do this // if (stat_models.length() > 0) { // reset the models parameters to begin with // num_vect = 0; mean.clear(Integral::RETAIN); covar.clear(Integral::RETAIN); temp_covar.clear(Integral::RETAIN); for (int j=0; j < stat_models.length(); j++) { stat_models(j).clear(Integral::RETAIN); } // accumulate the sufficient statistics for each model // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // initialize the top level with the corresponding transcription // we don't need a transcription database for segmented uttreances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // process the utterance file by the front end // fe_d.open(input_file_name); // loop over the feature vectors in the file // for (int j=0; j < fe_d.getNumFrames(); j++) { // accumulate the number of features // num_vect++; // get the feature vector // fe_d.getVector(feature, 0, (long)j); // get the number of features for each feature vector // if (num_vect == 1) { num_feat = feature.length(); mean.setLength(num_feat); covar.setDimensions(num_feat, num_feat, false, Integral::DIAGONAL); temp_covar.setDimensions(num_feat, num_feat, false, Integral::DIAGONAL); } // add the feature values to the mean // mean.add(feature); // store product of features into covariance metrix // for (long l=0; l < num_feat; l++) { temp_covar.setValue(l, l, (float)(feature(l) * feature(l))); } covar.add(temp_covar); // accumulate the sufficient statistics // for (int l=0; l < stat_models.length(); l++) { stat_models(l).accumulate(feature); } } // close the front end // fe_d.close(); } // compute the mean vector for the feature vectors // mean.div(num_vect); // compute the covariance matrix for the feature vectors // covar.div(num_vect - 1); for (long l=0; l < num_feat; l++) { float value = covar.getValue(l, l) - (mean(l) * mean(l)); covar.setValue(l, l, value); } // retrieve the diagonal of the covariance matrix // covar.getDiagonal(diagonal); // loop over each statistical model and initialize the model parameters // VectorFloat param(2); param(0) = (float)num_feat; param(1) = (float)num_vect; for (int i=0; i < stat_models.length(); i++) { stat_models(i).initialize(param); } // write the variance floor file // diagonal.mult(variance_floor_d); file_sof.open(variance_floor_file_d, File::WRITE_ONLY); diagonal.write(file_sof, (long)0); // close the variance floor file // file_sof.close(); } } // initialize the state transition probabilities // if ((update_mode_d == TRANSITIONS) || (update_mode_d == ALL)) { for (int j=0; j < search_level.getNumSubGraphs(); j++) { // retrieve the subgraph // DiGraph<SearchNode>& subgraph = search_level.getSubGraph((long)j); // loop over each vertex adjacent to the start vertex // GraphVertex<SearchNode>* start_vertex = subgraph.getStart(); num_adjacent = start_vertex->length(); for (boolean more = start_vertex->gotoFirst(); more; more = start_vertex->gotoNext()) { GraphArc<SearchNode>* vertex_arc = start_vertex->getCurr(); if (num_adjacent == 1) { vertex_arc->setWeight(0); } else { vertex_arc->setWeight(-log(num_adjacent)); } } // loop over each vertex in the subgraph // for (boolean more = subgraph.gotoFirst(); more; more = subgraph.gotoNext()) { GraphVertex<SearchNode>* vertex = const_cast<GraphVertex<SearchNode>* >(subgraph.getCurr()); num_adjacent = vertex->length(); for (boolean more = vertex->gotoFirst(); more; more = vertex->gotoNext()) { GraphArc<SearchNode>* vertex_arc = vertex->getCurr(); if (num_adjacent == 1) { vertex_arc->setWeight(0); } else { vertex_arc->setWeight(-log(num_adjacent)); } } } } } } } // write the models to file // if (!store()) { return Error::handle(name(), L"initialize", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; }// method: linearDecoder//// arguments:// Sdb& sdb: (input) signal data base to run on//// return: a boolean value indicating status//// this is the run method for a linear decoder//boolean HiddenMarkovModel::linearDecoder(Sdb& sdb_a) { // added for word-internal context generation // branch on the CONTEXT_GENERATION using SYMBOL_GENERATION // does not support streaming input. // if (algorithm_d == CONTEXT_GENERATION && !stream_d && implementation_d == SYMBOL_GENERATION && function_mode_d == NONE) { // declare local variables // long num_valid_files = 0; long current_file_num = 0; long total_num_frames = 0; String identifier; Filename input_file_name; String output; Filename input_ID; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engines target context level // search_engine_d.setContextLevel(context_level_d); // set the search engines mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // initialize the context generation // if (!search_engine_d.initializeContextGeneration()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // loop through each input utterance // current_file_num = 0; for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // process the utterance file by the front end // fe_d.open(input_file_name); // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; double score = 0; long num_frames = 0; DoubleLinkedList<Instance> instance_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, instance_path)) { hypotheses.clear(); instance_path.clear(); num_valid_files++; total_num_frames += num_frames; } // close the front end processing // fe_d.close(); } // end of looping through the input utterances // write the context dependent symbols // Vector<String>& context_list = search_engine_d.getContextList(); // open the context list // Sof input_sof; Sdb symbol_list_sdb; if (!input_sof.open(context_list_d, File::WRITE_ONLY)) { return Error::handle(name(), L"linearDecoder - unable to open context list", Error::ARG, __FILE__, __LINE__); } // loop over each element in the symbol list and append it to the sdb // String tmp_str; SearchSymbol symbol_context; for (long i=0; i < context_list.length(); i++) { symbol_context.assign(context_list(i)); tmp_str.assign(SearchSymbol::NO_LEFT_CONTEXT); tmp_str.concat(L"-"); symbol_context.replace(tmp_str, L""); tmp_str.assign(L"+"); tmp_str.concat(SearchSymbol::NO_RIGHT_CONTEXT); symbol_context.replace(tmp_str, L""); symbol_list_sdb.append(symbol_context); } // write the sdb list // symbol_list_sdb.write(input_sof, (long)context_level_d);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -