📄 hmm_05.cc

📁 这是一个从音频信号里提取特征参量的程序
💻 CC
📖 第 1 页 / 共 5 页
字号:
  }  // loop over all search levels and set the alignment level  //  for (long level = 0; level < (long)num_levels_d; level++) {    SearchLevel& search_level = search_engine_d.getSearchLevel(level);    if (search_level.getLevelTag().eq(force_alignment_level_d)) {      alignment_level_d = level;    }  }    // loop over all search levels and verify that the context length  // is not greater than one is skip symbols are used  //  for (long i = 0; i < (long)num_levels_d; i++) {    // retrieve the search levels    //    Long curr_level_index(i);        SearchLevel& curr_level =      search_engine_d.getSearchLevel((long)curr_level_index);    // are we using context at this level?    //    if (curr_level.useContext()) {      // is the right context length greater than one?      //      if (curr_level.getRightContext() > 1) {	// are we using skip symbols at this level	//	if (curr_level.getSkipSymbolTable().length() > 0) {	  curr_level_index.debug(L"current search level");	  return Error::handle(name(), L"load - using skip symbols with a right context length greater than one is not supported", Error::ARG, __FILE__, __LINE__);	}      }    }  }    // after reading the search level tags from language models, we may  // set the output levels  //  if (output_levels_str_d.length() > 0) {    parseLevels(output_levels_str_d, output_levels_d);  }  else {    // set default output level as the top level    //    output_levels_d.setLength((long)num_levels_d);    output_levels_d(0) = ON;  }  // after reading the search level tags from language models, we may  // set the update levels  //  if (update_levels_str_d.length() > 0) {    parseLevels(update_levels_str_d, update_levels_d);  }  else {    // set default update level as the bottom level    //    update_levels_d.setLength((long)num_levels_d);    update_levels_d((long)num_levels_d - 1) = ON;  }    // gracefully exit  //  return true;}// method: initialize//// arguments://  Sdb& sdb: (input) signal data base to run on//// return: a 
// boolean value indicating status
//
// this method initializes each model with the global mean and covariance
//
// for each search level whose entry in update_levels_d is ON:
//  - when update_mode_d is OBSERVATIONS or ALL and the level has
//    statistical models, every feature vector of every utterance named in
//    sdb_a is read through the front end, added to a global mean and a
//    diagonal second-moment matrix, and handed to accumulate() of each
//    model. each model is then initialized with a two element parameter
//    vector (feature dimension, number of vectors) and the diagonal,
//    multiplied by variance_floor_d, is written to variance_floor_file_d
//  - when update_mode_d is TRANSITIONS or ALL, every arc weight in every
//    subgraph of the level is set to -log(n), where n is length() of the
//    vertex that owns the arc, or to 0 when n is one
//
// the models are written with store() before returning. an error is
// reported when the models cannot be loaded or stored, when an utterance
// identifier has no audio record, when fewer than two feature vectors
// were read, or when the variance floor file cannot be opened
//
boolean HiddenMarkovModel::initialize(Sdb& sdb_a) {

  // declare local variables
  //
  long num_feat = 0;
  long num_vect = 0;
  long num_adjacent = 0;
  long current_file_num = 0;

  String identifier;
  Filename input_file_name;
  Filename input_ID;
  Sof file_sof;

  VectorFloat mean;
  VectorFloat diagonal;
  VectorFloat feature;
  MatrixFloat covar;
  MatrixFloat temp_covar;

  String output;

  // load the hmm models from the model file
  //
  if (!load()) {
    return Error::handle(name(), L"run: load hmm models",
                         Error::ARG, __FILE__, __LINE__);
  }

  // set the search engine mode
  //
  search_engine_d.setInitialLevel(initial_level_d);
  search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);

  // loop over all levels and update the ones specified
  //
  for (long i = 0; i < update_levels_d.length(); i++) {

    // determine if the mask for the current level is set
    //
    if (update_levels_d(i) == ON) {

      // retrieve the search level of the state level
      //
      SearchLevel& search_level = search_engine_d.getSearchLevel(i);

      // get the statistical models for the state level
      //
      Vector<StatisticalModel>& stat_models =
        search_level.getStatisticalModels();

      // initialize the statistical models
      //
      if ((update_mode_d == OBSERVATIONS) || (update_mode_d == ALL)) {

        // check if we really need to do this
        //
        if (stat_models.length() > 0) {

          // reset the accumulators to begin with. the file counter is
          // reset as well: the utterance loop below runs once per updated
          // level, and the counter is used as the (zero-based) file index
          // passed to initTranscription, so it must restart for each pass
          //
          num_vect = 0;
          current_file_num = 0;
          mean.clear(Integral::RETAIN);
          covar.clear(Integral::RETAIN);
          temp_covar.clear(Integral::RETAIN);
          for (int j = 0; j < stat_models.length(); j++) {
            stat_models(j).clear(Integral::RETAIN);
          }

          // accumulate the sufficient statistics for each model
          //
          for (sdb_a.gotoFirst(); sdb_a.getName(input_ID);
               sdb_a.gotoNext()) {

            current_file_num++;

            // get audio file path
            //
            identifier.assign(input_ID);
            if (!audio_db_d.getRecord(identifier, input_file_name)) {
              return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__);
            }

            // print utterance processing information
            //
            if (verbosity_d >= Integral::BRIEF) {
              Console::increaseIndention();
              output.assign(L"\nprocessing file ");
              output.concat(current_file_num);
              output.concat(L" (");
              output.concat(identifier);
              output.concat(L")");
              output.concat(L": ");
              output.concat(input_file_name);
              Console::put(output);
              Console::decreaseIndention();
            }

            // initialize the top level with the corresponding transcription
            //  we don't need a transcription database for segmented
            //  utterances
            //
            if (transcription_db_file_d.length() > 0) {
              initTranscription(identifier, current_file_num - 1);
            }

            // process the utterance file by the front end
            //
            fe_d.open(input_file_name);

            // loop over the feature vectors in the file
            //
            for (int j = 0; j < fe_d.getNumFrames(); j++) {

              // accumulate the number of features
              //
              num_vect++;

              // get the feature vector
              //
              fe_d.getVector(feature, 0, (long)j);

              // the first vector seen fixes the feature dimension and
              // sizes the accumulators
              //
              if (num_vect == 1) {
                num_feat = feature.length();
                mean.setLength(num_feat);
                covar.setDimensions(num_feat, num_feat, false,
                                    Integral::DIAGONAL);
                temp_covar.setDimensions(num_feat, num_feat, false,
                                         Integral::DIAGONAL);
              }

              // add the feature values to the mean
              //
              mean.add(feature);

              // store the squared features on the diagonal and add them
              // to the covariance matrix
              //
              for (long l = 0; l < num_feat; l++) {
                temp_covar.setValue(l, l, (float)(feature(l) * feature(l)));
              }
              covar.add(temp_covar);

              // accumulate the sufficient statistics
              //
              for (int l = 0; l < stat_models.length(); l++) {
                stat_models(l).accumulate(feature);
              }
            }

            // close the front end
            //
            fe_d.close();
          }

          // the normalizations below divide by num_vect and by
          // (num_vect - 1), so at least two feature vectors are required
          //
          if (num_vect < 2) {
            return Error::handle(name(), L"initialize - fewer than two feature vectors were found", Error::ARG, __FILE__, __LINE__);
          }

          // compute the mean vector for the feature vectors
          //
          mean.div(num_vect);

          // compute the covariance matrix for the feature vectors
          //
          // NOTE(review): this evaluates sum(x^2)/(N-1) - mean^2, which
          // mixes an (N-1) normalization with a mean taken over N. it is
          // left unchanged here - confirm this is the intended estimator
          //
          covar.div(num_vect - 1);
          for (long l = 0; l < num_feat; l++) {
            float value = covar.getValue(l, l) - (mean(l) * mean(l));
            covar.setValue(l, l, value);
          }

          // retrieve the diagonal of the covariance matrix
          //
          covar.getDiagonal(diagonal);

          // loop over each statistical model and initialize the model
          // parameters with (feature dimension, number of vectors)
          //
          VectorFloat param(2);
          param(0) = (float)num_feat;
          param(1) = (float)num_vect;
          for (int m = 0; m < stat_models.length(); m++) {
            stat_models(m).initialize(param);
          }

          // write the variance floor file
          //
          diagonal.mult(variance_floor_d);
          if (!file_sof.open(variance_floor_file_d, File::WRITE_ONLY)) {
            return Error::handle(name(), L"initialize - unable to open variance floor file", Error::ARG, __FILE__, __LINE__);
          }
          diagonal.write(file_sof, (long)0);

          // close the variance floor file
          //
          file_sof.close();
        }
      }

      // initialize the state transition probabilities
      //
      if ((update_mode_d == TRANSITIONS) || (update_mode_d == ALL)) {

        for (int j = 0; j < search_level.getNumSubGraphs(); j++) {

          // retrieve the subgraph
          //
          DiGraph<SearchNode>& subgraph = search_level.getSubGraph((long)j);

          // loop over each arc of the start vertex
          //
          GraphVertex<SearchNode>* start_vertex = subgraph.getStart();
          num_adjacent = start_vertex->length();
          for (boolean more_arcs = start_vertex->gotoFirst(); more_arcs;
               more_arcs = start_vertex->gotoNext()) {
            GraphArc<SearchNode>* vertex_arc = start_vertex->getCurr();
            if (num_adjacent == 1) {
              vertex_arc->setWeight(0);
            }
            else {
              vertex_arc->setWeight(-log(num_adjacent));
            }
          }

          // loop over each vertex in the subgraph
          //
          for (boolean more_vertices = subgraph.gotoFirst(); more_vertices;
               more_vertices = subgraph.gotoNext()) {
            GraphVertex<SearchNode>* vertex =
              const_cast<GraphVertex<SearchNode>* >(subgraph.getCurr());
            num_adjacent = vertex->length();

            // loop over each arc of the current vertex
            //
            for (boolean more_arcs = vertex->gotoFirst(); more_arcs;
                 more_arcs = vertex->gotoNext()) {
              GraphArc<SearchNode>* vertex_arc = vertex->getCurr();
              if (num_adjacent == 1) {
                vertex_arc->setWeight(0);
              }
              else {
                vertex_arc->setWeight(-log(num_adjacent));
              }
            }
          }
        }
      }
    }
  }

  // write the models to file
  //
  if (!store()) {
    return Error::handle(name(), L"initialize", Error::ARG, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: linearDecoder
//
// arguments:
//  Sdb& sdb: (input) signal data base to run on
//
// return: a boolean value indicating status
//
// this is the run method for a linear decoder
//
boolean HiddenMarkovModel::linearDecoder(Sdb& sdb_a) {

  // added for word-internal context generation
  // branch on the CONTEXT_GENERATION using SYMBOL_GENERATION
  // does not support streaming input.
//  if (algorithm_d == CONTEXT_GENERATION && !stream_d &&      implementation_d == SYMBOL_GENERATION && function_mode_d == NONE) {    // declare local variables    //    long num_valid_files = 0;    long current_file_num = 0;    long total_num_frames = 0;    String identifier;    Filename input_file_name;        String output;        Filename input_ID;    // load the hmm models from the model file    //    if (!load()) {      return Error::handle(name(), L"run: load hmm models",			   Error::ARG, __FILE__, __LINE__);    }    // set the search engines target context level    //    search_engine_d.setContextLevel(context_level_d);        // set the search engines mode    //    search_engine_d.setInitialLevel(initial_level_d);        search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);        // initialize the context generation    //    if (!search_engine_d.initializeContextGeneration()) {      return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__);    }        // loop through each input utterance    //    current_file_num = 0;    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID);	 sdb_a.gotoNext()) {            current_file_num++;            // get audio file path      //      identifier.assign(input_ID);      if (!audio_db_d.getRecord(identifier, input_file_name)) {	return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__);	          }      	            // print utterance processing information      //            if (verbosity_d >= Integral::BRIEF) {	Console::increaseIndention();	output.assign(L"\nprocessing file ");	output.concat(current_file_num);	output.concat(L" (");	output.concat(identifier);	output.concat(L")");	output.concat(L": ");	output.concat(input_file_name);	Console::put(output);	Console::decreaseIndention();	          }            // clear all data structures needed to decode      //      if (!search_engine_d.initializeLinearDecoder()) {	return Error::handle(name(), 
L"linearDecoder", Error::ARG, __FILE__, __LINE__);      }      // initialize the top search level with the corresponding transcription      //  we don't need a transcription database for segmented utterances      //      if (transcription_db_file_d.length() > 0) {	initTranscription(identifier, current_file_num - 1);      }            // process the utterance file by the front end      //      fe_d.open(input_file_name);            // decode the utterance      //	      if (!verify_d) {	  	search_engine_d.linearDecoder(fe_d);      }      // pick up the best hypothesis and determine the utterance probability      //      String hypotheses;      double score = 0;      long num_frames = 0;      DoubleLinkedList<Instance> instance_path;            if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d,					 score, num_frames, instance_path)) {		hypotheses.clear();	instance_path.clear();		num_valid_files++;	total_num_frames += num_frames;      }       	      // close the front end processing      //      fe_d.close();          } // end of looping through the input utterances    // write the context dependent symbols    //    Vector<String>& context_list = search_engine_d.getContextList();    // open the context list    //    Sof input_sof;    Sdb symbol_list_sdb;        if (!input_sof.open(context_list_d, File::WRITE_ONLY)) {      return Error::handle(name(), L"linearDecoder - unable to open context list", Error::ARG, __FILE__, __LINE__);    }    // loop over each element in the symbol list and append it to the sdb    //    String tmp_str;    SearchSymbol symbol_context;    for (long i=0; i < context_list.length(); i++) {      symbol_context.assign(context_list(i));      tmp_str.assign(SearchSymbol::NO_LEFT_CONTEXT);      tmp_str.concat(L"-");      symbol_context.replace(tmp_str, L"");      tmp_str.assign(L"+");      tmp_str.concat(SearchSymbol::NO_RIGHT_CONTEXT);      symbol_context.replace(tmp_str, L"");      symbol_list_sdb.append(symbol_context);    }        
// write the sdb list    //    symbol_list_sdb.write(input_sof, (long)context_level_d);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -