hmm_05.cc
        }
        else {
          total_output_file.close();
        }
      }
    }

    // loop through the input utterances
    //
    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
      current_file_num++;

      // if input is streaming, set input_file_name to STREAM_FILE
      //
      if (stream_d) {
        input_file_name.assign(File::STREAM_FILE);
      }

      // otherwise get the file path
      //
      else {

        // get audio file path
        //
        identifier.assign(input_ID);
        if (!audio_db_d.getRecord(identifier, input_file_name)) {
          return Error::handle(name(),
                               L"initialize - unable to find audio file for identifier",
                               Error::ARG, __FILE__, __LINE__);
        }
      }

      // print utterance processing information
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nprocessing file ");
        output.concat(current_file_num);
        output.concat(L" (");
        output.concat(identifier);
        output.concat(L")");
        output.concat(L": ");
        output.concat(input_file_name);
        Console::put(output);
        Console::decreaseIndention();
      }

      // initialize the decoder
      //
      if (!stack_engine_d.initializeLinearDecoder()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }

      // we don't need a transcription database for segmented utterances
      //
      if (transcription_db_file_d.length() > 0) {
        initTranscription(identifier, current_file_num - 1);
      }

      // process the utterance file by the front end
      //
      fe_d.open(input_file_name);

      // retrieve all frames of data in advance
      //
      extractFeatures(data);

      // decode the utterance
      //
      if (!verify_d) {

        // use the stack search engine
        //
        stack_engine_d.decode(fe_d);
      }
      fe_d.close();

      // pick up the best hypothesis and its parameters
      //
      String hypotheses;
      double score = 0;
      long num_frames = 0;
      DoubleLinkedList<Trace> trace_path;

      if (!stack_engine_d.getHypotheses(hypotheses, alignment_level_d,
                                        score, num_frames, trace_path)) {

        // if no hypothesis is found, clear the outputs
        //
        hypotheses.clear();
        trace_path.clear();

        if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {

          // print the warning message
          //
          Console::increaseIndention();
          Console::put(L"\nno hypothesis found");
          Console::decreaseIndention();
        }
      }
      else {
        num_valid_files++;
        total_num_frames += num_frames;
      }

      // print the detailed info about the hypothesis
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        Console::increaseIndention();
        output.assign(L"\nhyp: ");
        output.concat(hypotheses);
        output.concat(L"\nscore: ");
        output.concat(score);
        output.concat(L" frames: ");
        output.concat(num_frames);
        Console::put(output);
        Console::decreaseIndention();
        Console::decreaseIndention();
      }

      if (output_mode_d == TRANSFORM) {

        // transform the input file and its path to the output file
        //
        sdb_a.transformName(output_file_name, input_file_name);

        // open the output file and write the best hypothesis
        //
        File output_file;
        if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
          return Error::handle(name(),
                               L"linearDecoder - error opening output file",
                               Error::ARG, __FILE__, __LINE__);
        }
        output_file.put(hypotheses);

        // close the output file
        //
        output_file.close();
      }

      if (output_mode_d == FILE) {

        // output the best hypothesis
        //
        output.assign(hypotheses);
        if ((long)alignment_level_d < 0) {
          output.concat(L" (");
          output.concat(input_ID);
          output.concat(L")\n");
        }

        if (output_file_d.length() != 0) {
          total_output_file.open(output_file_d, File::APPEND_PLUS);
          total_output_file.put(output);
          total_output_file.close();
        }
        else {
          if (verbosity_d < Integral::BRIEF) {
            Console::increaseIndention();
            Console::increaseIndention();
            Console::put(output);
            Console::decreaseIndention();
            Console::decreaseIndention();
          }
        }
      }
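      // note: output_mode_d selects one of three output strategies:
      // TRANSFORM (above) derives the output name from the input path,
      // FILE (above) appends every hypothesis to a single file or echoes
      // it to the console, and LIST (below) writes one file per utterance,
      // taken in order from the list named by output_list_d.
      //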
      if (output_mode_d == LIST) {

        // output the hypothesis to the corresponding file
        // from the output list
        //
        boolean more_files;

        if (current_file_num == 1) {

          // read the output file list into a signal database
          //
          Sof output_list_file;
          if (!output_list_file.open(output_list_d)) {
            return Error::handle(name(),
                                 L"linearDecoder - error opening output list",
                                 Error::ARG, __FILE__, __LINE__);
          }
          output_sdb.read(output_list_file, 0);
          output_list_file.close();
          more_files = output_sdb.gotoFirst();
        }
        else {

          // move to the next output file
          //
          more_files = output_sdb.gotoNext();
        }

        if (!more_files) {
          return Error::handle(name(),
                               L"linearDecoder - insufficient output files in the output list",
                               Error::ARG, __FILE__, __LINE__);
        }

        // open the next output file and write the best hypothesis
        //
        output_sdb.getName(output_file_name);
        File output_file;
        if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
          Console::increaseIndention();
          output.assign(L"\ncannot open output file: ");
          output.concat(output_file_name);
          Console::put(output);
          Console::decreaseIndention();
        }
        else {
          output_file.put(hypotheses);
          output_file.put(L"\n");
          output_file.close();
        }
      }
    } // end of looping through the input utterances

    // clean up all memory
    //
    stack_engine_d.clear();

    // close the audio database (optional)
    //
    if (audio_db_file_d.length() > 0) {
      if (!audio_db_d.close()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }
    }

    // close the transcription database (optional)
    //
    if (transcription_db_file_d.length() > 0) {
      if (!transcription_db_d.close()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }
    }

    // print the number of successfully processed files
    //
    if (verbosity_d >= Integral::BRIEF) {
      output.assign(L"\nprocessed ");
      output.concat(num_valid_files);
      output.concat(L" file(s) successfully, attempted ");
      output.concat(current_file_num);
      output.concat(L" file(s), ");
      output.concat(total_num_frames);
      output.concat(L" frame(s)\n");
      Console::put(output);
    }
  } // end of DECODE using STACK

  // branch on TRAIN using BAUM_WELCH and function UPDATE;
  // does not support streaming input
  //
  else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH &&
           function_mode_d == UPDATE && !stream_d) {

    // declare local variables
    //
    String output;

    // load the hmm models from the model file
    //
    if (!load()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }

    // set the search engine mode
    //
    search_engine_d.setInitialLevel(initial_level_d);
    search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);

    // load the accumulators from file
    //
    loadAccumulators();

    // update the models using the accumulated statistics
    //
    update();

    // write the trained models to file
    //
    if (!store()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }

    // clean up memory
    //
    search_engine_d.clear();
  } // end of TRAIN using BAUM_WELCH and UPDATE

  // branch on TRAIN using BAUM_WELCH and function ACCUMULATE
  //
  else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH &&
           function_mode_d == ACCUMULATE) {

    // declare local variables
    //
    long num_valid_files = 0;
    long current_file_num = 0;
    long total_num_frames = 0;
    String identifier;
    Filename input_file_name;
    Filename input_ID;
    String output;
    String train_path;
    String train_file;
    Filename output_file_name;
    Sdb output_sdb;
    Vector<VectorFloat> data;

    // load the hmm models from the model file
    //
    if (!load()) {
      return Error::handle(name(), L"run: load hmm models", Error::ARG,
                           __FILE__, __LINE__);
    }
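    // the utterance loop below is the accumulation (E-step) half of
    // Baum-Welch training: each utterance is decoded against its
    // transcription, a forward-backward pass is run over the resulting
    // trellis, and sufficient statistics are accumulated. as a sketch in
    // standard HMM notation (these symbols are not identifiers from this
    // file), the quantities being accumulated are the state occupancy and
    // transition probabilities
    //
    //   gamma_t(i) = P(q_t = i | O, lambda)
    //   xi_t(i,j)  = P(q_t = i, q_t+1 = j | O, lambda),
    //
    // which the UPDATE branch above later turns into re-estimated
    // parameters, e.g. a_ij = sum_t xi_t(i,j) / sum_t gamma_t(i).
    //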
    // set the search engine mode
    //
    search_engine_d.setInitialLevel(initial_level_d);
    search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);

    // loop through each input utterance
    //
    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
      current_file_num++;

      // get audio file path
      //
      identifier.assign(input_ID);
      if (!audio_db_d.getRecord(identifier, input_file_name)) {
        return Error::handle(name(),
                             L"initialize - unable to find audio file for identifier",
                             Error::ARG, __FILE__, __LINE__);
      }

      // print utterance processing information
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nprocessing file ");
        output.concat(current_file_num);
        output.concat(L" (");
        output.concat(identifier);
        output.concat(L")");
        output.concat(L": ");
        output.concat(input_file_name);
        Console::put(output);
        Console::decreaseIndention();
      }

      // clear all data structures needed to decode
      //
      if (!search_engine_d.initializeLinearDecoder()) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }

      // initialize the top search level with the corresponding
      // transcription; we don't need a transcription database for
      // segmented utterances
      //
      if (transcription_db_file_d.length() > 0) {
        initTranscription(identifier, current_file_num - 1);
      }

      // process the utterance file by the front end
      //
      fe_d.open(input_file_name);

      // retrieve all frames of data in advance
      //
      extractFeatures(data);

      // decode the utterance
      //
      if (!verify_d) {
        search_engine_d.linearDecoder(fe_d);
      }

      // pick up the best hypothesis and determine the utterance probability
      //
      String hypotheses;
      double score = 0;
      long num_frames = 0;
      DoubleLinkedList<Trace> trace_path;

      if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d,
                                         score, num_frames, trace_path)) {

        // if no hypothesis is found, clear the outputs
        //
        hypotheses.clear();
        trace_path.clear();

        if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {

          // print the warning message
          //
          Console::increaseIndention();
          Console::put(L"\nno hypothesis found");
          Console::decreaseIndention();
        }
      }
      else {
        num_valid_files++;
        total_num_frames += num_frames;
      }

      // compute the forward-backward probabilities (alphas and betas)
      //
      trellis_d = (BiGraph<TrainNode>*)NULL;
      trellis_d = search_engine_d.computeForwardBackward(data,
                                                         beta_threshold_d);

      // determine the utterance probability here again
      //
      computeUtterProb(score);

      // print the utterance probability
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\naverage utterance probability: ");
        output.concat(score / num_frames);
        output.concat(L", number of frames: ");
        output.concat(num_frames);
        Console::put(output);
        Console::decreaseIndention();
      }

      // accumulate the statistics
      //
      accumulate(score, data);

      // close the front end processing
      //
      fe_d.close();
    } // end of looping through the input utterances

    // write the accumulators to file
    //
    storeAccumulators();
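    // note: because Baum-Welch sufficient statistics are additive across
    // utterances, writing them to an accumulator file here (and applying
    // them in the separate UPDATE branch) presumably allows accumulator
    // files from independent training jobs to be pooled before a single
    // model update.
    //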
output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of TRAIN using BAUM_WELCH and ACCUMULATE // branch on TRAIN using BAUM_WELCH and NONE // does not support streaming input. // else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == NONE && !stream_d) { // declare local variables // long num_valid_files = 0; long current_file_num = 0; long total_num_frames = 0;