📄 hmm_07.cc

📁 这是一个从音频信号里提取特征参量的程序
💻 CC
字号:
// file: $isip/class/pr/HiddenMarkovModel/hmm_07.cc// version: $Id: hmm_07.cc,v 1.17 2003/04/09 19:45:15 duncan Exp $//// isip include files//#include "HiddenMarkovModel.h"#include <CommandLine.h>#include <LanguageModel.h>// method: networkDecoder//// arguments://  Sdb& sdb: (input) signal data base to run on//// return: a boolean value indicating status//// this is the run method for a network decoder//boolean HiddenMarkovModel::networkDecoder(Sdb& sdb_a) {    // branch on the DECODING using VITERBI  //  if ((algorithm_d == DECODE && implementation_d == VITERBI) ||      (algorithm_d == FORCED_ALIGNMENT && implementation_d == VITERBI)) {        // declare local variables    //    long num_valid_files = 0;    long current_file_num = 0;    long total_num_frames = 0;    String identifier;    Filename input_file_name;        Filename input_ID;            String output;    Filename output_file_name;    Sdb output_sdb;    Vector<VectorFloat> data;    TranscriptionDatabase trans_db;        // load the hmm models from the model file    //    if (!load()) {      return Error::handle(name(), L"run: load hmm models",			   Error::ARG, __FILE__, __LINE__);    }        // set the search engine mode    //    search_engine_d.setSearchMode(HierarchicalSearch::DECODE);        // when the output mode is FILE    //    File total_output_file;          if (output_mode_d == FILE) {      // open the output file for the utterance hypotheses      //      if (output_file_d.length() != 0) {      	if (verbosity_d >= Integral::BRIEF) {	  Console::increaseIndention();	  output.assign(L"\nopening the output file: ");	  output.concat(output_file_d);	  Console::put(output);	  Console::decreaseIndention();	}	if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) {	  return Error::handle(name(), L"networkDecoder - error opening output file", Error::ARG, __FILE__, __LINE__);	}	else {	  total_output_file.close();	}      }    }    // when the output mode is DATABASE    //    Sof output_db_sof;    if (output_mode_d == DATABASE) {      if (output_file_d.length() > 0) {	if (verbosity_d >= Integral::BRIEF) {	  Console::increaseIndention();	  output.assign(L"\nopening the output file: ");	  output.concat(output_file_d);	  Console::put(output);	  Console::decreaseIndention();	}	// open the output file for the utterance hypotheses (TEXT)	//	if (output_type_d == TEXT) {		  if (!output_db_sof.open(output_file_d, File::WRITE_ONLY)) {	    return Error::handle(name(), L"run: opening output file",				 Error::ARG, __FILE__, __LINE__);	  }	  else {	    	    // read all identifiers form the sdb object	    //	    Vector<String> identifier_keys;	    identifier_keys.setCapacity(sdb_a.length());	    	    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID);		 sdb_a.gotoNext()) {	      identifier.assign(input_ID);	      identifier_keys.concat(identifier);	    }	    trans_db.storePartial(output_db_sof, 0, identifier_keys);	  }	} // end of output type TEXT	// open the output file for the utterance hypotheses (BINARY)	//	if (output_type_d == BINARY) {		  if (!output_db_sof.open(output_file_d,				  File::WRITE_ONLY, File::BINARY)) {	    return Error::handle(name(), L"run: opening output file",				 Error::ARG, __FILE__, __LINE__);	  }	  else {	    	    // read all identifiers form the sdb object	    //	    Vector<String> identifier_keys;	    identifier_keys.setCapacity(sdb_a.length());	    	    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID);		 sdb_a.gotoNext()) {	      identifier.assign(input_ID);	      identifier_keys.concat(identifier);	    }	    trans_db.storePartial(output_db_sof, 0, identifier_keys);	  }	} // end of output type BINARY      }    }            // loop through the input utterances    //    for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {      current_file_num++;      // if input is streaming, set input_file_name to STREAM_FILE      //      if (stream_d) {	input_file_name.assign(File::STREAM_FILE);      }            // otherwise, get the path      //      else {	// get audio file path	//	identifier.assign(input_ID);	if (!audio_db_d.getRecord(identifier, input_file_name)) {	  input_ID.debug(L"input_ID");	  return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__);	    	}      }       // print utterance processing information      //            if (verbosity_d >= Integral::BRIEF) {	Console::increaseIndention();	output.assign(L"\nprocessing file ");	output.concat(current_file_num);	output.concat(L" (");	output.concat(identifier);	output.concat(L")");		output.concat(L": ");	output.concat(input_file_name);	Console::put(output);	Console::decreaseIndention();      }      // initialize the decoder      //      if (!search_engine_d.initializeNetworkDecoder()) {	return Error::handle(name(), L"networkDecoder", Error::ARG, __FILE__, __LINE__);      }            // initialize the top search level with the corresponding transcription      //  we don't need a transcription database for segmented utterances      //      if (transcription_db_file_d.length() > 0) {	initTranscription(identifier, current_file_num - 1);      }                        // process the utterance file by the front end      //      fe_d.open(input_file_name);            // retrieve the all frames of data in advance      //      extractFeatures(data);            // decode the utterance      //	      if (!verify_d) {	search_engine_d.networkDecoder(fe_d);      }      fe_d.close();            // pick up the best hypothesis and its parameters      //      String hypotheses;      float score = 0;            long num_frames = 0;      DoubleLinkedList<Instance> instance_path;            if (!search_engine_d.getLexHypotheses(hypotheses, alignment_level_d, score,					 num_frames, instance_path)) {	  	// if no hypothesis found	//	hypotheses.clear();	instance_path.clear();	if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {	  	  // print the warning message	  //	  Console::increaseIndention();	  Console::put(L"\nno hypothesis found");	  Console::decreaseIndention();	}      }      else {		num_valid_files++;	total_num_frames += num_frames;      }      // print the detailed info about the hypothesis      //      if (verbosity_d >= Integral::BRIEF) {	Console::increaseIndention();	Console::increaseIndention();	output.assign(L"\nhyp:    ");	output.concat(hypotheses);	output.concat(L"\nscore:  ");	output.concat(score);	output.concat(L"   frames: ");	output.concat(num_frames);	Console::put(output);	Console::decreaseIndention();	Console::decreaseIndention();      }           if (output_mode_d == TRANSFORM) {	// transform the input file and its path to the output file	//	sdb_a.transformName(output_file_name, input_file_name);	// open the output file and write best hypothesis	//	File output_file;	if (!output_file.open(output_file_name, File::WRITE_ONLY)) {	  return Error::handle(name(), L"networkDecoder - error opening output file", Error::ARG, __FILE__, __LINE__);	}	output_file.put(hypotheses);	      	// close the output file	//	output_file.close();      }      if (output_mode_d == DATABASE) {	String name_00(identifier);	String gtype_00(identifier);		AnnotationGraph output_graph(name_00, gtype_00);	// convert the best search path output format	//	search_engine_d.convertLexInstances(instance_path);		// convert the best search path to an annotation graph	//	if (!createAnnotationGraph(output_graph, instance_path)) {	  return Error::handle(name(), L"networkDecoder", Error::ARG, __FILE__, __LINE__);	}	// prune the annotation graph according to output levels	//	pruneAnnotationGraph(output_graph);		// write the annotation graph to the database	//	trans_db.storePartial(output_db_sof, current_file_num - 1, output_graph);      }            if (output_mode_d == FILE) {		// output the best hypothesis and sent ID to the output file	//                                           or to stdout	//	output.assign(hypotheses);	if ((long)alignment_level_d < 0) {	  output.concat(L" (");	  output.concat(input_ID);	  output.concat(L")\n");	}	if (output_file_d.length() != 0) {	  total_output_file.open(output_file_d, File::APPEND_PLUS);	  total_output_file.put(output);	  total_output_file.close();	}	else {	  if (verbosity_d < Integral::BRIEF) {	    Console::increaseIndention();	    Console::increaseIndention();	    Console::put(output);	    Console::decreaseIndention();	    Console::decreaseIndention();	  }	}      }      if (output_mode_d == LIST) {		// output the hypothesis to the corresponding file	// from the output list	//	boolean more_files;		if (verbosity_d >= Integral::BRIEF) {	  Console::increaseIndention();	  output.assign(L"\nopening the output list: ");	  output.concat(output_file_d);	  Console::put(output);	  Console::decreaseIndention();	}		if (current_file_num == 1) {	  	  // read output files list into signal database	  //	  Sof output_list_file;	  if (!output_list_file.open(output_list_d)) {	    return Error::handle(name(), L"networkDecoder - error opening output list", Error::ARG, __FILE__, __LINE__);	  }	  	  output_sdb.read(output_list_file, 0);	  output_list_file.close();	  	  more_files = output_sdb.gotoFirst();	}	else {	  // move to the next output file	  //	  more_files = output_sdb.gotoNext();	}		if (!more_files) {	  return Error::handle(name(), L"networkDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__);	}		// open the next output file and write the best hypothesis	//	output_sdb.getName(output_file_name);		File output_file;	if (!output_file.open(output_file_name, File::WRITE_ONLY)) {	  Console::increaseIndention();	  output.assign(L"\ncannot open output file: ");	  output.concat(output_file_name);	  Console::put(output);	  Console::decreaseIndention();	}	else {	  output_file.put(hypotheses);	  output_file.put(L"\n");	  output_file.close();	}      }    } // end of looping through the input utterances    // clean up all memory    //    search_engine_d.clear();        // close database file    //    if (output_mode_d == DATABASE) {      output_db_sof.close();    }    // close the audio database (optional)    //    if (audio_db_file_d.length() > 0) {      if (!audio_db_d.close()) {	return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__);      }    }    // close the transcription database (optional)    //    if (transcription_db_file_d.length() > 0) {      if (!transcription_db_d.close()) {	return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__);      }    }        // print the number of successfully processed files    //    output.assign(L"\nprocessed ");    output.concat(num_valid_files);    output.concat(L" file(s) successfully, attempted ");    output.concat(current_file_num);    output.concat(L" file(s), ");    output.concat(total_num_frames);    output.concat(L" frame(s)\n");        Console::put(output);      } // end of DECODE algorithm VITERBI implementation    // branch on the UNKNOWN options  //  else {        return Error::handle(name(), L"invalid algorithm and/or implementation", Error::ARG, __FILE__, __LINE__);  }    // gracefully exit  //  return true;    }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -