📄 hmm_train.cc

📁 这是处理语音信号的程序
💻 CC
📖 第 1 页 / 共 3 页
字号:
1 2 3 下一页
// file: hmm_train.cc//// this program does trace projection decoding//// isip include files//#include "hmm_train.h"#include "hmm_train_constants.h"static int_4* trans_states = (int_4*)NULL;static float_4* trans_scores = (float_4*)NULL;static int_4 trans_states_size = 0;// main program//int main(int_4 argc, char_1** argv) {  // variables to hold commandline parameters  //  char_1* params_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* monophones_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* transitions_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* states_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* new_trans_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* new_states_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* batch_stats_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* acc_list_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* models_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* phones_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* lexicon_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* input_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* output_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* mfcc_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* state_occ_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* mlf_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* sph_file = new char_1[ISIP_MAX_STRING_LENGTH];  logical_1 state_occ_mode = HT_OPT_OFF;  int_4 num_levels = HT_NUM_LEVELS;  int_4 ph_size = HT_TRIPHONE_SIZE;  int_4 num_nbest = (int_4)0;  int_4 context_mode;  logical_1 output_mode = HT_BINARY_MODE;  logical_1 train_mode = HT_NORMAL;    // default recognition mode is via a lattice  //  logical_1 mlf_mode = HT_WORD_TRANS;  // parameters to define size  //  int_4 num_monophones = (int_4)0;  int_4 num_features = (int_4)0;    int_4 num_trans = (int_4)0;  int_4 num_states = (int_4)0;  int_4 num_models = (int_4)0;  int_4 num_phones = (int_4)0;    int_4 num_words = (int_4)0;  // for the TRAIN  //  
int_4 file_count = (int_4)0;  int_4 num_ph = (int_4)0;  int_4* mod_map = (int_4*)NULL;  int_4** st_map = (int_4**)NULL;  int_4* trans_map = (int_4*)NULL;  int_4* phn_ind = new int_4[HT_MAX_NUM_PHONES];  int_4* phns = (int_4*)NULL;  int_4 num_warn = (int_4)0;  logical_1 op_out = ISIP_FALSE;  FILE* fmfc = (FILE*)NULL;  int_4 sp_phn_ind = (int_4)0;    // count of trace generation  //  int_4* num_traces_total = new int_4[num_levels];  int_4* num_traces_gen = new int_4[num_levels];  int_4* num_traces_del = new int_4[num_levels];  int_4* total_gen = new int_4[num_levels];  int_4* total_del = new int_4[num_levels];  // pruning thresholds and parameters  //  int_4 align_mode = HT_DEFAULT_ALIGN_MODE;  int_4 input_mode = HT_DEFAULT_INPUT_MODE;  int_4 num_mapmi = (int_4)0;  int_4 mapmi_limit = (int_4)0;  float_8 mapmi_thresh = HT_DEFAULT_SCORE;  float_8* max_score = new float_8[num_levels];  float_8* beam_width = new float_8[num_levels];  float_8* beam_thresh = new float_8[num_levels];  float_8  var_floor = HT_DEFAULT_VAR_FLOOR;  // phone list and utterance for phone level alignment  //  int_4* phn_list = (int_4*)NULL;  char_1* utterance = (char_1*)NULL;  char_1* phn_str = (char_1*)NULL;  // define the lex tree  //  Train_Lex_tree* traintree = (Train_Lex_tree*)NULL;    // initialize the pruning parameters  //  for (int_4 i = 0; i < num_levels; i++) {    max_score[i] = HT_DEFAULT_SCORE;    beam_thresh[i] = HT_DEFAULT_SCORE;    beam_width[i] = (float_8)0;  }  // misc parameters  //  int_4 current_frame = (int_4)0;  int_4 num_steps = (int_4)0;    // read and decipher the commandline  //  int_4 context_flag;  read_cmdline_cc(argc, argv, params_file, context_flag);    // file pointer  //  FILE* fp = (FILE*)NULL;    // open the params file  //  fp = fopen((char*)params_file, "r");  if (fp == (FILE*)NULL) {    fprintf(stdout, "Cannot open file %s\n", params_file);    exit(ISIP_PROTO_ERROR);  }  // context can be specified from the command line also  //  if (context_flag == 1){    
context_mode = 3;  }  // read parameters  //  read_params_cc(fp, monophones_file, transitions_file, states_file,		 new_trans_file, new_states_file, batch_stats_file, 		 acc_list_file, models_file, phones_file, lexicon_file,		 align_mode, input_mode, num_levels, input_file, mlf_file,		 output_file, ph_size, num_nbest, beam_width, mapmi_limit,		 mlf_mode, context_mode, var_floor, op_out, output_mode,		 train_mode, sph_file, state_occ_file, state_occ_mode);  // create a list for n-best word traces  //  int_4 num_hyps = (int_4)0;  Train_Trace** n_best_array = new Train_Trace*[num_nbest];  // clean up  //  fclose(fp);    delete [] params_file;  // create a memory manager and set it in the trace linked lists  //  Train_Memory_manager* manager =    new Train_Memory_manager(HT_DEFAULT_BLOCK_SIZE, HT_NODE_BLOCK_SIZE);  manager->set_trace_grow_size_cc(HT_TRACE_BLOCK_SIZE);  Train_Link_list::set_manager_cc(manager);  // read the monophones list  //  char_1** monophones = read_monophones_cc(num_monophones, monophones_file);  delete [] monophones_file;    // read the transitions data  //  int_4* trans_size = (int_4*)NULL;  float_4*** transitions = read_trans_cc(num_trans, trans_size,					 transitions_file);  delete [] transitions_file;  // read the list of phones that need special handling wrt context  //  int_4* sph_index;  int_4 num_sph =0;  sph_index = read_sph_cc(num_sph, monophones, num_monophones, sph_file);  delete [] sph_file;  // read the states data  //  Train_State** states = read_states_cc(num_states, num_features, states_file);  delete [] states_file;  // read the special phone list file;  //     // if training mode involves combining accumulators  //  if (train_mode == HT_COMBINE) {    // setup the state counts    //    int_4** st_count = new int_4*[num_states];    for (int_4 i = 0; i < num_states; i++) {      st_count[i] = new int_4[states[1]->get_num_mixtures_cc()];    }    // open the accumulator list file    //    FILE* fp_acc_list = 
fopen((char*)acc_list_file, "r");    // combine the accumulators    //    combine_acc_cc(transitions, states, trans_size, num_states, 		   states[1]->get_num_mixtures_cc(), num_features,		   num_trans, var_floor, fp_acc_list, st_count);        // close the file    //    fclose(fp_acc_list);        // print the updated states and transitions    //    print_state_cc(new_states_file, states, num_states, num_features,		   output_mode);    print_trans_cc(new_trans_file, num_trans, trans_size, transitions);    // determine if we need to output the occupancy    //    if (state_occ_mode == HT_OPT_ON) {      // open the occupancy file      //      FILE* fp_occ_file = fopen((char*)state_occ_file, "w");      if (fp_occ_file == (FILE*)NULL) {	fprintf(stdout, "Error: unable to open state occupancy file %s\n",		state_occ_file);	exit (ISIP_PROTO_ERROR);      }      // call the print method      //      print_occ_cc(fp_occ_file, st_count, num_states,		   states[1]->get_num_mixtures_cc());      fclose(fp_occ_file);    }        for (int_4 i = 0; i < num_states; i++) {      delete [] st_count[i];    }    delete [] st_count;    st_count = (int_4**)NULL;    // exit gracefully    //    exit (ISIP_NO_ERROR);  }    // read the base HMM models, for TRAIN  //  Train_Model** models = read_new_models_cc(num_models, models_file, states,				      transitions, st_map, trans_map);  delete [] models_file;    // read the phone models data for TRAIN  //  int_4* phone_map = (int_4*)NULL;  Train_Phone** phones = read_new_phones_cc(num_monophones, ph_size, models,				      phone_map, num_phones, phones_file,				      mod_map, context_mode, sp_phn_ind);  delete [] phones_file;    // read the lexicon  //  Train_Hash_table* word_table = read_lexicon_cc(num_words, num_monophones,					   monophones, lexicon_file);  delete [] lexicon_file;  // for the TRAIN  //  int_4 num_mix = states[1]->get_num_mixtures_cc();  float_8*** train_mean = new float_8**[num_states];  for (int_4 i = 0; i < num_states; i++) {    
train_mean[i] = new float_8*[num_mix];    for (int_4 j = 0; j < num_mix; j++) {      train_mean[i][j] = new float_8[num_features];    }  }  float_8*** train_covar = new float_8**[num_states];  for (int_4 i = 0; i < num_states; i++) {    train_covar[i] = new float_8*[num_mix];    for (int_4 j = 0; j < num_mix; j++) {      train_covar[i][j] = new float_8[num_features];    }  }  int_4** count = new int_4*[num_states];  for (int_4 i = 0; i < num_states; i++) {    count[i] = new int_4[num_mix];  }  int_4*** trans_count = new int_4**[num_trans];  for (int_4 i = 0; i < num_trans; i++) {    trans_count[i] = new int_4*[trans_size[i]];    for (int_4 j = 0; j < trans_size[i]; j++) {      trans_count[i][j] = new int_4[trans_size[i]];    }  }  // initialize the arrays for TRAIN  //  for (int_4 i = 0; i < num_states; i++) {    for (int_4 j = 0; j < num_mix; j++) {      for (int_4 k = 0; k < num_features; k++) {	train_mean[i][j][k] = (float_8)0.0;      }    }  }  for (int_4 i = 0; i < num_states; i++) {    for (int_4 j = 0; j < num_mix; j++) {      for (int_4 k = 0; k < num_features; k++) {	train_covar[i][j][k] = (float_8)0.0;      }    }  }  for (int_4 i = 0; i < num_states; i++) {    for (int_4 j = 0; j < num_mix; j++) {      count[i][j] = (int_4)0;    }  }    for (int_4 i = 0; i < num_trans; i++) {    for (int_4 j = 0; j < trans_size[i]; j++) {      for (int_4 k = 0; k < trans_size[i]; k++) {	trans_count[i][j][k] = (int_4)0;      }    }  }    // need to find the score to skip over the sp phone  //  float_8 sp_score = (float_8)0;  int_4 sp_phone = (int_4)-1;      // create the sp phone  //  int_4* phn = new int_4[ph_size];  phn[0] = (int_4)0;  phn[1] = HT_SP_PHONE;  phn[2] = (int_4)0;  sp_phone = phone_map[get_nphone_ind_cc(ph_size, num_monophones,					 phn,context_mode, num_sph,					 sph_index)];    // get the model params  //  Train_Model* model = phones[sp_phone]->get_model_cc();  int_4 num_st = model->get_num_states_cc();    // temp variables  //  int_4 ntrans = (int_4)0;  
// find all possible start states for this phone  //  model->get_next_states_cc((int_4)0, ntrans, trans_states, trans_scores,			    trans_states_size);    // get the transition score to exit state  //  for (int_4 kk = 0; kk < ntrans; kk++) {        // check for stop state    //    if (trans_states[kk] == num_st - (int_4)1) {      sp_score = (float_8)trans_scores[kk];      break;    }  }    // set the phone for the start trace  //  phn[0] = (int_4)0;  phn[1] = HT_SILENCE_PHONE;  phn[2] = (int_4)0;  int_4 start_phn = phone_map[get_nphone_ind_cc(ph_size, num_monophones,						phn,context_mode, num_sph,						sph_index)];  delete [] phn;  phn = (int_4*)NULL;  // set the sentence start word to be the !NULL word  //  Train_Word* start_word = (Train_Word*)((word_table->hash_lookup_cc(TRAIN_WRD_NULL))->			     get_item_cc());    // create variable to read input feature data  //  float_8* features = new float_8[num_features];    // create a separate link list of tokens for each phone  //  Train_Link_list** state_toklist = new Train_Link_list*[num_phones];  // create a list of active phones  //  int_4 num_active_ph = (int_4)0;  int_4* active_phones = new int_4[num_phones];  // create a link list of active traces at the phone level for each  // phone  //  Train_Link_list** phone_trlist = new Train_Link_list*[num_phones];  Train_Link_node** phmarker = new Train_Link_node*[num_phones];  Train_Link_node** prev_phmark = new Train_Link_node*[num_phones];  // create a list of active words  //  int_4 num_active_wd = (int_4)0;  int_4* active_words = new int_4[num_words];  // create a link list of active traces at the word level for each  // word  //  Train_Link_list** word_trlist = new Train_Link_list*[num_words];  Train_Link_node** wdmarker = new Train_Link_node*[num_words];  Train_Link_node** prev_wdmark = new Train_Link_node*[num_words];  // create a link list of active lexical trees  //  Train_Link_list* lextree_list = (Train_Link_list*)NULL;  // TRAIN open file  //  FILE* flat = 
(FILE*)NULL;  FILE* fin = (FILE*)NULL;  FILE* fout = (FILE*)NULL;  FILE* fpl = (FILE*)NULL;  FILE* fpi = (FILE*)NULL;  // variable to count the number of feature vectors in the current input  // file  //  int_4 num_vect = (int_4)0;  float_8** vectors = (float_8**)NULL;  char_1* temp_vect = new char_1[ISIP_MAX_STRING_LENGTH];    // open the files containing the lists of input files, mlf_file  // and output files respectively  //
1 2 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -