⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmm_train.cc

📁 这是处理语音信号的程序
💻 CC
📖 第 1 页 / 共 3 页
字号:
  flat = fopen((char*)mlf_file, "r");  if (flat == (FILE*)NULL) {    fprintf(stdout, "Error : cannot open mlf file %s\n",	    mlf_file);    exit(ISIP_PROTO_ERROR);  }    fin = fopen((char*)input_file, "r");  if (fin == (FILE*)NULL) {    fprintf(stdout, "Error : cannot open input file %s\n", input_file);    exit(ISIP_PROTO_ERROR);  }  if (op_out == ISIP_TRUE) {        // for debugging use    //    fout = fopen((char*)output_file, "r");    if (fout == (FILE*)NULL) {      fprintf(stdout, "Error : cannot open output file %s\n", output_file);      exit(ISIP_PROTO_ERROR);    }  }    // open the transcription file  //  fpl = fopen((char*)mlf_file, "r");  expand_filename_cc(new_states_file);  expand_filename_cc(new_trans_file);  // read the corresoponding data file from each list and process one  // by one  //   while (fgets((char*)mfcc_file, ISIP_MAX_STRING_LENGTH, fin) !=	 (char*)NULL) {    // some incremental output    //    fprintf (stdout, "processing file: %s", (char*)mfcc_file);        // for phone_level input transcriptions    //    if (mlf_mode == HT_MODEL_TRANS) {      // make sure these lists have been freed before reallocating memory      // at the end of each utterance      //      if (phn_list != (int_4*)NULL) {	delete [] phn_list;	phn_list = (int_4*)NULL;      }      if (phns != (int_4*)NULL) {	delete [] phns;	phns = (int_4*)NULL;      }      phn_list = new int_4[ISIP_MAX_STRING_LENGTH];      phns = new int_4[ISIP_MAX_STRING_LENGTH];      num_ph = (int_4)0;      // make sure there is a corresponding transcription and output file for      // each input file      //      get_phlist_cc(flat, phn_list, monophones, num_monophones, ph_size,		    context_mode, phone_map, phns, num_ph, num_sph, sph_index);      if (phn_list == (int_4*)NULL) {	fprintf(stdout,		"Error : mismatch in the number of input mfcc files, ");	fprintf(stdout, "and transcription\n");	exit(ISIP_PROTO_ERROR);      }      //get_phn_index_cc(phn_list, monophones, num_monophones, ph_size,      //	       context_mode, phone_map, phn_ind, phns, num_ph);          }        if (op_out == ISIP_TRUE) {      if (fgets((char*)output_file, ISIP_MAX_STRING_LENGTH, fout) ==	  (char*)NULL) {	fprintf(stdout,		"Error : mismatch in the number of input mfcc files, ");	fprintf(stdout, "and output files.\n");	exit(ISIP_PROTO_ERROR);      }    }    // strip newline characters and expand the filenames    //    expand_filename_cc(mfcc_file);    // check the mfcc file. we close it immediately, so we don't care what    // mode it is in.    //    fmfc = fopen((char*)mfcc_file, "r");        if (fmfc == (FILE*)NULL) {      num_warn++;      fprintf(stdout, "Warning %ld : missing mfcc file %s\n", num_warn,	      mfcc_file);      continue;    }    fclose(fmfc);        if (op_out == ISIP_TRUE) {      expand_filename_cc(output_file);    }        // initialize the word-level lists    //    for (int_4 i = 0; i < num_words; i++) {      active_words[i] = (int_4)-1;      word_trlist[i] = (Train_Link_list*)NULL;      wdmarker[i] = (Train_Link_node*)NULL;      prev_wdmark[i] = (Train_Link_node*)NULL;    }        // initialize the phone and state level lists    //    for (int_4 i = 0; i < num_phones; i++) {      active_phones[i] = (int_4)-1;      phone_trlist[i] = (Train_Link_list*)NULL;      phmarker[i] = (Train_Link_node*)NULL;      prev_phmark[i] = (Train_Link_node*)NULL;      phones[i]->set_active_cc(ISIP_FALSE);      state_toklist[i] = (Train_Link_list*)NULL;    }        // initialize the nbest lists    //    for (int_4 i = 0; i < num_nbest; i++) {      n_best_array[i] = (Train_Trace*)NULL;    }        // initialize counts    //    current_frame = (int_4)0;    num_active_ph = (int_4)0;    num_active_wd = (int_4)0;    num_hyps = (int_4)0;    for (int_4 i = 0; i < num_levels; i++) {      num_traces_total[i] = (int_4)0;      num_traces_gen[i] = (int_4)0;      num_traces_del[i] = (int_4)0;      total_gen[i] = (int_4)0;      total_del[i] = (int_4)0;    }            // create a list to store the active lexical trees    //    lextree_list = new Train_Link_list();        // mark all words as inactive    //    Train_Word* word = (Train_Word*)NULL;    Train_Hash_cell** hash_cells = word_table->get_cells_cc();    int_4 hash_size = word_table->get_size_cc();    for (int_4 k = 0; k < hash_size; k++) {      for (Train_Hash_cell* cell = hash_cells[k]; cell != (Train_Hash_cell*)NULL;	   cell = cell->get_next_cc()) {	word = (Train_Word*)(cell->get_item_cc());	word->set_active_cc(ISIP_FALSE);      }    }        // create the lattice    //    Train_Lattice* lattice = new Train_Lattice();    // for word_level input transcriptions open the mlf file to read    //    if(mlf_mode == HT_WORD_TRANS) {      if (fpl == (FILE*)NULL) {	fprintf(stdout, "Cannot open file %s\n", mlf_file);	exit(ISIP_PROTO_ERROR);      }            lattice->read_trans_cc(fpl, word_table);    }    // else for phone_level alignment    //    else {            // dummy lattice for phone alignment during training      //       build_dm_lat_cc(lattice, word_table);                // test for built a tree for training      //      Train_Lattice_node* lat_node = lattice->get_start_node_cc();      Train_Link_list* next_list = lat_node->get_next_nodes_cc();      Train_Link_node* node = next_list->get_head_cc();      Train_Lattice_node* next_node = (Train_Lattice_node*)(node->get_item_cc());            next_list = next_node->get_next_nodes_cc();      node = next_list->get_head_cc();      next_node = (Train_Lattice_node*)(node->get_item_cc());            // build tree with phones in the alignment      //      traintree = new Train_Lex_tree;            traintree->build_traintree_cc(phn_list, phns, num_ph,				    num_monophones, next_node);            // set the lex_tree for !SENT_START      //        lat_node = lattice->get_start_node_cc();      next_list = lat_node->get_next_nodes_cc();      node = next_list->get_head_cc();      next_node = (Train_Lattice_node*)(node->get_item_cc());      next_node->set_lex_tree_cc(traintree);    }    // get the word_penalty from lattice    //    float_4 word_penalty = lattice->get_word_penalty_cc();           // open input feature data file to read    //    if (input_mode == HT_ASCII_MODE) {      fpi = fopen((char*)mfcc_file, "r");    }    else {      fpi = fopen((char*)mfcc_file, "rb");    }    if (fpi == (FILE*)NULL) {      fprintf(stdout, "Cannot open file %s\n", mfcc_file);      exit(ISIP_PROTO_ERROR);    }        // create a sentence start trace    //    Train_Trace* start_trace = manager->new_trace_cc();        start_trace->set_level_cc(HT_WORD_LEVEL);        // set the triphone to the start triphone (silence)    //    start_trace->set_phone_ind_cc((int_4)start_phn);        // initialize the active word list    //    int_4 start_wd = start_word->get_index_cc();    start_word->set_active_cc(ISIP_TRUE);    active_words[num_active_wd++] = start_wd;        // insert the start trace in the correct list    //    word_trlist[start_wd] = new Train_Link_list();    word_trlist[start_wd]->insert_cc(start_trace);    wdmarker[start_wd] = word_trlist[start_wd]->get_curr_cc();    num_traces_gen[HT_WORD_LEVEL]++;        // set the lattice information for the start trace    //    Train_Lattice_node* latnode = lattice->get_start_node_cc();    start_trace->set_lat_node_cc(latnode);        // set the lexical information for the start trace    // create a new tree if one doesn't exist and insert it in the list    // of active trees    //    Train_Hash_cell* hcell = (Train_Hash_cell*)NULL;    lattice->get_lat_node_cc(latnode, hcell);    Train_Lex_tree* start_tree = latnode->get_lex_tree_cc();    if (start_tree == (Train_Lex_tree*)NULL) {      start_tree = new Train_Lex_tree(hcell);      latnode->set_lex_tree_cc(start_tree);      lextree_list->insert_cc(start_tree);    }        // update counts    //    for (int_4 i = 0; i < num_levels; i++) {      total_gen[i] += num_traces_gen[i];      total_del[i] += num_traces_del[i];      num_traces_total[i] += (num_traces_gen[i] - num_traces_del[i]);    }        // reset counts    //    for (int_4 i = 0; i < num_levels; i++) {      num_traces_gen[i] = (int_4)0;      num_traces_del[i] = (int_4)0;    }        num_mapmi = (int_4)0;    num_steps = (int_4)0;        num_vect = (int_4)0;        // counting number of features in file        //    // if ascii mode    //    if (input_mode == HT_ASCII_MODE) {      while (fgets((char*)temp_vect, ISIP_MAX_STRING_LENGTH, fpi) !=	     (char*)NULL) {	num_vect++;      }    }    // if binary mode    //    else {      float_8* temp_array = new float_8[num_features];      while ((int_4)fread(temp_vect, sizeof(float_8), num_features, fpi)	     == num_features) {	num_vect++;      }      delete [] temp_array;    }    fclose(fpi);        // open input feature data file to read    //    if (input_mode == HT_ASCII_MODE) {      fpi = fopen((char*)mfcc_file, "r");    }    else {      fpi = fopen((char*)mfcc_file, "rb");    }    if (fpi == (FILE*)NULL) {      fprintf(stdout, "Cannot open file %s\n", mfcc_file);      exit(ISIP_PROTO_ERROR);    }        // array used to store the input data    //    vectors = new float_8*[num_vect];    for (int_4 i = 0; i < num_vect; i++) {      vectors[i] = new float_8[num_features];    }        // initiallize the vectors array    //    for (int_4 i = 0; i < num_vect; i++) {      for (int_4 j = 0; j < num_features; j++) {	vectors[i][j] = 0.0;      }    }        // main loop: this will loop over all traces for each frame of data    // the traces are taken care of differently depending on which level    // they belong to -- word or phone or state    //        // read the input data    //    while (read_input_cc(fpi, num_features, input_mode, features)	   == ISIP_TRUE) {            // store the current frame data      //      for (int_4 i = 0; i < num_features; i++) {	vectors[current_frame][i] = features[i];      }            // reset the beam thresholds      //      for (int_4 i = 0; i < num_levels; i++) {	beam_thresh[i] = HT_DEFAULT_SCORE;	max_score[i] = HT_DEFAULT_SCORE;      }            // loop over all active state-level traces      // evaluate the state and update the score for each trace      // then find all possible transitions and advance traces      //      project_states_cc(state_toklist, current_frame, features, sp_phone,			phone_trlist, active_phones, num_active_ph,			align_mode, phones, num_traces_gen, num_traces_del,			num_mapmi,max_score, phmarker);            // compute the MAPMI pruning threshold score      //      if (mapmi_limit > (int_4)0) {		// reset mapmi threshold	//	mapmi_thresh = HT_DEFAULT_SCORE;		// sort all the active traces according to score	//	if (num_mapmi > mapmi_limit) {	  sort_traces_cc(phmarker, active_phones, num_active_ph,			 num_mapmi, mapmi_limit, mapmi_thresh);	}      }            // find the phone level beam pruning threshold      //      if (beam_width[HT_PHONE_LEVEL] != (float_8)0) {	beam_thresh[HT_PHONE_LEVEL] = max_score[HT_PHONE_LEVEL] +	  beam_width[HT_PHONE_LEVEL];      }            // if mapmi pruning works better make that the phone-level beam      // threshold      //      if (beam_thresh[HT_PHONE_LEVEL] < mapmi_thresh) {	beam_thresh[HT_PHONE_LEVEL] = mapmi_thresh;      }            // mark all phone level traces below the beam width for pruning      //      trace_prune_cc(phone_trlist, phmarker, active_phones, num_active_ph,		     beam_thresh[HT_PHONE_LEVEL], HT_PHONE_LEVEL,		     num_traces_del[HT_PHONE_LEVEL]);            // create word-level traces for the word-end phone-level traces      // make sure that the inactive traces are no longer kept      //      active_trace_cc(phone_trlist, phmarker, prev_phmark,		      word_trlist, wdmarker, prev_wdmark,		      active_phones, num_active_ph,		      active_words, num_active_wd,		      sp_score, word_penalty,		      num_traces_gen, num_traces_del,		      max_score[HT_WORD_LEVEL]);            // find the word level beam pruning threshold      //      if (beam_width[HT_WORD_LEVEL] != (float_8)0) {	beam_thresh[HT_WORD_LEVEL] = max_score[HT_WORD_LEVEL] +	  beam_width[HT_WORD_LEVEL];      }            // if mapmi pruning works better make that the word-level beam      // threshold      //      if (beam_thresh[HT_WORD_LEVEL] < mapmi_thresh) {	beam_thresh[HT_WORD_LEVEL] = mapmi_thresh;      }            // mark all word level traces below the beam width for pruning      // also update the status of active lexical trees      //      trace_prune_cc(word_trlist, wdmarker, active_words, num_active_wd,		     beam_thresh[HT_WORD_LEVEL], HT_WORD_LEVEL,		     num_traces_del[HT_WORD_LEVEL]);            // loop over all active word and phone-level traces in the      // current time frame and grow the next phone      //      project_phones_cc(wdmarker, phmarker, state_toklist, phones,			lextree_list, lattice, num_monophones, ph_size,			phone_map, active_words, num_active_wd,			active_phones, num_active_ph, num_traces_gen,			max_score[HT_STATE_LEVEL], context_mode,			mlf_mode, num_sph, sph_index);	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -