📄 bw_train.cc

📁 这是处理语音信号的程序
💻 CC
📖 第 1 页 / 共 2 页
字号:
12 下一页
// file: bw_train.cc//// this program does baum-welch training//// system include files//#include <memory.h>#include <string.h>// isip include files//#include "bw_train.h"#include "bw_train_constants.h"// main program//int main(int_4 argc, char_1** argv) {    // variables to hold commandline parameters  //  char_1* params_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* transitions_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* states_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* new_trans_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* new_states_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* models_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* mfcclist_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* mfcc_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* monophones_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* phones_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* lablist_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* output_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* acc_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* acclist_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* state_occ_file = new char_1[ISIP_MAX_STRING_LENGTH];  char_1* var_floor_file = new char_1[ISIP_MAX_STRING_LENGTH];  int_4 min_model_count = BW_DEF_MIN_MOD_COUNT;    // parameters to define size  //  int_4 num_monophones = (int_4)0;  int_4 num_phones = (int_4)0;    int_4 num_features = (int_4)0;    int_4 num_trans = (int_4)0;  int_4 num_states = (int_4)0;  int_4 num_models = (int_4)0;  int_4 num_mix = (int_4)0;  int_4 num_mod = (int_4)0;  int_4 num_vect = (int_4)0;  int_4 num_phy_st = (int_4)0;  // mapping arrays for the training  //  int_4** st_map = (int_4**)NULL;  int_4* trans_map = (int_4*)NULL;  int_4* mod_map = (int_4*)NULL;  int_4* phone_map = (int_4*)NULL;  int_4* model_list = (int_4*)NULL;    // temporary variables  //  int_4 tmp = (int_4)0;  int_4 temp_st = (int_4)0;  int_4* trans_size = (int_4*)NULL;  float_8** for_probt = (float_8**)NULL;  float_8** for_probt1 = (float_8**)NULL;  float_8*** back_prob = (float_8***)NULL;  float_8 utt_prob = (float_8)0.0;  float_8** vectors = (float_8**)NULL;  float_8* max_back = (float_8*)NULL;  float_8** max_mback = (float_8**)NULL;  float_4* var_floor = (float_4*)NULL;  char_1* lab_file = (char_1*)NULL;    // default value set  //  int_4 ph_size = BW_DF_PHONE_SIZE;  int_4 input_mode = BW_DF_INPUT_MODE;   int_4 input_feature_format = BW_DF_INPUT_FEATURE_FORMAT;  int_4 output_mode = BW_DF_OUTPUT_MODE;  int_4 context_mode = BW_DF_CONTEXT_MODE;  int_4 train_mode = BW_DF_TRAIN_MODE;  int_4 occ_mode = BW_DF_OCC_MODE;  float_4 width = BW_DF_BEAM_WIDTH;  float_4 min_mpd = BW_DF_MIN_MPD;  float_4 min_occp = BW_DF_MIN_OCCP;  int_4 delta_win = BW_DF_DELTA_WIN;  logical_1 delta = BW_DF_DELTA;  logical_1 acc = BW_DF_ACC;    // to compute the average utterance score  //  float_8 utt_avg = (float_8)0.0;  float_8 cur_prob = (float_8)0.0;  int_4 num_utt = (int_4)0;    // bound array for all frames  //  int_2* lower = (int_2*)NULL;  int_2* upper = (int_2*)NULL;  // variable to cache state scores  //  float_8*** state_scores = (float_8***)NULL;    // read and deciper the commandline  //  read_cmdline_cc(argc, argv, params_file);    // file pointers  //  FILE *fp = (FILE*)NULL;  FILE *fp_acc_list = (FILE*)NULL;  FILE *fp_acc_file = (FILE*)NULL;  FILE *fml = (FILE*)NULL;  FILE *fll = (FILE*)NULL;    // open the parameter file  //  fp = fopen((char*)params_file, "r");  if (fp == (FILE*)NULL) {    fprintf(stdout, "Cannot open file %s\n", params_file);    fflush(stdout);    exit(ISIP_PROTO_ERROR);  }    // read parameters  //  read_params_cc(fp, transitions_file, states_file, new_trans_file,		 new_states_file, models_file, mfcclist_file,		 monophones_file, phones_file, lablist_file,		 acc_file, acclist_file, state_occ_file, var_floor_file,		 input_mode, output_mode, context_mode, train_mode,		 occ_mode, width, min_mpd, min_occp, min_model_count,		 input_feature_format, delta_win, delta, acc);    // close the parameter file  //  fclose(fp);    // read the monophones  //  char_1** monophones = read_monophones_cc(num_monophones, monophones_file);    // read the transition matrix  //  float_4*** transitions = read_trans_cc(num_trans, trans_size,					 transitions_file);    // read the states data  //  Train_State** states = read_states_cc(num_states, num_features, states_file);  // read the basic HMM models data  //  Train_Model** models = read_models_cc(num_models, models_file, states,				  transitions, st_map, trans_map);    // read the phone models  //  Train_Phone** phones = read_phones_cc(num_monophones, ph_size, models,				  phone_map, num_phones, phones_file,				  mod_map, context_mode);  // read variance floors  //  var_floor = new float_4[num_features];  memset(var_floor, 0, sizeof(float_4) * num_features);  read_varf_cc(var_floor, num_features, var_floor_file);  // allocate memory for the new transitions  //  float_8*** new_trans = new float_8**[num_trans];  for (int_4 i = 0; i < num_trans; i++) {        // get the size of current covariance matrix    //    tmp = trans_size[i];    new_trans[i] = new float_8*[tmp];    for (int_4 j = 0; j < tmp; j++) {      new_trans[i][j] = new float_8[tmp];      for (int_4 k = 0; k < tmp; k++) {	new_trans[i][j][k] = 0.0;      }    }  }  // allocate memory for the model access counts  //  int_4* model_access_counts = new int_4[num_models];  for (int_4 m_num = 0; m_num < num_models; m_num++) {    model_access_counts[m_num] = 0;  }    // allocate memory for the new parameters for all states  //  float_8*** train_mean = new float_8**[num_states];  float_8*** train_covar = new float_8**[num_states];  float_8** mix_weights = new float_8*[num_states];  float_8* state_occ = new float_8[num_states];  num_mix = states[1]->get_num_mixtures_cc();  for (int_4 i = 0; i < num_states; i++) {    train_mean[i] = new float_8*[num_mix];    train_covar[i] = new float_8*[num_mix];    mix_weights[i] = new float_8[num_mix];    state_occ[i] = (float_8)0.0;    for (int_4 j = 0; j < num_mix; j++) {      train_mean[i][j] = new float_8[num_features];      train_covar[i][j] = new float_8[num_features];      mix_weights[i][j] = (float_8)0.0;      for (int_4 k = 0; k < num_features; k++) {	train_mean[i][j][k] = (float_8)0.0;	train_covar[i][j][k] = (float_8)0.0;      }    }  }  // check training modes  //  if (train_mode == BW_COMBINE_MODE) {        // open the accumulator list file to read    //    fp_acc_list = fopen((char*)acclist_file, "r");    if (fp_acc_list == (FILE*)NULL) {      fprintf(stdout, "Cannot open file %s\n", acclist_file);      fflush(stdout);      exit(ISIP_PROTO_ERROR);    }        // combine the accumulators    //    combine_acc_cc(new_trans, train_mean, train_covar,		   mix_weights, state_occ, model_access_counts, num_trans,		   trans_size, num_states, num_mix, num_features, num_models,		   fp_acc_list);        // clean up    //    fclose(fp_acc_list);  }  // if it is not in combine mode  //  else {    if (train_mode == BW_BATCH_MODE) {            // open the accumulator file to write      //      fp_acc_file = fopen((char*)acc_file, "wb");      if (fp_acc_file == (FILE*)NULL) {	fprintf(stdout, "Error: cannot open file %s\n", acc_file);	fflush(stdout);	exit(ISIP_PROTO_ERROR);      }    }        // open the mfcc and lab files    //    fml = fopen((char*)mfcclist_file, "r");    fll = fopen((char*)lablist_file, "r");    lab_file = new char_1[BW_MAX_TRANS_LENGTH];        // loop over all the input mfcc files    //    while (fgets((char*)mfcc_file, ISIP_MAX_STRING_LENGTH, fml) !=	   (char*)NULL) {            expand_filename_cc(mfcc_file);      fprintf (stdout, "processing file: %s\n", (char*)mfcc_file);            // open the corresponding lab file      //      if (fgets((char*)lab_file, BW_MAX_TRANS_LENGTH, fll) == (char*)NULL) {	fprintf (stdout,		 "Error: mismatch in the number of lab and mfcc files\n");	fflush(stdout);	exit(ISIP_PROTO_ERROR);      }      else {	if (read_input_cc(vectors, num_vect, num_features, input_mode,			  mfcc_file, input_feature_format, delta_win,			  delta, acc) == ISIP_TRUE) {	  	  // read the input word sequence and find the model indexes according	  // to the context mode	  //	  get_model_list_cc(num_mod, model_list, num_phy_st, lab_file,			    monophones, num_monophones, ph_size, context_mode,			    phone_map, mod_map, models, model_access_counts);	  // initialize the bound array	  //	  lower = new int_2[num_vect+1];	  upper = new int_2[num_vect+1];
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -