📄 hmm_train.cc
字号:
// file: hmm_train.cc//// this program does trace projection decoding//// isip include files//#include "hmm_train.h"#include "hmm_train_constants.h"static int_4* trans_states = (int_4*)NULL;static float_4* trans_scores = (float_4*)NULL;static int_4 trans_states_size = 0;// main program//int main(int_4 argc, char_1** argv) { // variables to hold commandline parameters // char_1* params_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* monophones_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* transitions_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* states_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* new_trans_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* new_states_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* batch_stats_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* acc_list_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* models_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* phones_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* lexicon_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* input_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* output_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* mfcc_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* state_occ_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* mlf_file = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* sph_file = new char_1[ISIP_MAX_STRING_LENGTH]; logical_1 state_occ_mode = HT_OPT_OFF; int_4 num_levels = HT_NUM_LEVELS; int_4 ph_size = HT_TRIPHONE_SIZE; int_4 num_nbest = (int_4)0; int_4 context_mode; logical_1 output_mode = HT_BINARY_MODE; logical_1 train_mode = HT_NORMAL; // default recognition mode is via a lattice // logical_1 mlf_mode = HT_WORD_TRANS; // parameters to define size // int_4 num_monophones = (int_4)0; int_4 num_features = (int_4)0; int_4 num_trans = (int_4)0; int_4 num_states = (int_4)0; int_4 num_models = (int_4)0; int_4 num_phones = (int_4)0; int_4 num_words = (int_4)0; // for the TRAIN // int_4 file_count = (int_4)0; int_4 num_ph = (int_4)0; int_4* mod_map = (int_4*)NULL; int_4** st_map = (int_4**)NULL; int_4* trans_map = (int_4*)NULL; int_4* phn_ind = new int_4[HT_MAX_NUM_PHONES]; int_4* phns = (int_4*)NULL; int_4 num_warn = (int_4)0; logical_1 op_out = ISIP_FALSE; FILE* fmfc = (FILE*)NULL; int_4 sp_phn_ind = (int_4)0; // count of trace generation // int_4* num_traces_total = new int_4[num_levels]; int_4* num_traces_gen = new int_4[num_levels]; int_4* num_traces_del = new int_4[num_levels]; int_4* total_gen = new int_4[num_levels]; int_4* total_del = new int_4[num_levels]; // pruning thresholds and parameters // int_4 align_mode = HT_DEFAULT_ALIGN_MODE; int_4 input_mode = HT_DEFAULT_INPUT_MODE; int_4 num_mapmi = (int_4)0; int_4 mapmi_limit = (int_4)0; float_8 mapmi_thresh = HT_DEFAULT_SCORE; float_8* max_score = new float_8[num_levels]; float_8* beam_width = new float_8[num_levels]; float_8* beam_thresh = new float_8[num_levels]; float_8 var_floor = HT_DEFAULT_VAR_FLOOR; // phone list and utterance for phone level alignment // int_4* phn_list = (int_4*)NULL; char_1* utterance = (char_1*)NULL; char_1* phn_str = (char_1*)NULL; // define the lex tree // Train_Lex_tree* traintree = (Train_Lex_tree*)NULL; // initialize the pruning parameters // for (int_4 i = 0; i < num_levels; i++) { max_score[i] = HT_DEFAULT_SCORE; beam_thresh[i] = HT_DEFAULT_SCORE; beam_width[i] = (float_8)0; } // misc parameters // int_4 current_frame = (int_4)0; int_4 num_steps = (int_4)0; // read and decipher the commandline // int_4 context_flag; read_cmdline_cc(argc, argv, params_file, context_flag); // file pointer // FILE* fp = (FILE*)NULL; // open the params file // fp = fopen((char*)params_file, "r"); if (fp == (FILE*)NULL) { fprintf(stdout, "Cannot open file %s\n", params_file); exit(ISIP_PROTO_ERROR); } // context can be specified from the command line also // if (context_flag == 1){ context_mode = 3; } // read parameters // read_params_cc(fp, monophones_file, transitions_file, states_file, new_trans_file, new_states_file, batch_stats_file, acc_list_file, models_file, phones_file, lexicon_file, align_mode, input_mode, num_levels, input_file, mlf_file, output_file, ph_size, num_nbest, beam_width, mapmi_limit, mlf_mode, context_mode, var_floor, op_out, output_mode, train_mode, sph_file, state_occ_file, state_occ_mode); // create a list for n-best word traces // int_4 num_hyps = (int_4)0; Train_Trace** n_best_array = new Train_Trace*[num_nbest]; // clean up // fclose(fp); delete [] params_file; // create a memory manager and set it in the trace linked lists // Train_Memory_manager* manager = new Train_Memory_manager(HT_DEFAULT_BLOCK_SIZE, HT_NODE_BLOCK_SIZE); manager->set_trace_grow_size_cc(HT_TRACE_BLOCK_SIZE); Train_Link_list::set_manager_cc(manager); // read the monophones list // char_1** monophones = read_monophones_cc(num_monophones, monophones_file); delete [] monophones_file; // read the transitions data // int_4* trans_size = (int_4*)NULL; float_4*** transitions = read_trans_cc(num_trans, trans_size, transitions_file); delete [] transitions_file; // read the list of phones that need special handling wrt context // int_4* sph_index; int_4 num_sph =0; sph_index = read_sph_cc(num_sph, monophones, num_monophones, sph_file); delete [] sph_file; // read the states data // Train_State** states = read_states_cc(num_states, num_features, states_file); delete [] states_file; // read the special phone list file; // // if training mode involves combining accumulators // if (train_mode == HT_COMBINE) { // setup the state counts // int_4** st_count = new int_4*[num_states]; for (int_4 i = 0; i < num_states; i++) { st_count[i] = new int_4[states[1]->get_num_mixtures_cc()]; } // open the accumulator list file // FILE* fp_acc_list = fopen((char*)acc_list_file, "r"); // combine the accumulators // combine_acc_cc(transitions, states, trans_size, num_states, states[1]->get_num_mixtures_cc(), num_features, num_trans, var_floor, fp_acc_list, st_count); // close the file // fclose(fp_acc_list); // print the updated states and transitions // print_state_cc(new_states_file, states, num_states, num_features, output_mode); print_trans_cc(new_trans_file, num_trans, trans_size, transitions); // determine if we need to output the occupancy // if (state_occ_mode == HT_OPT_ON) { // open the occupancy file // FILE* fp_occ_file = fopen((char*)state_occ_file, "w"); if (fp_occ_file == (FILE*)NULL) { fprintf(stdout, "Error: unable to open state occupancy file %s\n", state_occ_file); exit (ISIP_PROTO_ERROR); } // call the print method // print_occ_cc(fp_occ_file, st_count, num_states, states[1]->get_num_mixtures_cc()); fclose(fp_occ_file); } for (int_4 i = 0; i < num_states; i++) { delete [] st_count[i]; } delete [] st_count; st_count = (int_4**)NULL; // exit gracefully // exit (ISIP_NO_ERROR); } // read the base HMM models, for TRAIN // Train_Model** models = read_new_models_cc(num_models, models_file, states, transitions, st_map, trans_map); delete [] models_file; // read the phone models data for TRAIN // int_4* phone_map = (int_4*)NULL; Train_Phone** phones = read_new_phones_cc(num_monophones, ph_size, models, phone_map, num_phones, phones_file, mod_map, context_mode, sp_phn_ind); delete [] phones_file; // read the lexicon // Train_Hash_table* word_table = read_lexicon_cc(num_words, num_monophones, monophones, lexicon_file); delete [] lexicon_file; // for the TRAIN // int_4 num_mix = states[1]->get_num_mixtures_cc(); float_8*** train_mean = new float_8**[num_states]; for (int_4 i = 0; i < num_states; i++) { train_mean[i] = new float_8*[num_mix]; for (int_4 j = 0; j < num_mix; j++) { train_mean[i][j] = new float_8[num_features]; } } float_8*** train_covar = new float_8**[num_states]; for (int_4 i = 0; i < num_states; i++) { train_covar[i] = new float_8*[num_mix]; for (int_4 j = 0; j < num_mix; j++) { train_covar[i][j] = new float_8[num_features]; } } int_4** count = new int_4*[num_states]; for (int_4 i = 0; i < num_states; i++) { count[i] = new int_4[num_mix]; } int_4*** trans_count = new int_4**[num_trans]; for (int_4 i = 0; i < num_trans; i++) { trans_count[i] = new int_4*[trans_size[i]]; for (int_4 j = 0; j < trans_size[i]; j++) { trans_count[i][j] = new int_4[trans_size[i]]; } } // initialize the arrays for TRAIN // for (int_4 i = 0; i < num_states; i++) { for (int_4 j = 0; j < num_mix; j++) { for (int_4 k = 0; k < num_features; k++) { train_mean[i][j][k] = (float_8)0.0; } } } for (int_4 i = 0; i < num_states; i++) { for (int_4 j = 0; j < num_mix; j++) { for (int_4 k = 0; k < num_features; k++) { train_covar[i][j][k] = (float_8)0.0; } } } for (int_4 i = 0; i < num_states; i++) { for (int_4 j = 0; j < num_mix; j++) { count[i][j] = (int_4)0; } } for (int_4 i = 0; i < num_trans; i++) { for (int_4 j = 0; j < trans_size[i]; j++) { for (int_4 k = 0; k < trans_size[i]; k++) { trans_count[i][j][k] = (int_4)0; } } } // need to find the score to skip over the sp phone // float_8 sp_score = (float_8)0; int_4 sp_phone = (int_4)-1; // create the sp phone // int_4* phn = new int_4[ph_size]; phn[0] = (int_4)0; phn[1] = HT_SP_PHONE; phn[2] = (int_4)0; sp_phone = phone_map[get_nphone_ind_cc(ph_size, num_monophones, phn,context_mode, num_sph, sph_index)]; // get the model params // Train_Model* model = phones[sp_phone]->get_model_cc(); int_4 num_st = model->get_num_states_cc(); // temp variables // int_4 ntrans = (int_4)0; // find all possible start states for this phone // model->get_next_states_cc((int_4)0, ntrans, trans_states, trans_scores, trans_states_size); // get the transition score to exit state // for (int_4 kk = 0; kk < ntrans; kk++) { // check for stop state // if (trans_states[kk] == num_st - (int_4)1) { sp_score = (float_8)trans_scores[kk]; break; } } // set the phone for the start trace // phn[0] = (int_4)0; phn[1] = HT_SILENCE_PHONE; phn[2] = (int_4)0; int_4 start_phn = phone_map[get_nphone_ind_cc(ph_size, num_monophones, phn,context_mode, num_sph, sph_index)]; delete [] phn; phn = (int_4*)NULL; // set the sentence start word to be the !NULL word // Train_Word* start_word = (Train_Word*)((word_table->hash_lookup_cc(TRAIN_WRD_NULL))-> get_item_cc()); // create variable to read input feature data // float_8* features = new float_8[num_features]; // create a separate link list of tokens for each phone // Train_Link_list** state_toklist = new Train_Link_list*[num_phones]; // create a list of active phones // int_4 num_active_ph = (int_4)0; int_4* active_phones = new int_4[num_phones]; // create a link list of active traces at the phone level for each // phone // Train_Link_list** phone_trlist = new Train_Link_list*[num_phones]; Train_Link_node** phmarker = new Train_Link_node*[num_phones]; Train_Link_node** prev_phmark = new Train_Link_node*[num_phones]; // create a list of active words // int_4 num_active_wd = (int_4)0; int_4* active_words = new int_4[num_words]; // create a link list of active traces at the word level for each // word // Train_Link_list** word_trlist = new Train_Link_list*[num_words]; Train_Link_node** wdmarker = new Train_Link_node*[num_words]; Train_Link_node** prev_wdmark = new Train_Link_node*[num_words]; // create a link list of active lexical trees // Train_Link_list* lextree_list = (Train_Link_list*)NULL; // TRAIN open file // FILE* flat = (FILE*)NULL; FILE* fin = (FILE*)NULL; FILE* fout = (FILE*)NULL; FILE* fpl = (FILE*)NULL; FILE* fpi = (FILE*)NULL; // variable to count the number of feature vectors in the current input // file // int_4 num_vect = (int_4)0; float_8** vectors = (float_8**)NULL; char_1* temp_vect = new char_1[ISIP_MAX_STRING_LENGTH]; // open the files containing the lists of input files, mlf_file // and output files respectively //
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -