📄 hmm_train.cc
flat = fopen((char*)mlf_file, "r");
if (flat == (FILE*)NULL) {
  fprintf(stdout, "Error : cannot open mlf file %s\n", mlf_file);
  exit(ISIP_PROTO_ERROR);
}

fin = fopen((char*)input_file, "r");
if (fin == (FILE*)NULL) {
  fprintf(stdout, "Error : cannot open input file %s\n", input_file);
  exit(ISIP_PROTO_ERROR);
}

if (op_out == ISIP_TRUE) {

  // for debugging use
  //
  fout = fopen((char*)output_file, "r");
  if (fout == (FILE*)NULL) {
    fprintf(stdout, "Error : cannot open output file %s\n", output_file);
    exit(ISIP_PROTO_ERROR);
  }
}

// open the transcription file
//
fpl = fopen((char*)mlf_file, "r");

expand_filename_cc(new_states_file);
expand_filename_cc(new_trans_file);

// read the corresponding data file from each list and process one
// by one
//
while (fgets((char*)mfcc_file, ISIP_MAX_STRING_LENGTH, fin) != (char*)NULL) {

  // some incremental output
  //
  fprintf(stdout, "processing file: %s", (char*)mfcc_file);

  // for phone_level input transcriptions
  //
  if (mlf_mode == HT_MODEL_TRANS) {

    // make sure these lists have been freed before reallocating memory
    // at the end of each utterance
    //
    if (phn_list != (int_4*)NULL) {
      delete [] phn_list;
      phn_list = (int_4*)NULL;
    }
    if (phns != (int_4*)NULL) {
      delete [] phns;
      phns = (int_4*)NULL;
    }
    phn_list = new int_4[ISIP_MAX_STRING_LENGTH];
    phns = new int_4[ISIP_MAX_STRING_LENGTH];
    num_ph = (int_4)0;

    // make sure there is a corresponding transcription and output file for
    // each input file
    //
    get_phlist_cc(flat, phn_list, monophones, num_monophones, ph_size,
                  context_mode, phone_map, phns, num_ph, num_sph, sph_index);
    if (phn_list == (int_4*)NULL) {
      fprintf(stdout, "Error : mismatch in the number of input mfcc files, ");
      fprintf(stdout, "and transcription\n");
      exit(ISIP_PROTO_ERROR);
    }
    //get_phn_index_cc(phn_list, monophones, num_monophones, ph_size,
    //                 context_mode, phone_map, phn_ind, phns, num_ph);
  }

  if (op_out == ISIP_TRUE) {
    if (fgets((char*)output_file, ISIP_MAX_STRING_LENGTH, fout)
        == (char*)NULL) {
      fprintf(stdout, "Error : mismatch in the number of input mfcc files, ");
      fprintf(stdout, "and output files.\n");
      exit(ISIP_PROTO_ERROR);
    }
  }

  // strip newline characters and expand the filenames
  //
  expand_filename_cc(mfcc_file);

  // check the mfcc file. we close it immediately, so we don't care what
  // mode it is in.
  //
  fmfc = fopen((char*)mfcc_file, "r");
  if (fmfc == (FILE*)NULL) {
    num_warn++;
    fprintf(stdout, "Warning %ld : missing mfcc file %s\n", num_warn,
            mfcc_file);
    continue;
  }
  fclose(fmfc);

  if (op_out == ISIP_TRUE) {
    expand_filename_cc(output_file);
  }

  // initialize the word-level lists
  //
  for (int_4 i = 0; i < num_words; i++) {
    active_words[i] = (int_4)-1;
    word_trlist[i] = (Train_Link_list*)NULL;
    wdmarker[i] = (Train_Link_node*)NULL;
    prev_wdmark[i] = (Train_Link_node*)NULL;
  }

  // initialize the phone and state level lists
  //
  for (int_4 i = 0; i < num_phones; i++) {
    active_phones[i] = (int_4)-1;
    phone_trlist[i] = (Train_Link_list*)NULL;
    phmarker[i] = (Train_Link_node*)NULL;
    prev_phmark[i] = (Train_Link_node*)NULL;
    phones[i]->set_active_cc(ISIP_FALSE);
    state_toklist[i] = (Train_Link_list*)NULL;
  }

  // initialize the nbest lists
  //
  for (int_4 i = 0; i < num_nbest; i++) {
    n_best_array[i] = (Train_Trace*)NULL;
  }

  // initialize counts
  //
  current_frame = (int_4)0;
  num_active_ph = (int_4)0;
  num_active_wd = (int_4)0;
  num_hyps = (int_4)0;

  for (int_4 i = 0; i < num_levels; i++) {
    num_traces_total[i] = (int_4)0;
    num_traces_gen[i] = (int_4)0;
    num_traces_del[i] = (int_4)0;
    total_gen[i] = (int_4)0;
    total_del[i] = (int_4)0;
  }

  // create a list to store the active lexical trees
  //
  lextree_list = new Train_Link_list();

  // mark all words as inactive
  //
  Train_Word* word = (Train_Word*)NULL;
  Train_Hash_cell** hash_cells = word_table->get_cells_cc();
  int_4 hash_size = word_table->get_size_cc();
  for (int_4 k = 0; k < hash_size; k++) {
    for (Train_Hash_cell* cell = hash_cells[k];
         cell != (Train_Hash_cell*)NULL; cell = cell->get_next_cc()) {
      word = (Train_Word*)(cell->get_item_cc());
      word->set_active_cc(ISIP_FALSE);
    }
  }

  // create the lattice
  //
  Train_Lattice* lattice = new Train_Lattice();

  // for word_level input transcriptions open the mlf file to read
  //
  if (mlf_mode == HT_WORD_TRANS) {
    if (fpl == (FILE*)NULL) {
      fprintf(stdout, "Cannot open file %s\n", mlf_file);
      exit(ISIP_PROTO_ERROR);
    }
    lattice->read_trans_cc(fpl, word_table);
  }

  // else for phone_level alignment
  //
  else {

    // dummy lattice for phone alignment during training
    //
    build_dm_lat_cc(lattice, word_table);

    // walk the lattice to the node used to build a tree for training
    //
    Train_Lattice_node* lat_node = lattice->get_start_node_cc();
    Train_Link_list* next_list = lat_node->get_next_nodes_cc();
    Train_Link_node* node = next_list->get_head_cc();
    Train_Lattice_node* next_node = (Train_Lattice_node*)(node->get_item_cc());
    next_list = next_node->get_next_nodes_cc();
    node = next_list->get_head_cc();
    next_node = (Train_Lattice_node*)(node->get_item_cc());

    // build tree with phones in the alignment
    //
    traintree = new Train_Lex_tree;
    traintree->build_traintree_cc(phn_list, phns, num_ph, num_monophones,
                                  next_node);

    // set the lex_tree for !SENT_START
    //
    lat_node = lattice->get_start_node_cc();
    next_list = lat_node->get_next_nodes_cc();
    node = next_list->get_head_cc();
    next_node = (Train_Lattice_node*)(node->get_item_cc());
    next_node->set_lex_tree_cc(traintree);
  }

  // get the word_penalty from lattice
  //
  float_4 word_penalty = lattice->get_word_penalty_cc();

  // open input feature data file to read
  //
  if (input_mode == HT_ASCII_MODE) {
    fpi = fopen((char*)mfcc_file, "r");
  }
  else {
    fpi = fopen((char*)mfcc_file, "rb");
  }
  if (fpi == (FILE*)NULL) {
    fprintf(stdout, "Cannot open file %s\n", mfcc_file);
    exit(ISIP_PROTO_ERROR);
  }

  // create a sentence start trace
  //
  Train_Trace* start_trace = manager->new_trace_cc();
  start_trace->set_level_cc(HT_WORD_LEVEL);

  // set the triphone to the start triphone (silence)
  //
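  // decoding of this utterance is seeded with this single word-level trace;
  // start_phn presumably indexes the sentence-start silence triphone, so the
  // opening frames are absorbed by silence before the first real word
  // hypothesis is started
  //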
  start_trace->set_phone_ind_cc((int_4)start_phn);

  // initialize the active word list
  //
  int_4 start_wd = start_word->get_index_cc();
  start_word->set_active_cc(ISIP_TRUE);
  active_words[num_active_wd++] = start_wd;

  // insert the start trace in the correct list
  //
  word_trlist[start_wd] = new Train_Link_list();
  word_trlist[start_wd]->insert_cc(start_trace);
  wdmarker[start_wd] = word_trlist[start_wd]->get_curr_cc();
  num_traces_gen[HT_WORD_LEVEL]++;

  // set the lattice information for the start trace
  //
  Train_Lattice_node* latnode = lattice->get_start_node_cc();
  start_trace->set_lat_node_cc(latnode);

  // set the lexical information for the start trace
  // create a new tree if one doesn't exist and insert it in the list
  // of active trees
  //
  Train_Hash_cell* hcell = (Train_Hash_cell*)NULL;
  lattice->get_lat_node_cc(latnode, hcell);
  Train_Lex_tree* start_tree = latnode->get_lex_tree_cc();
  if (start_tree == (Train_Lex_tree*)NULL) {
    start_tree = new Train_Lex_tree(hcell);
    latnode->set_lex_tree_cc(start_tree);
    lextree_list->insert_cc(start_tree);
  }

  // update counts
  //
  for (int_4 i = 0; i < num_levels; i++) {
    total_gen[i] += num_traces_gen[i];
    total_del[i] += num_traces_del[i];
    num_traces_total[i] += (num_traces_gen[i] - num_traces_del[i]);
  }

  // reset counts
  //
  for (int_4 i = 0; i < num_levels; i++) {
    num_traces_gen[i] = (int_4)0;
    num_traces_del[i] = (int_4)0;
  }

  num_mapmi = (int_4)0;
  num_steps = (int_4)0;
  num_vect = (int_4)0;

  // count the number of feature vectors in the file
  //
  // if ascii mode
  //
  if (input_mode == HT_ASCII_MODE) {
    while (fgets((char*)temp_vect, ISIP_MAX_STRING_LENGTH, fpi)
           != (char*)NULL) {
      num_vect++;
    }
  }

  // if binary mode
  //
  else {
    float_8* temp_array = new float_8[num_features];
    while ((int_4)fread(temp_array, sizeof(float_8), num_features, fpi)
           == num_features) {
      num_vect++;
    }
    delete [] temp_array;
  }
  fclose(fpi);

  // open input feature data file to read
  //
  if (input_mode == HT_ASCII_MODE) {
    fpi = fopen((char*)mfcc_file, "r");
  }
  else {
    fpi = fopen((char*)mfcc_file, "rb");
  }
  if (fpi == (FILE*)NULL) {
    fprintf(stdout, "Cannot open file %s\n", mfcc_file);
    exit(ISIP_PROTO_ERROR);
  }

  // array used to store the input data
  //
  vectors = new float_8*[num_vect];
  for (int_4 i = 0; i < num_vect; i++) {
    vectors[i] = new float_8[num_features];
  }

  // initialize the vectors array
  //
  for (int_4 i = 0; i < num_vect; i++) {
    for (int_4 j = 0; j < num_features; j++) {
      vectors[i][j] = 0.0;
    }
  }

  // main loop: this will loop over all traces for each frame of data
  // the traces are taken care of differently depending on which level
  // they belong to -- word or phone or state
  //
  // read the input data
  //
  while (read_input_cc(fpi, num_features, input_mode, features) == ISIP_TRUE) {

    // store the current frame data
    //
    for (int_4 i = 0; i < num_features; i++) {
      vectors[current_frame][i] = features[i];
    }

    // reset the beam thresholds
    //
    for (int_4 i = 0; i < num_levels; i++) {
      beam_thresh[i] = HT_DEFAULT_SCORE;
      max_score[i] = HT_DEFAULT_SCORE;
    }

    // loop over all active state-level traces
    // evaluate the state and update the score for each trace
    // then find all possible transitions and advance traces
    //
    project_states_cc(state_toklist, current_frame, features, sp_phone,
                      phone_trlist, active_phones, num_active_ph, align_mode,
                      phones, num_traces_gen, num_traces_del, num_mapmi,
                      max_score, phmarker);

    // compute the MAPMI pruning threshold score
    //
    if (mapmi_limit > (int_4)0) {

      // reset mapmi threshold
      //
      mapmi_thresh = HT_DEFAULT_SCORE;

      // sort all the active traces according to score
      //
      if (num_mapmi > mapmi_limit) {
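        // sort_traces_cc presumably ranks the num_mapmi active state-level
        // traces by score and returns, in mapmi_thresh, the score of the
        // mapmi_limit-th best trace, so that the pruning passes below keep
        // at most mapmi_limit traces alive in this frame
        //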
        sort_traces_cc(phmarker, active_phones, num_active_ph, num_mapmi,
                       mapmi_limit, mapmi_thresh);
      }
    }

    // find the phone level beam pruning threshold
    //
    if (beam_width[HT_PHONE_LEVEL] != (float_8)0) {
      beam_thresh[HT_PHONE_LEVEL] = max_score[HT_PHONE_LEVEL] +
        beam_width[HT_PHONE_LEVEL];
    }

    // if mapmi pruning works better make that the phone-level beam
    // threshold
    //
    if (beam_thresh[HT_PHONE_LEVEL] < mapmi_thresh) {
      beam_thresh[HT_PHONE_LEVEL] = mapmi_thresh;
    }

    // mark all phone level traces below the beam width for pruning
    //
    trace_prune_cc(phone_trlist, phmarker, active_phones, num_active_ph,
                   beam_thresh[HT_PHONE_LEVEL], HT_PHONE_LEVEL,
                   num_traces_del[HT_PHONE_LEVEL]);

    // create word-level traces for the word-end phone-level traces
    // make sure that the inactive traces are no longer kept
    //
    active_trace_cc(phone_trlist, phmarker, prev_phmark, word_trlist,
                    wdmarker, prev_wdmark, active_phones, num_active_ph,
                    active_words, num_active_wd, sp_score, word_penalty,
                    num_traces_gen, num_traces_del, max_score[HT_WORD_LEVEL]);

    // find the word level beam pruning threshold
    //
    if (beam_width[HT_WORD_LEVEL] != (float_8)0) {
      beam_thresh[HT_WORD_LEVEL] = max_score[HT_WORD_LEVEL] +
        beam_width[HT_WORD_LEVEL];
    }

    // if mapmi pruning works better make that the word-level beam
    // threshold
    //
    if (beam_thresh[HT_WORD_LEVEL] < mapmi_thresh) {
      beam_thresh[HT_WORD_LEVEL] = mapmi_thresh;
    }

    // mark all word level traces below the beam width for pruning
    // also update the status of active lexical trees
    //
    trace_prune_cc(word_trlist, wdmarker, active_words, num_active_wd,
                   beam_thresh[HT_WORD_LEVEL], HT_WORD_LEVEL,
                   num_traces_del[HT_WORD_LEVEL]);

    // loop over all active word and phone-level traces in the
    // current time frame and grow the next phone
    //
    project_phones_cc(wdmarker, phmarker, state_toklist, phones, lextree_list,
                      lattice, num_monophones, ph_size, phone_map,
                      active_words, num_active_wd, active_phones,
                      num_active_ph, num_traces_gen,
                      max_score[HT_STATE_LEVEL], context_mode, mlf_mode,
                      num_sph, sph_index);
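The per-frame pruning above combines two thresholds at each level: a beam threshold set relative to the best score in the frame (beam_thresh = max_score + beam_width, where the scores are presumably log-likelihoods and beam_width a negative offset) and, when mapmi_limit is positive, a MAPMI threshold that caps the number of surviving traces; whichever threshold is tighter wins. The following is a minimal, self-contained sketch of that combined rule, not code from hmm_train.cc: the Trace struct, prune_traces, beam_width, and max_live are hypothetical names used only for illustration.

// pruning_sketch.cc: illustrative only, assuming scores are log-likelihoods
// (higher is better) and that the beam is expressed as a negative offset
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <vector>

struct Trace {            // hypothetical stand-in for a Train_Trace
  double score;           // accumulated log-likelihood for this path
  bool pruned;            // marked for deletion by the pruning pass
};

// combine a score beam with a rank (MAPMI-style) limit and mark the losers
static void prune_traces(std::vector<Trace>& traces, double beam_width,
                         std::size_t max_live) {
  if (traces.empty()) {
    return;
  }

  // beam threshold: best score in the frame plus a (negative) beam width
  double best = traces[0].score;
  for (const Trace& t : traces) {
    best = std::max(best, t.score);
  }
  double thresh = best + beam_width;

  // rank threshold: score of the max_live-th best trace, if over the limit
  if (max_live > 0 && traces.size() > max_live) {
    std::vector<double> scores;
    scores.reserve(traces.size());
    for (const Trace& t : traces) {
      scores.push_back(t.score);
    }
    std::nth_element(scores.begin(), scores.begin() + (max_live - 1),
                     scores.end(), std::greater<double>());
    thresh = std::max(thresh, scores[max_live - 1]);
  }

  // mark every trace that falls below the tighter of the two thresholds
  for (Trace& t : traces) {
    t.pruned = (t.score < thresh);
  }
}

int main() {
  std::vector<Trace> traces = {{-10.0, false}, {-11.0, false},
                               {-12.5, false}, {-30.0, false}};
  prune_traces(traces, -5.0, 2);   // beam of 5, keep at most 2 traces
  for (std::size_t i = 0; i < traces.size(); i++) {
    std::printf("trace %zu: score %.1f -> %s\n", i, traces[i].score,
                traces[i].pruned ? "pruned" : "kept");
  }
  return 0;
}

Taking the larger of the two thresholds mirrors the "if (beam_thresh < mapmi_thresh) beam_thresh = mapmi_thresh;" step in the listing: whichever criterion prunes more aggressively determines which traces survive the frame.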