📄 ht_param_0.cc
字号:
// file: ht_param_0.cc//// function to read data from parameter file//// system include files//#include <string.h>// isip include files//#include "hmm_train.h"#include "hmm_train_constants.h"// method: read_params_cc//// arguments:// FILE* fp : (input) file pointer to params file// char_1* mono_file : (output) name of file containing monophones// char_1* trans_file : (input) name of transitions data file// char_1* state_file : (output) name of file containing updated states data// char_1* new_trans_file : (output) name of updated transitions data file// char_1* new_state_file : (input) name of file containing states data// char_1* batch_stats_file : (input) name of file containing viterbi// accumulators// char_1* acc_list_file : (input) name of file containing list of viterbi// accumulators// char_1* model_file : (output) name of file containing models// char_1* phone_file : (output) name of file containing triphones// char_1* lex_file : (output) name of lexicon file// int_4& align_mode : (output) alignment mode (state/model)// int_4& input_mode : (output) input mode (ascii/binary)// int_4 level : (input) number of levels in the search hierarchy// char_1* input_file : (output) name of the input file// char_1* mlf_file : (output) name of the mlf file// char_1* output_file : (output) name of the output file// int_4& phn : (output) size of phonetic context// int_4& nbst : (output) n-best word hyps// float_8*& beam : (output) beam widths for pruning// int_4& mapmi : (output) maximum number of active traces// logical_1& mlf_mode_flag : (output) if this is model mlf or word mlf// int_4& context_mode : (output) context_mode// float_8*& var_floor: (output) variance floor// logical_1& op_out: (output) the flag of output option// logical_1& output_mode: (output) format for the output states file// logical_1& train_mode: (output) training mode (batch/normal)// char_1* sph_file: (output) name of the specail phone list file// char_1* state_occ_file: (output) name of the state occupancy file// char_1* occ_mode: (output) whether or not we print occupancies//// return: a logical flag to indicate success//logical_1 read_params_cc(FILE* fp_a, char_1* mono_file_a, char_1* trans_file_a, char_1* state_file_a, char_1* new_trans_file_a, char_1* new_state_file_a, char_1* batch_stats_file_a, char_1* acc_list_file_a, char_1* model_file_a, char_1* phone_file_a, char_1* lex_file_a, int_4& align_mode_a, int_4& input_mode_a, int_4 level_a, char_1* input_file_a, char_1* mlf_file_a, char_1* output_file_a, int_4& phn_a, int_4& nbst_a, float_8*& beam_a, int_4& mapmi_a, logical_1& mlf_mode_a, int_4& context_mode_a, float_8& var_floor_a, logical_1& op_out_a, logical_1& output_mode_a, logical_1& train_mode_a, char_1* sph_file_a, char_1* state_occ_file_a, logical_1& occ_mode_a) { // string to hold temporary data // char_1* tmp = new char_1[ISIP_MAX_STRING_LENGTH]; tmp = (char_1*)memset(tmp, (int_4)0, ISIP_MAX_STRING_LENGTH); char_1* tag_str = new char_1[ISIP_MAX_STRING_LENGTH]; tag_str = (char_1*)memset(tag_str, (int_4)0, ISIP_MAX_STRING_LENGTH); char_1* tmp1 = new char_1[ISIP_MAX_STRING_LENGTH]; tmp1 = (char_1*)memset(tmp1, (int_4)0, ISIP_MAX_STRING_LENGTH); char_1* tmp2 = new char_1[ISIP_MAX_STRING_LENGTH]; tmp2 = (char_1*)memset(tmp2, (int_4)0, ISIP_MAX_STRING_LENGTH); char_1* tmp3 = new char_1[ISIP_MAX_STRING_LENGTH]; tmp3 = (char_1*)memset(tmp3, (int_4)0, ISIP_MAX_STRING_LENGTH); char_1* tmp4 = new char_1[ISIP_MAX_STRING_LENGTH]; tmp4 = (char_1*)memset(tmp4, (int_4)0, ISIP_MAX_STRING_LENGTH); char_1* tmp5 = new char_1[ISIP_MAX_STRING_LENGTH]; tmp5 = (char_1*)memset(tmp5, (int_4)0, ISIP_MAX_STRING_LENGTH); // read data from file // while (fgets((char*)tmp, ISIP_MAX_STRING_LENGTH - 1, fp_a) != (char*)NULL) { // ignore comment lines // if (tmp[0] == (char_1)'#') { // do nothing // continue; } // get the first string which should be the tag string. if the line is // empty then 0 should be returned // if (sscanf((char*)tmp, "%s", tag_str) == 0) { continue; } // read name of monophones file // if (strcmp((char*)tag_str, (char*)HT_MONOPHONE_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)mono_file_a, (char*)tmp3); expand_filename_cc(mono_file_a); } } //read the input file // else if (strcmp((char*)tag_str, HT_INPUT_FILE ) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)input_file_a, (char*)tmp3); expand_filename_cc(input_file_a); } } // read the mlf file // else if (strcmp((char*)tag_str, HT_MLF_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)mlf_file_a, (char*)tmp3); expand_filename_cc(mlf_file_a); } } // read the output file // else if (strcmp((char*)tag_str,HT_OUTPUT_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)output_file_a, (char*)tmp3); expand_filename_cc(output_file_a); op_out_a = ISIP_TRUE; } } //set the phonetic context size // else if (strcmp(HT_CONTEXT_SIZE, (char*)tag_str) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { // curently assumes 3 anyways // //phn_a = (int_4)atoi(tmp3); } } // read the value for n-best // else if (strcmp(HT_NBEST, (char*)tag_str) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { nbst_a = (int_4)atoi((char*)tmp3); } } // set the beam widths if beam pruning is allowed // else if (strcmp(HT_BEAM_WIDTH, (char*)tag_str) == 0) { if (sscanf((char*)tmp, "%s%s%s%s%s", tmp1, tmp2, tmp3, tmp4, tmp5) == 5) { beam_a[0] = -(float_8)fabs((float_8)atof((char*)tmp3)); beam_a[1] = -(float_8)fabs((float_8)atof((char*)tmp4)); beam_a[2] = -(float_8)fabs((float_8)atof((char*)tmp5)); } } // set the variance floor for training // else if (strcmp((char*)HT_VARIANCE_FLOOR_NAME, (char*)tag_str) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { var_floor_a = (float_8)atof((char*)tmp3); } } // set the limit on maximum active phone model instances if // specified // else if (strcmp(HT_MAPMI, (char*)tag_str) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { mapmi_a = (int_4)atof((char*)tmp3); if (mapmi_a < (int_4)0) { mapmi_a = (int_4)0; } } } // read name of transitions data file // else if (strcmp((char*)tag_str, HT_TRANSITIONS_FILE ) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)trans_file_a, (char*)tmp3); expand_filename_cc(trans_file_a); } } // read name of states data file // else if (strcmp((char*)tag_str, HT_STATES_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)state_file_a, (char*)tmp3); expand_filename_cc(state_file_a); } } // read name of updated transitions data file // else if (strcmp((char*)tag_str, HT_UPDATED_TRANS_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)new_trans_file_a, (char*)tmp3); expand_filename_cc(new_trans_file_a); } } // read name of updated states data file // else if (strcmp((char*)tag_str, HT_UPDATED_STATE_FILE ) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)new_state_file_a, (char*)tmp3); expand_filename_cc(new_state_file_a); } } // read name of batch stats file // else if (strcmp((char*)tag_str, HT_BATCH_STATS_FILE ) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)batch_stats_file_a, (char*)tmp3); expand_filename_cc(batch_stats_file_a); } } // read name of file containing the list of viterbi accumulators // else if (strcmp((char*)tag_str, HT_ACC_LIST_FILE ) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)acc_list_file_a, (char*)tmp3); expand_filename_cc(acc_list_file_a); } } // read name of HMM models data file // else if (strcmp((char*)tag_str, HT_MODELS_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)model_file_a, (char*)tmp3); expand_filename_cc(model_file_a); } } // read name of triphones data file // else if (strcmp((char*)tag_str, HT_PHONES_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)phone_file_a, (char*)tmp3); expand_filename_cc(phone_file_a); } } // read name of lexicon data file // else if (strcmp((char*)tag_str, HT_LEXICON_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)lex_file_a, (char*)tmp3); expand_filename_cc(lex_file_a); } } // read context mode // else if (strcmp((char*)tag_str, HT_CONTEXT_MODE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if(strcmp((char*) HT_WORD_INTERNAL_MODE, (char*)tmp3) ==0){ context_mode_a = 2; } else if(strcmp((char*)HT_MONO_PHONE_MODE, (char*)tmp3) ==0){ context_mode_a = 1; } else if(strcmp((char*)HT_CROSS_WORD_MODE, (char*)tmp3) ==0){ context_mode_a = 3; } else { fprintf(stdout, "\nht_param_0.cc: context mode error\n\n"); // exit gracefully // exit(ISIP_PROTO_ERROR); } } } // read alignment mode // else if (strcmp((char*)tag_str, (char*)HT_ALIGN_MODE_NAME) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if (strcmp((char*)tmp3, (char*)HT_MODEL_ALIGN_NAME) == 0) { align_mode_a = HT_MODEL_ALIGN; } else if (strcmp((char*)tmp3, (char*)HT_STATE_ALIGN_NAME) == 0) { align_mode_a = HT_STATE_ALIGN; } else { fprintf(stdout, "alignment mode \"%s\" not recognized\n", tmp3); // exit disgracefully // exit(ISIP_PROTO_ERROR); } } } // read input mode // else if (strcmp((char*)tag_str, (char*)HT_INPUT_MODE_NAME) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if (strcmp((char*)tmp3, (char*)HT_ASCII) == 0) { input_mode_a = HT_ASCII_MODE; } else if (strcmp((char*)tmp3, (char*)HT_BINARY) == 0) { input_mode_a = HT_BINARY_MODE; } } } // read input mode // else if (strcmp((char*)tag_str, (char*)HT_OUTPUT_MODE_NAME) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if (strcmp((char*)tmp3, (char*)HT_ASCII) == 0) { output_mode_a = HT_ASCII_MODE; } else if (strcmp((char*)tmp3, (char*)HT_BINARY) == 0) { output_mode_a = HT_BINARY_MODE; } } } // read mlf mode // else if (strcmp((char*)tag_str, (char*)HT_MLF_MODE_NAME) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if (strcmp((char*)tmp3, (char*)HT_MODEL_MLF) == 0) { mlf_mode_a = HT_MODEL_TRANS; } else if (strcmp((char*)tmp3, (char*)HT_WORD_MLF) == 0) { mlf_mode_a = HT_WORD_TRANS; } } } // read training mode // else if (strcmp((char*)tag_str, (char*)HT_TRAIN_MODE_NAME) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if (strcmp((char*)tmp3, (char*)HT_NORMAL_MODE) == 0) { train_mode_a = HT_NORMAL; } else if (strcmp((char*)tmp3, (char*)HT_BATCH_MODE) == 0) { train_mode_a = HT_BATCH; } else if (strcmp((char*)tmp3, (char*)HT_COMBINE_MODE) == 0) { train_mode_a = HT_COMBINE; } } } // read the name of the special phone list file // else if (strcmp((char*)tag_str,(char*)HT_SPH_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)sph_file_a, (char*)tmp3); expand_filename_cc(sph_file_a); } } // read occupancy mode and/or filename // else if (strcmp((char*)tag_str, HT_STATE_OCCUPANCY) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { if (strcmp((char*)tmp3, (char*)HT_OPT_ON_NAME) == 0) { occ_mode_a = HT_OPT_ON; } else if (strcmp((char*)tmp3, (char*)HT_OPT_OFF_NAME) == 0) { occ_mode_a = HT_OPT_OFF; } } } else if (strcmp((char*)tag_str, (char*)HT_STATE_OCC_FILE) == 0) { if (sscanf((char*)tmp, "%s%s%s", tmp1, tmp2, tmp3) == 3) { strcpy((char*)state_occ_file_a, (char*)tmp3); expand_filename_cc(state_occ_file_a); } } // otherwise ignore the line // else { } } // delete memory // delete [] tmp; delete [] tag_str; delete [] tmp1; delete [] tmp2; delete [] tmp3; delete [] tmp4; delete [] tmp5; // return gracefully // return(ISIP_TRUE);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -