📄 tr_lat_read_0.cc
字号:
// file: tr_lat_read_0.cc//// system include files//#include <memory.h>#include <string.h>#include <ctype.h>// isip include files//#include "train_lattice.h"#include "train_lattice_constants.h"// method: read_lattice_cc//// arguments:// FILE* fp_in_a: (input) input Train_Lattice file// int_4* Train_Word_table : (input) the hash table of Train_Word indices//// return: a logical_1 indicating status//// this method reads in the Train_Lattice from a file//logical_1 Train_Lattice::read_lattice_cc(FILE* fp_in_a, Train_Hash_table* words_a) { // define some local variables // char_1* buffer = new char_1[ISIP_MAX_STRING_LENGTH]; char_1* buffer_pos = buffer; char_1* temp_buf = (char_1*)NULL; char_1* delimiter = (char_1*)"="; float_4 time = (float_4)0; int_4 num_nodes = (int_4)0; int_4 num_arcs = (int_4)0; int_4 frame_index = (int_4)0; int_4 node_index = (int_4)0; int_4 arc_count = (int_4)0; int_4 start_node = (int_4)0; int_4 end_node = (int_4)0; logical_1 read_words = ISIP_TRUE; int_4 pron_var = (int_4)0; float_8 ac_like = (float_8)0; float_8 lm_score = (float_8)0; float_4 lm_scale = (float_4)0; float_4 word_penalty = (float_4)0; Train_Word* word = (Train_Word*)NULL; Train_Hash_cell* hcell = (Train_Hash_cell*)NULL; Train_Lattice_node** lnodes = (Train_Lattice_node**)NULL;; // define memory // memset((char*)buffer, 0, ISIP_MAX_STRING_LENGTH); // read in the lines till we get to reading the link/arc information // read the Train_Lattice generation specific information like utterance // number, word insertion penalty, lexicon, and Train_Model set // also read the node information // while (fgets((char*)buffer_pos, ISIP_MAX_STRING_LENGTH, fp_in_a) && (*buffer_pos != 'J')) { // find the delimited value // temp_buf = (char_1*)strtok((char*)buffer_pos, (char*)delimiter); // parse this string // while (temp_buf != (char_1*)NULL) { // move the pointer to first non-whitespace // while (isspace((char)(*temp_buf))) { temp_buf++; } // read the Train_Lattice generation information // version number of generation program (typically HVite) // if (strcmp((char*)temp_buf, "VERSION") == 0) { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // utterance represented by the Train_Lattice // else if (strcmp((char*)temp_buf, "UTTERANCE") == 0) { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); set_utterance_cc((char_1*)temp_buf); } // word insertion penalty used during Train_Lattice generation // else if (strcmp((char*)temp_buf, "wdpenalty") == 0) { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); word_penalty = atof((char*)temp_buf); set_word_penalty_cc(word_penalty); } // language Train_Model scaling used during Train_Lattice generation // else if (strcmp((char*)temp_buf, "lmscale") == 0) { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); lm_scale = atof((char*)temp_buf); set_lm_scale_cc(lm_scale); } // language Train_Model used during Train_Lattice generation // else if (strcmp((char*)temp_buf, "vocab") == 0) { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); set_lang_model_cc((char_1*)temp_buf); } // Train_Model set used during Train_Lattice generation // else if (strcmp((char*)temp_buf, "hmms") == 0) { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); set_model_set_cc((char_1*)temp_buf); } // number of nodes in the Train_Lattice // else if (strcmp((char*)temp_buf, "N") == 0) { // do not confuse with alphabet N which could be a word // if (read_words == ISIP_TRUE) { // set the number of Train_Lattice nodes // temp_buf = (char_1*)strtok((char*)NULL, " "); num_nodes = atoi((char*)temp_buf); set_num_nodes_cc(num_nodes); // initialize the Train_Lattice nodes table // lnodes = new Train_Lattice_node*[num_nodes]; init_nodes_cc(lnodes); } // otherwise ignore this word and move on // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } } // number of arcs in the Train_Lattice // else if (strcmp((char*)temp_buf, "L") == 0) { // do not confuse with alphabet L which could be a word // if (read_words == ISIP_TRUE) { // set the number of arcs // temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); num_arcs = atoi((char*)temp_buf); set_num_arcs_cc(num_arcs); } // otherwise ignore this word and move on // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } } // read the node information (node index, end time, word etc.) // else if (strcmp((char*)temp_buf, "I") == 0) { // do not confuse with alphabet I which could be a word // if (read_words == ISIP_TRUE) { // set the node index // temp_buf = (char_1*)strtok((char*)NULL, " "); node_index = atoi((char*)temp_buf); lnodes[node_index]->set_node_index_cc(node_index); } // otherwise ignore this word and move on // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } } // read the frame information for this node // else if (strcmp((char*)temp_buf, "t") == 0) { // do not confuse with alphabet t which could be a word // if (read_words == ISIP_TRUE) { // set the frame index // temp_buf = (char_1*)strtok((char*)NULL, " "); time = atof((char*)temp_buf); frame_index = (int_4)rint(time * TRAIN_LATTICE_FRAMES_PER_SEC); lnodes[node_index]->set_frame_index_cc(frame_index); } // otherwise ignore this word and move on // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } } // read the word index of the word represented by the node // else if (strcmp((char*)temp_buf, "W") == 0) { // get the word string and check the lexicon if this word // exists // temp_buf = (char_1*)strtok((char*)NULL, " "); hcell = words_a->hash_lookup_cc((char_1*)temp_buf); // error if word not found in lexicon // if (hcell == (Train_Hash_cell*)NULL) { fprintf (stdout, "%s\n", temp_buf); error_handler_cc((char_1*)"lat_read_0.cc", (char_1*)"word does not exist in lexicon"); } // set the item index of the node // word = (Train_Word*)(hcell->get_item_cc()); lnodes[node_index]->set_word_cc(word); read_words = ISIP_FALSE; temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // read in the pronunciation variant used // else if (strcmp((char*)temp_buf, "v") == 0) { // do not confuse with alphabet L which could be a word // if (read_words == ISIP_TRUE) { // set the pronunciation variation index // temp_buf = (char_1*)strtok((char*)NULL, " "); pron_var = atoi((char*)temp_buf); lnodes[node_index]->set_pron_var_cc(pron_var); } // otherwise ignore this word and move on // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } } // otherwise ignore this word and move on // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); read_words = ISIP_TRUE; } } // end while temp_buf is not null } // end while fgets // start reading the arc information // complete parsing the first line of the arc information // loop over the remaining lines of the file // do { // find the delimited value // temp_buf = (char_1*)strtok((char*)buffer_pos, (char*)delimiter); // while the line is not empty // while (temp_buf != (char_1*)NULL) { // move the pointer to first non-whitespace // while (isspace((char)(*temp_buf))) { temp_buf++; } // read in the arc index // if (strcmp((char*)temp_buf, "J") == 0) { temp_buf = (char_1*)strtok((char*)NULL, " "); arc_count++; temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // start node of arc // else if (strcmp((char*)temp_buf, "S") == 0) { temp_buf = (char_1*)strtok((char*)NULL, " "); start_node = atoi((char*)temp_buf); temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // end node of arc // else if (strcmp((char*)temp_buf, "E") == 0) { temp_buf = (char_1*)strtok((char*)NULL, " "); end_node = atoi((char*)temp_buf); // update the prev node and next node pointers for the two nodes // corresponding to this arc // lnodes[start_node]->add_next_node_cc(lnodes[end_node]); lnodes[end_node]->add_prev_node_cc(lnodes[start_node]); temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // word represented by the arc end node // else if (strcmp((char*)temp_buf, "W") == 0) { // find the corresponding word in the lexicon // temp_buf = (char_1*)strtok((char*)NULL, " "); hcell = words_a->hash_lookup_cc((char_1*)temp_buf); // error if word not found in lexicon // if (hcell == (Train_Hash_cell*)NULL) { fprintf (stdout, "%s\n", (char*)temp_buf); error_handler_cc((char_1*)"lat_read_0.cc", (char_1*)"word does not exist in lexicon"); } // set the item index of the node // word = (Train_Word*)(hcell->get_item_cc()); lnodes[end_node]->set_word_cc(word); // create the Train_Lattice start node --- this is a dummy node that is // not evaluated, it just serves as a starting point for all the // sentence start nodes // if (strcmp((char*)temp_buf, (char*)TRAIN_LATTICE_SENT_START) == 0) { start_node_d = lnodes[start_node]; } // move on to the next word // temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // pronunciation variant // else if (strcmp((char*)temp_buf, "v") == 0) { temp_buf = (char_1*)strtok((char*)NULL, " "); pron_var = atoi((char*)temp_buf); lnodes[end_node]->set_pron_var_cc(pron_var); temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // acoustic likelihood of arc // else if (strcmp((char*)temp_buf, "a") == 0) { temp_buf = (char_1*)strtok((char*)NULL, " "); ac_like = atof((char*)temp_buf); lnodes[start_node]->add_ac_score_cc(ac_like); temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // read in the lm score // else if (strcmp((char*)temp_buf, "l") == 0) { temp_buf = (char_1*)strtok((char*)NULL, " "); lm_score = atof((char*)temp_buf); lm_score *= lm_scale_d; lnodes[start_node]->add_lm_score_cc(lm_score); temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } // move on to the next word // else { temp_buf = (char_1*)strtok((char*)NULL, (char*)delimiter); } } } while ((fgets((char*)buffer_pos,ISIP_MAX_STRING_LENGTH,fp_in_a)) && (arc_count <= num_arcs_d)); // free memory // if (lnodes != (Train_Lattice_node**)NULL) { delete [] lnodes; } delete [] buffer; // exit gracefully // return ISIP_TRUE;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -