📄 ht_words_0.cc
字号:
// file: ht_words_0.cc//// system include files//#include <string.h>// isip include files//#include "hmm_train.h"#include "hmm_train_constants.h" // method: read_lexicon_cc//// arguments:// int_4& num_words : (output) number of words in lexicon// int_4 num_mono : (input) number of phones// char_1** monophones : (input) list of phone models// char_1* file : (input) name of input file// // return: a Train_Hash_table* pointing to a hash table of word models//Train_Hash_table* read_lexicon_cc(int_4& num_words_a, int_4 num_mono_a, char_1** monophones_a, char_1* file_a) { // open data file // FILE* fp = fopen((char*)file_a, "r"); if (fp == (FILE*)NULL) { fprintf(stdout, "Cannot open file %s\n", file_a); exit(ISIP_PROTO_ERROR); } // memory manager // Train_Memory_manager* manager = Train_Link_list::get_manager_cc(); // variables to read data // char_1* tmp = new char_1[ISIP_MAX_STRING_LENGTH]; tmp[0] = '\0'; char_1* prev = new char_1[ISIP_MAX_STRING_LENGTH]; prev[0] = '\0'; char_1* phstr = new char_1[ISIP_MAX_STRING_LENGTH]; phstr[0] = '\0'; int_4 num_prons = (int_4)0; int_4* num_phones = (int_4*)NULL; int_4** phlist = (int_4**)NULL; int_4** phl = (int_4**)NULL; int_4* nph = (int_4*)NULL; logical_1 pron_flag = ISIP_TRUE; int_4 num_ph = (int_4)0; int_4* phones = new int_4[ISIP_MAX_STRING_LENGTH]; for (int_4 i = 0; i < ISIP_MAX_STRING_LENGTH; i++) { phones[i] = (int_4)-1; } logical_1 null_flag = ISIP_FALSE; Train_Word* word = (Train_Word*)NULL; Train_Hash_cell* wrd_cell = (Train_Hash_cell*)NULL; // initialize the number of words // num_words_a = (int_4)0; // allocate space for the word hash table // Train_Hash_table* table = new Train_Hash_table(TRAIN_HASH_TABLE_SIZE); // read data from file // while (fscanf(fp, "%s", tmp) != EOF) { // ignore comment lines // if (tmp[0] == (char_1)'#') { // do nothing // fscanf(fp, "%[^\n]", tmp); fscanf(fp, "%[\n]", tmp); } // otherwise read the word data // else { // if this is the same as the previous word increment the // pronunciation count // if (strcmp((char*)tmp, (char*)prev) == (int_4)0) { num_prons++; } // otherwise add data of the previous word and make a word model // else { // create the word model // if (num_prons > (int_4)0) { // create word model, put it in the hash table and increment count // word = new Train_Word(num_words_a, prev, phlist, num_phones, num_prons); wrd_cell = manager->new_hash_cc(); wrd_cell->set_cc(prev, (void_p)word); table->hash_insert_cc(wrd_cell); num_words_a++; // free phone memory // delete [] num_phones; num_phones = (int_4*)NULL; for (int_4 jj = 0; jj < num_prons; jj++) { delete [] phlist[jj]; } delete [] phlist; phlist = (int_4**)NULL; } // reset pronunciation count // pron_flag = ISIP_TRUE; num_prons = (int_4)1; // if this is the null word then create a word model with no // pronunciations // if (strcmp((char*)tmp, (char*)TRAIN_WRD_NULL) == (int_4)0) { // no need to read pronunciations // num_prons = (int_4)0; pron_flag = ISIP_FALSE; null_flag = ISIP_TRUE; // create word model, put it in the hash table and increment count // word = new Train_Word(num_words_a, tmp, phlist, num_phones, num_prons); wrd_cell = manager->new_hash_cc(); wrd_cell->set_cc(tmp, (void_p)word); table->hash_insert_cc(wrd_cell); num_words_a++; } } // read the phone string if required // if (pron_flag == ISIP_TRUE) { fscanf(fp, "%[\t\b]", phstr); fscanf(fp, "%[^\n]", phstr); // now break it into phone components and list them // num_ph = (int_4)0; char_1* ph = (char_1*)strtok((char*)phstr, " "); for (int_4 k = 0; k < num_mono_a; k++) { if (strcmp((char*)ph, (char*)monophones_a[k]) == 0) { phones[num_ph++] = k; break; } } while ((ph = (char_1*)strtok(NULL, " "))) { for (int_4 k = 0; k < num_mono_a; k++) { if (strcmp((char*)ph, (char*)monophones_a[k]) == 0) { phones[num_ph++] = k; break; } } } // check to see if all phones are valid // for (int_4 i = 0; i < num_ph; i++) { // exit if phone no defined // if (phones[i] == (int_4)-1) { fprintf(stdout, "Error : phone \"%s\" is not defined\n", ph); exit(ISIP_PROTO_ERROR); } } // add this info to the existing data on this word // nph = new int_4[num_prons]; phl = new int_4*[num_prons]; for (int_4 j = 0; j < num_prons - (int_4)1; j++) { nph[j] = num_phones[j]; phl[j] = new int_4[nph[j]]; for (int_4 k = 0; k < nph[j]; k++) { phl[j][k] = phlist[j][k]; } } nph[num_prons - (int_4)1] = num_ph; phl[num_prons - (int_4)1] = new int_4[num_ph]; for (int_4 k = 0; k < num_ph; k++) { phl[num_prons - (int_4)1][k] = phones[k]; } // free old meory and copy pointers // delete [] num_phones; num_phones = nph; int_4** temp_ptr = phlist; phlist = phl; phl = temp_ptr; // free memory // for (int_4 j = 0; j < num_ph; j++) { phones[j] = (int_4)-1; } if (phl != (int_4**)NULL) { for (int_4 j = 0; j < num_prons - 1; j++) { if (phl[j] != (int_4*)NULL) { delete [] phl[j]; } phl[j] = (int_4*)NULL; } delete [] phl; phl = (int_4**)NULL; } } // read end of line // strcpy((char*)prev, (char*)tmp); fscanf(fp, "%[\n]", tmp); } } // end while loop // add the last word // word = new Train_Word(num_words_a, prev, phlist, num_phones, num_prons); wrd_cell = manager->new_hash_cc(); wrd_cell->set_cc(prev, (void_p)word); table->hash_insert_cc(wrd_cell); num_words_a++; for (int_4 jj = 0; jj < num_prons; jj++) { delete [] phlist[jj]; } delete [] phlist; phlist = (int_4**)NULL; // if no word !NULL in the lexicon, add the word !NULL which has no // pronunciations // if (null_flag == ISIP_FALSE) { // reset counts // num_prons = (int_4)0; num_phones = (int_4)0; // create a !NULL word // strcpy((char*)tmp, (char*)TRAIN_WRD_NULL); word = new Train_Word(num_words_a, tmp, phlist, num_phones, num_prons); wrd_cell = manager->new_hash_cc(); wrd_cell->set_cc(tmp, (void_p)word); table->hash_insert_cc(wrd_cell); num_words_a++; } // free phone memory // delete [] num_phones; delete [] nph; num_phones = (int_4*)NULL; // close file // fclose(fp); // delete memory // delete [] phones; delete [] tmp; delete [] prev; delete [] phstr; // return the hash table exit gracefully // return table;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -