📄 trans_06.cc
字号:
} // end of transcription tokenize // test the insert method // if (!insertRecord(trans_file, angr_00)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } angr_00.clear(); // move one forward for file count // num_file++; if (debug_level_d >= Integral::DETAILED) { Long(num_file).debug(L"total number of file processed="); trans_file.debug(L"file name"); transcription.debug(L"transcription"); } } while (sdb_a.gotoNext()); return true;}// method: load//// arguments:// Sdb& sdb: (input) sdb id list// Filename& trans_file: (input) transcription file // Filename& lexicon_file: (input) lexicon file // Filename& syntactic_file: (input) syntactic file //// return: logical error status//// this method load data files to the transcription database//boolean TranscriptionDatabase::load(Sdb& sdb_a, Filename& trans_file_a, Filename& lexicon_file_a, Filename& syntactic_file_a) { // loop from start // if (!sdb_a.gotoFirst()) { String msg(L"Error: no input file specified "); Console::put(msg); Error::handle(name(), L"load", Error::NO_PARAM_FILE, __FILE__, __LINE__); } String syntactic; // declare a string vector to store the syntactic information // Vector<Long> syntactic_vec; // open the input file in read mode // Sof syn_file; syn_file.open(syntactic_file_a); syntactic_vec.read(syn_file, 0); // close the input text file // syn_file.close(); long length = syntactic_vec.length(); if (debug_level_d >= Integral::DETAILED) { Long(length).debug(L"total lines="); syntactic_vec.debug(L"syn"); } Filename trans_file; Sof transcription_file; String transcription; long num_file = 0; // declare a string vector to store the transcription information // Vector<String> trans_vec; // open the input file in read mode // File read_trans_file; if (!read_trans_file.open(trans_file_a, File::READ_ONLY)) { Console::put(L"Error in opening transcription input file"); } // read the string lines // String input_line_01; while (read_trans_file.get(input_line_01)) { 
trans_vec.concat(input_line_01); } // close the input text file // read_trans_file.close(); long length_01 = trans_vec.length(); Long(length_01).debug(L"total lines="); // declare the hashtable for the word and its pronunciation // HashTable<String, String> pronun_map_d; // open the input file in read lexicon // Vector<String> lexicon_symbol_list; // open the input file in read mode // File read_lexicon_file; if (!read_lexicon_file.open(lexicon_file_a, File::READ_ONLY)) { Console::put(L"Error in opening lexicon input file"); } // declare variables // String str; Vector<String> nonsp_def, pre_list, word_list, rule_list; // read each line // while (read_lexicon_file.get(str)) { // pre-process the input lexicon lines and merge the same lines // boolean same = false; for (long i = 0; i < pre_list.length(); i++) { if (str.eq(pre_list(i))) same = true; } if (!same) { pre_list.concat(str); } } read_lexicon_file.close(); // process each lexicon line after pre-processing in the pre_list // for (long i = 0; i < pre_list.length(); i++) { String head_word, symbol, sequence; long pos(0); String delim(L" "); String lex_str(pre_list(i)); // get the first word in the lexicon line // lex_str.tokenize(head_word, pos); String key_word = head_word; long alt_index = 0; while (pronun_map_d.containsKey(key_word)) { key_word.assign(head_word); key_word.concat(L"."); key_word.concat(alt_index++); } String rest_string; lex_str.tokenize(rest_string, pos, lex_str.length() - pos); rest_string.trim(); pronun_map_d.insert(key_word, &rest_string); } if (debug_level_d >= Integral::DETAILED) { pronun_map_d.debug(L"lexicon"); } String name_00(L"TIDIGITS"); // create the annotation graph // String gtype_00(L"ORTHOGRAPHIC"); String ident_00(L"id_00"); String ident_01(L"id_01"); String ident_02(L"id_02"); String ident_03(L"id_03"); String ident_04(L"id_04"); String newid_00; String newid_01; String newid_02; String newid_03; String newid_04; String synid_00; String synid_01; Float offset_00(0.0); 
Anchor* ancr_00 = (Anchor*)NULL; Anchor* ancr_01 = (Anchor*)NULL; String unit_00(L"seconds"); String feat_00(L"level"); String value_00(L"syntactic"); String value_01(L"word"); String value_02(L"phoneme"); setDataBaseName(name_00); do { sdb_a.getName(trans_file); AnnotationGraph angr_00(name_00, gtype_00); // get the transcription // String transcription = trans_vec(num_file); long trans_token = transcription.countTokens(); long current_tran_token = 0; String atype_00; // tokenize the transcription and generate the graph // long pos = 0; newid_00 = angr_00.createAnchor(name_00, offset_00, unit_00); synid_00 = newid_00; long syntac_num = 0; long current_syn = 0; String syn_string; long skip_token = 0; while (transcription.tokenize(atype_00, pos)) { // get one token // atype_00.trim(); skip_token++; atype_00.debug(L"word:"); if (skip_token < 4) continue; if (debug_level_d >= Integral::DETAILED) { atype_00.debug(L"word:"); } // insert the word to AG // newid_01 = angr_00.createAnchor(name_00, offset_00, unit_00); ancr_00 = angr_00.getAnchorById(newid_00); ancr_01 = angr_00.getAnchorById(newid_01); newid_02 = angr_00.createAnnotation(name_00, ancr_00, ancr_01, atype_00); if (!angr_00.setFeature(newid_02, feat_00, value_01)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } // add phone level AG here // String key_phone = atype_00; String delim(L" "); long alt_index = 0; while (pronun_map_d.containsKey(key_phone)) { long pos(0); String symbol, sub_symbol; symbol.assign(*pronun_map_d.get(key_phone)); if (debug_level_d >= Integral::DETAILED) { symbol.debug(L"sub_symbol--------------------"); } long total_token = symbol.countTokens(); long token_number = 0; newid_03 = newid_00; while (symbol.tokenize(sub_symbol, pos, delim)) { if (token_number == total_token - 1) { newid_04 = newid_01; } else { newid_04 = angr_00.createAnchor(name_00, offset_00, unit_00); } ancr_00 = angr_00.getAnchorById(newid_03); ancr_01 = angr_00.getAnchorById(newid_04); newid_02 = 
angr_00.createAnnotation(name_00, ancr_00, ancr_01, sub_symbol); if (!angr_00.setFeature(newid_02, feat_00, value_02)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } newid_03 = newid_04; token_number++; } // end of while tokenize key_phone.assign(atype_00); key_phone.concat(L"."); key_phone.concat(alt_index++); if (debug_level_d >= Integral::DETAILED) { key_phone.debug(L"key_phone=============="); } } // end of while containsKey newid_00 = newid_01; syntac_num++; syn_string.concat(atype_00); syn_string.concat(L" "); current_tran_token++; if (current_tran_token == trans_token || syntac_num == (long)syntactic_vec(current_syn)) { synid_01 = newid_01; ancr_00 = angr_00.getAnchorById(synid_00); ancr_01 = angr_00.getAnchorById(synid_01); syn_string.trim(); newid_02 = angr_00.createAnnotation(name_00, ancr_00, ancr_01, syn_string); if (!angr_00.setFeature(newid_02, feat_00, value_00)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } syn_string.clear(); synid_00 = newid_00; syntac_num = 0; current_syn++; } } // end of transcription tokenize // test the insert method // if (!insertRecord(trans_file, angr_00)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } angr_00.clear(); // move one forward for file count // num_file++; if (debug_level_d >= Integral::DETAILED) { Long(num_file).debug(L"total number of file processed="); trans_file.debug(L"file name"); transcription.debug(L"transcription"); } } while (sdb_a.gotoNext()); return true;}// method: storePartial//// arguments:// Sof& sof_a: (input) database file name// long tag: (input) sof tag// AnnotationGraph& graph: (input) annotation graph//// return: logical error status//// this method stores the annotation graph to the database//boolean TranscriptionDatabase::storePartial(Sof& sof_a, long tag_a, AnnotationGraph& graph_a) { // write the annotation graph to the database // graph_a.write(sof_a, tag_a); // exit gracefully // return true;}// method: storePartial//// 
// arguments:
//  Sof& sof_a: (input) database file name
//  long tag: (input) sof tag
//  Vector<String>& keys: (input) database identifiers
//
// return: logical error status
//
// this method stores the database header (the database name and the key
// list of the record hash table) to the database
//
boolean TranscriptionDatabase::storePartial(Sof& sof_a, long tag_a,
					    Vector<String>& keys_a) {

  // declare local variables
  //
  long obj_size = 0;

  // determine the size of the object: text files use ANY_SIZE, binary
  // files need the exact byte count of the two members written below
  //
  if (sof_a.isText()) {
    obj_size = Sof::ANY_SIZE;
  }
  else {
    obj_size = name_d.sofSize() + keys_a.sofSize();
  }

  // put the object into the sof file's index
  //
  if (!sof_a.put(CLASS_NAME, tag_a, obj_size)) {
    return false;
  }

  // write the database name
  //
  name_d.writeData(sof_a, PARAM_NAME);

  // write the keys associated with the hash table
  //
  keys_a.writeData(sof_a, PARAM_KEYS);

  // exit gracefully
  //
  return true;
}

// method: storePartial
//
// arguments:
//  String& trans: (input) transcription
//  String& name: (input) database name
//  String& level: (input) transcription level
//  long num: (input) the number of transcription in db
//  Sof& db_sof: (input) db file name
//  long tag: (input) tag
//
// return: a boolean flag indicating status
//
// this method stores one transcription into the
// TranscriptionDatabase.
//
boolean TranscriptionDatabase::storePartial(String& trans_a, String& name_a,
					    String& level_a, long num_a,
					    Sof& db_sof_a, long tag_a) {

  // local variables
  //
  boolean status = true;
  String line;
  File trans_file;

  // debugging information
  //
  if (debug_level_d > Integral::BRIEF) {
    Console::increaseIndention();
    String output;
    output.assign(L"database name: ");
    output.concat(name_a);
    output.concat(L"\ntranscription level: ");
    output.concat(level_a);
    Console::put(output);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -