📄 trans_06.cc
字号:
// file: $isip/class/mmedia/TranscriptionDatabase/trans_06.cc// version: $Id: trans_06.cc,v 1.13 2003/05/13 20:17:43 zheng Exp $//// isip include files//#include "TranscriptionDatabase.h"// method: load//// arguments:// Filename& trans_file: (input) transcription file// String& name: (input) database name// String& level: (input) transcription level//// return: a boolean flag indicating status//// this method load the transcription database//boolean TranscriptionDatabase::load(Filename& trans_file_a, String& name_a, String& level_a) { // local variables // boolean status = true; String line; long num_file = 0; File trans_file; // debugging information // if (debug_level_d >= Integral::BRIEF) { Console::increaseIndention(); String output; output.assign(L"\nloading input transcription file: "); output.concat(trans_file_a); Console::put(output); Console::decreaseIndention(); } // open the input transcription file in read mode // if (!trans_file.open(trans_file_a, File::READ_ONLY)) { String msg(L"Error: no input transcription file specified "); Console::put(msg); Error::handle(name(), L"load", Error::ERR, __FILE__, __LINE__); } // debugging information // if (debug_level_d >= Integral::BRIEF) { Console::increaseIndention(); String output; output.assign(L"database name: "); output.concat(name_a); output.concat(L"\ntranscription level: "); output.concat(level_a); Console::put(output); Console::decreaseIndention(); } // default type // String gtype(L"ORTHOGRAPHIC"); // default offset values for start and the stop times(0.0) // Float offset_start(AnnotationGraph::DEF_OFFSET); Float offset_stop(AnnotationGraph::DEF_OFFSET); // default feature name and unit for transcriptions // String fname(L"level"); String unit(L"seconds"); // read the transcription file line by line // while (trans_file.get(line)) { // get rid of blank spaces on both the sides of the line // line.trim(); // local variables // String id; String transcription; String first; String start_time; String stop_time; String channel_string; // default channel (0) // Long channel = Annotation::DEF_CHANNEL_INDEX; // skip any blank line // if (line.countTokens() == (long)0) continue; // get the number of fields based on endlimiter ":" // long num_tokens = 0; num_tokens = line.countTokens(L":"); // if the format is: // : trans1 :trans2 ... // long tmp1 = 0; if (line.firstChr(L":", tmp1) == (long)0) { // loop over all the transcriptions in this line // long pos = 0; while (line.tokenize(transcription, pos, L":")) { // get the fields // id.assign(num_file); transcription.trim(); // create the annotation graph // AnnotationGraph angr(name_a, gtype); Anchor* ancr_start = (Anchor*)NULL; Anchor* ancr_stop = (Anchor*)NULL; String newid_start = angr.createAnchor(name_a, unit); String newid_stop = angr.createAnchor(name_a, unit); ancr_start = angr.getAnchorById(newid_start); ancr_stop = angr.getAnchorById(newid_stop); String newid = angr.createAnnotation(name_a, ancr_start, ancr_stop, transcription, (long)channel); if (!angr.setFeature(newid, fname, level_a)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } // insert the record in the database (identifier and // annotation graph) // if (!insertRecord(id, angr)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } angr.clear(); // increment the file count // num_file++; // debugging message // if (debug_level_d >= Integral::BRIEF) { Console::increaseIndention(); String output; output.assign(L"number of the file processed: "); output.concat((Long)num_file); output.concat(L"\nidentifier: "); output.concat(id); output.concat(L"\nstart_time: "); output.concat(start_time); output.concat(L"\nstop_time: "); output.concat(stop_time); output.concat(L"\ntranscription: "); output.concat(transcription); Console::put(output); Console::decreaseIndention(); } } } // else if the format is just the transcription: // transcription // else if (num_tokens == 1) { transcription.assign(line); id.assign(num_file); // create the annotation graph // AnnotationGraph angr(name_a, gtype); Anchor* ancr_start = (Anchor*)NULL; Anchor* ancr_stop = (Anchor*)NULL; String newid_start = angr.createAnchor(name_a, unit); String newid_stop = angr.createAnchor(name_a, unit); ancr_start = angr.getAnchorById(newid_start); ancr_stop = angr.getAnchorById(newid_stop); String newid = angr.createAnnotation(name_a, ancr_start, ancr_stop, transcription, (long)channel); if (!angr.setFeature(newid, fname, level_a)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } // insert the record in the database (identifier and annotation graph) // if (!insertRecord(id, angr)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } angr.clear(); // increment the file count // num_file++; // debugging message // if (debug_level_d >= Integral::BRIEF) { Console::increaseIndention(); String output; output.assign(L"number of the file processed: "); output.concat((Long)num_file); output.concat(L"\nidentifier: "); output.concat(id); output.concat(L"\nstart_time: "); output.concat(start_time); output.concat(L"\nstop_time: "); output.concat(stop_time); output.concat(L"\ntranscription: "); output.concat(transcription); Console::put(output); Console::decreaseIndention(); } } // else if the formats is: // ident: transcription // ident start_time stop_time: transcription // ident start_time stop_time channel : transcription // else if (num_tokens == 2) { // get the number of fields based on space as an endlimiter // long pos = 0; line.tokenize(first, pos, L":"); long num_tokens_space = 0; num_tokens_space = first.countTokens(); first.trim(); line.tokenize(transcription, pos, L":"); transcription.trim(); // local variables // AnnotationGraph angr(name_a, gtype); String newid_start; String newid_stop; // if the number of tokens is 1, the format is: // ident: transcription // if (num_tokens_space == 1) { // get the fields // id.assign(first); // create anchors without timming information // newid_start = angr.createAnchor(name_a, unit); newid_stop = angr.createAnchor(name_a, unit); } // else if the number of tokens is 3, the format is: // ident start_time stop_time : transcription // else if (num_tokens_space == 3) { // get the fields // long pos = 0; first.tokenize(id, pos); first.tokenize(start_time, pos); first.tokenize(stop_time, pos); offset_start.assign(start_time); offset_stop.assign(stop_time); // create anchors with timming information // newid_start = angr.createAnchor(name_a, offset_start, unit); newid_stop = angr.createAnchor(name_a, offset_stop, unit); } // else if the number of tokens is 4, the format is: // ident start_time stop_time channel : transcription // else if (num_tokens_space == 4) { // get the fields // long pos = 0; first.tokenize(id, pos); first.tokenize(start_time, pos); first.tokenize(stop_time, pos); first.tokenize(channel_string, pos); offset_start.assign(start_time); offset_stop.assign(stop_time); channel.assign(channel_string); // create anchors with timming information // newid_start = angr.createAnchor(name_a, offset_start, unit); newid_stop = angr.createAnchor(name_a, offset_stop, unit); } // else error // else { String msg(L"Error: check the transcription file format:"); Console::put(msg); Error::handle(name(), L"load", Error::ERR, __FILE__, __LINE__); } // create the annotation graph // Anchor* ancr_start = (Anchor*)NULL; Anchor* ancr_stop = (Anchor*)NULL; ancr_start = angr.getAnchorById(newid_start); ancr_stop = angr.getAnchorById(newid_stop); String newid = angr.createAnnotation(name_a, ancr_start, ancr_stop, transcription, (long)channel); if (!angr.setFeature(newid, fname, level_a)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } // insert the record in the database (identifier and annotation graph) // if (!insertRecord(id, angr)) { return Error::handle(name(), L"load", ERR, __FILE__, __LINE__); } angr.clear(); // increment the file count // num_file++; // debugging message // if (debug_level_d >= Integral::BRIEF) { Console::increaseIndention(); String output; output.assign(L"number of the file processed: "); output.concat((Long)num_file); output.concat(L"\nidentifier: "); output.concat(id); output.concat(L"\nstart_time: "); output.concat(start_time); output.concat(L"\nstop_time: "); output.concat(stop_time); output.concat(L"\ntranscription: "); output.concat(transcription); Console::put(output); Console::decreaseIndention(); } } // else error // else { String msg(L"Error: check the transcription file format:"); Console::put(msg); Error::handle(name(), L"load", Error::ERR, __FILE__, __LINE__); } } // debugging message // if (debug_level_d >= Integral::NONE) { Console::increaseIndention(); String output; output.assign(L"total number of file processed: "); output.concat((Long)num_file); Console::put(output); Console::decreaseIndention(); } // close the input transcription file // trans_file.close(); // exit gracefully // return status;}// method: load//// arguments:// Sdb& sdb: (input) sdb file name list// Filename& trans_file: (input) transcription file //// return: logical error status//// this method load the data file to transcription database//boolean TranscriptionDatabase::load(Sdb& sdb_a, Filename& trans_file_a) { // loop from start // if (!sdb_a.gotoFirst()) { String msg(L"Error: no input file specified "); Console::put(msg); Error::handle(name(), L"load", Error::NO_PARAM_FILE, __FILE__, __LINE__); } Filename trans_file; Sof transcription_file; String transcription; String substring; long num_file = 0; // declare a string vector to store the transcription information // Vector<String> trans_vec; // open the input file in read mode // File read_trans_file; if (!read_trans_file.open(trans_file_a, File::READ_ONLY)) { Console::put(L"Error in opening transcription input file"); } // read the string lines // String input_line_01; while (read_trans_file.get(input_line_01)) { trans_vec.concat(input_line_01); } // close the input text file // read_trans_file.close(); String name_00(L"TIDIGITS"); // create the annotation graph // String gtype_00(L"ORTHOGRAPHIC"); String ident_00(L"id_00"); String ident_01(L"id_01"); String ident_02(L"id_02"); String ident_03(L"id_03"); String newid_00; String newid_01; String newid_02; Float offset_00(0.0); Float offset_01(0.0); Anchor* ancr_00 = (Anchor*)NULL; Anchor* ancr_01 = (Anchor*)NULL; String unit_00(L"seconds"); String feat_00(L"level"); String value_00(L"word"); String value_01(L"phoneme"); String channel_00; setDataBaseName(name_00); do { sdb_a.getName(trans_file); AnnotationGraph angr_00(name_00, gtype_00); // get the transcription // String transcription = trans_vec(num_file); // long trans_token = transcription.countTokens();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -