📄 trans_06.cc

📁 这是一个从音频信号里提取特征参量的程序
💻 CC
📖 第 1 页 / 共 4 页
字号:
    //    long current_tran_token = 0;    String atype_00;        // tokenize the transcription and generate the graph    //    long pos = 0;    //    long syntac_num = 0;    //    long current_syn = 0;    String syn_string;    long skip_token = 0;          // while (transcription.tokenize(atype_00, pos))    {      atype_00.assign(transcription);      Long x = transcription.firstStr(L":", 0);       x.debug(L"x=");            transcription.deleteRange(0, (long)x+1);      transcription.trim();      transcription.debug(L"TR");            atype_00.deleteRange(x, atype_00.length()-x);      atype_00.debug(L"atype");            // get one token      //      atype_00.trim();      String time_00;            while (atype_00.tokenize(time_00, pos)) {		skip_token++;	if (skip_token ==1) offset_00.assign(time_00);	else if (skip_token == 2) offset_01.assign(time_00);	else if (skip_token == 3) channel_00.assign(time_00);      }      long channel_x = 0;      if ( channel_00.eq(L"A")) {	channel_00.debug(L"channel A:");	channel_x = 0;	      }      else if (channel_00.eq(L"B")) {	channel_00.debug(L"channel B:");	channel_x = 1;      }            if (debug_level_d >= Integral::DETAILED) {	atype_00.debug(L"word:");      }            // insert the word to AG      //            newid_00 = angr_00.createAnchor(name_00, offset_00, unit_00);      newid_01 = angr_00.createAnchor(name_00, offset_01, unit_00);          ancr_00 = angr_00.getAnchorById(newid_00);      ancr_01 = angr_00.getAnchorById(newid_01);            newid_02 = angr_00.createAnnotation(name_00, ancr_00, ancr_01,					  transcription, channel_x);            if (!angr_00.setFeature(newid_02, feat_00, value_00)) {	return Error::handle(name(), L"load", ERR, __FILE__, __LINE__);      }    } // end of  transcription tokenize        // test the insert method    //      if (!insertRecord(trans_file, angr_00)) {      return Error::handle(name(), L"load", ERR, __FILE__, __LINE__);    }    angr_00.clear();        // move one forward for file count    //    num_file++;    if (debug_level_d >= Integral::DETAILED) {      Long(num_file).debug(L"total number of file processed=");      trans_file.debug(L"file name");      transcription.debug(L"transcription");    }    // debug(L"upto now");      } while (sdb_a.gotoNext());    return true;}// method: load//// arguments://  Sdb& sdb: (input) sdb id list//  Filename& trans_file: (input) transcription file //  Filename& lexicon_file: (input) lexicon file//  boolean flag: (input) flag to indicate if the time information exist//// return: logical error status//// this method load data files to the transcription database//boolean TranscriptionDatabase::load(Sdb& sdb_a,				    Filename& trans_file_a,				    Filename& lexicon_file_a, boolean flag_a) {  // loop from start  //  if (!sdb_a.gotoFirst()) {    String msg(L"Error: no input file specified ");    Console::put(msg);    Error::handle(name(), L"load", Error::NO_PARAM_FILE,                  __FILE__, __LINE__);  }  Filename trans_file;  Sof transcription_file;  String transcription;      long num_file = 0;  // declare a string vector to store the transcription information  //  Vector<String> trans_vec;    // open the input file in read mode  //  File read_trans_file;  if (!read_trans_file.open(trans_file_a, File::READ_ONLY)) {    Console::put(L"Error in opening transcription input file");  }  // read the string lines  //  String input_line_01;  while (read_trans_file.get(input_line_01)) {    trans_vec.concat(input_line_01);  }    // close the input text file  //  read_trans_file.close();  long length_01 = trans_vec.length();  Long(length_01).debug(L"total lines in transcription = ");  // declare the hashtable for the word and its pronunciation  //  HashTable<String, String> pronun_map_d;  // open the input file in read lexicon  //  Vector<String> lexicon_symbol_list;  // open the input file in read mode  //  File read_lexicon_file;  if (!read_lexicon_file.open(lexicon_file_a, File::READ_ONLY)) {    Console::put(L"Error in opening lexicon input file");  }  // declare variables  //  String str;  Vector<String> nonsp_def, pre_list, word_list, rule_list;    // read each line  //  while (read_lexicon_file.get(str)) {    str.debug(L"str");    // pre-process the input lexicon lines and merge the same lines    //    boolean same = false;    for (long i = 0; i < pre_list.length(); i++) {      if (str.eq(pre_list(i))) 	same = true;    }    if (!same) {       pre_list.concat(str);    }  }  read_lexicon_file.close();   // process each lexicon line after pre-processing in the pre_list  //  for (long i = 0; i < pre_list.length(); i++) {        String head_word, symbol, sequence;    long pos(0);    String delim(L" ");    String lex_str(pre_list(i));        // get the first word in the lexicon line    //    lex_str.tokenize(head_word, pos);        String key_word = head_word;    long alt_index = 0;        while (pronun_map_d.containsKey(key_word)) {      key_word.assign(head_word);      key_word.concat(L".");      key_word.concat(alt_index++);    }    String rest_string;    lex_str.tokenize(rest_string, pos, lex_str.length() - pos);    rest_string.trim();    rest_string.debug(L"lexicon");    pronun_map_d.insert(key_word, &rest_string);  }  if (debug_level_d >= Integral::DETAILED) {    pronun_map_d.debug(L"lexicon");  }    String name_00(L"SPINE");  // create the annotation graph  //  String gtype_00(L"ORTHOGRAPHIC");  String ident_00(L"id_00");  String ident_01(L"id_01");  String ident_02(L"id_02");  String ident_03(L"id_03");      String ident_04(L"id_04");    String newid_00;  String newid_01;  String newid_02;  String newid_03;  String newid_04;    String synid_00;  String synid_01;    Float offset_00(0.0);  Float offset_01(0.0);  Float offset_02(0.0);    Anchor* ancr_00 = (Anchor*)NULL;  Anchor* ancr_01 = (Anchor*)NULL;    String unit_00(L"seconds");  String feat_00(L"level");  String value_00(L"syntactic");  String value_01(L"word");  String value_02(L"phoneme");    String channel_00;      setDataBaseName(name_00);    do {    sdb_a.getName(trans_file);    AnnotationGraph angr_00(name_00, gtype_00);    // get the transcription    //    String transcription = trans_vec(num_file);    // pre-processing transcription    //    String atype_00;        atype_00.assign(transcription);    long pos = 0;      long skip_token = 0;    long channel_x = 0;          if (flag_a) {      long x = transcription.firstStr(L":", 0);       Long(x).debug(L"':' position =");            transcription.deleteRange(0, (long)x+1);      transcription.trim();      transcription.debug(L"transcription");            atype_00.deleteRange(x, atype_00.length()-x);      atype_00.debug(L"atype");      atype_00.trim();          // tokenize the transcription and generate the graph      //            String time_00;          while (atype_00.tokenize(time_00, pos)) {		skip_token++;	if (skip_token ==1) offset_00.assign(time_00);	else if (skip_token == 2) offset_01.assign(time_00);	else if (skip_token == 3) channel_00.assign(time_00);      }      if ( channel_00.eq(L"A")) {	channel_00.debug(L"channel A:");	channel_x = 0;	      }      else if (channel_00.eq(L"B")) {	channel_00.debug(L"channel B:");	channel_x = 1;      }          if (debug_level_d >= Integral::DETAILED) {	atype_00.debug(L"word:");      }    } // end of flag_a        long trans_token = transcription.countTokens();    long current_tran_token = 0;    long token_count = 0;    if (flag_a) {      newid_00 = angr_00.createAnchor(name_00, offset_00, unit_00);        }    else {      newid_00 = angr_00.createAnchor(name_00, unit_00);        }        synid_00 = newid_00;    pos = 0;        while (transcription.tokenize(atype_00, pos)) {      // get one token      //      atype_00.trim();      token_count++;      atype_00.debug(L"word:");	      if (debug_level_d >= Integral::DETAILED) {	atype_00.debug(L"word:");      }            // insert the word to AG      //            if (token_count == trans_token) {	if (flag_a) {	  newid_01 = angr_00.createAnchor(name_00, offset_01, unit_00);	}	else {	  newid_01 = angr_00.createAnchor(name_00, unit_00);	  	}      }      else {	newid_01 = angr_00.createAnchor(name_00, unit_00);      }            ancr_00 = angr_00.getAnchorById(newid_00);      ancr_01 = angr_00.getAnchorById(newid_01);      ancr_00->debug(L"new_00");      ancr_01->debug(L"new_01");            newid_02 = angr_00.createAnnotation(name_00, ancr_00, ancr_01,					  atype_00, channel_x);      ancr_00->debug(L"new_00A");      ancr_01->debug(L"new_01A");      if (!angr_00.setFeature(newid_02, feat_00, value_01)) {	return Error::handle(name(), L"load", ERR, __FILE__, __LINE__);      }      ancr_00->debug(L"new_00AA");      ancr_01->debug(L"new_01AA");	      // add phone level AG here      //      String key_phone = atype_00;      String delim(L" ");      long alt_index = 0;      while (pronun_map_d.containsKey(key_phone)) {	long pos(0);		String symbol, sub_symbol;	symbol.assign(*pronun_map_d.get(key_phone));		if (debug_level_d >= Integral::DETAILED) {	  symbol.debug(L"sub_symbol--------------------");	}		long total_token = symbol.countTokens();	long token_number = 0;	newid_03 = newid_00;		while (symbol.tokenize(sub_symbol, pos, delim)) {	  if (token_number == total_token - 1) {	    newid_04 = newid_01; 	  }	  	  else {	    newid_04 = angr_00.createAnchor(name_00, unit_00);	  }	  ancr_00 = angr_00.getAnchorById(newid_03);	  ancr_01 = angr_00.getAnchorById(newid_04);	  ancr_00->debug(L"new_00B");	  ancr_01->debug(L"new_01B");	  	  newid_02 = angr_00.createAnnotation(name_00, ancr_00,					      ancr_01, sub_symbol);	  ancr_00->debug(L"new_00BB");	  ancr_01->debug(L"new_01BB");	    	  if (!angr_00.setFeature(newid_02, feat_00, value_02)) {	    return Error::handle(name(), L"load", ERR, __FILE__,				 __LINE__);	  }	  	  ancr_00->debug(L"new_00BBB");	  ancr_01->debug(L"new_01BBB");	  newid_03 = newid_04;	  token_number++;	} // end of while tokenize	key_phone.assign(atype_00);	key_phone.concat(L".");	key_phone.concat(alt_index++);	if (debug_level_d >= Integral::DETAILED) {	  key_phone.debug(L"key_phone==============");	}	      } // end of while containsKey      newid_00 = newid_01;      current_tran_token++;            if (current_tran_token == trans_token) {	synid_01 = newid_01;	ancr_00 = angr_00.getAnchorById(synid_00);	ancr_01 = angr_00.getAnchorById(synid_01);	//syn_string.trim();	ancr_00->debug(L"new_00C");	ancr_01->debug(L"new_01C");      	newid_02 = angr_00.createAnnotation(name_00, ancr_00,					    ancr_01, transcription, channel_x);	ancr_00->debug(L"new_00D");	ancr_01->debug(L"new_01D");		if (!angr_00.setFeature(newid_02, feat_00, value_00)) {	  return Error::handle(name(), L"load", ERR, __FILE__,			       __LINE__);	}	//	syn_string.clear();	ancr_00->debug(L"new_00E");	ancr_01->debug(L"new_01E");		synid_00 = newid_00;      }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -