📄 isip_model_creator.cc
Font size:
// function: check if the input grammar string is a definition for// ISIP reserved nonspeech symbols//boolean isNonSpeech(String& arg_a) { String sub, input(arg_a), delim(L" \n"); long pos = 0; // search "public" keyword // while(input.tokenize(sub, pos, delim)) { if(sub.eq(L"public")) { // check the next token // input.tokenize(sub, pos, delim); if(sub.eq(L"<ISIP_NON_SPEECH>")) { return true; } } } // the definition not found // return false;}// function: convert lexicon input text file//boolean convertLexicon(Filename& lexicon_file_a, Sof& sof_a, Vector<String>& input_symbol_list_a, Vector<String>& output_symbol_list_a) { // number of grammars contained in the input file // long num_grammars = 0; boolean def_nonsp = false; // open the input file in read mode // File read_lexicon_file; if (!read_lexicon_file.open(lexicon_file_a, File::READ_ONLY)) { Console::put(L"Error in opening lexicon input file"); } // declare variables // String str; Vector<String> nonsp_def, pre_list, word_list, rule_list; boolean JSGF_found = false; // read each line // while(read_lexicon_file.get(str)) { // look for the JSGF grammar that defines the nonspeech symbols // if (str.eq(L"#JSGF V1.0;")) { JSGF_found = true; } // after meeting the JSGF grammar, get the JSGF grammar that // defines the nonspeech symbols // if (JSGF_found) { nonsp_def.concat(str); } // before meeting the JSGF grammar, process lexicon // else { // pre-process the input lexicon lines and merge the same lines // boolean same = false; for(long i=0; i<pre_list.length(); i++) { if(str.eq(pre_list(i))) same = true; } if(!same) { pre_list.concat(str); } } } // create grammar line for the nonspeech JSGF grammar // String nonsp_grammar(L"grammar = {\n"); for (long i = 0; i < nonsp_def.length(); i++) { nonsp_grammar.concat(L" "); nonsp_grammar.concat(nonsp_def(i)); nonsp_grammar.concat(L"\n"); } nonsp_grammar.concat(L"};\n\n"); // check if non-speech grammar is available // if (isNonSpeech(nonsp_grammar)) { def_nonsp = true; 
} // process each lexicon line after pre-processing in the pre_list // for(long i=0; i<pre_list.length(); i++) { String head_word, symbol, sequence; long pos(0); String delim(L" "); String lex_str(pre_list(i)); // get the first word in the lexicon line // lex_str.tokenize(head_word, pos); // tokenize out each symbol sequentially from the lexicon line // while(lex_str.tokenize(symbol, pos, delim)) { if(symbol.ne(head_word)) { sequence.concat(L" /0/ "); sequence.concat(symbol); } } // check if the head_word already exists in the word list // boolean exist = false; long alt_index = 0; for(long i=0; i<word_list.length(); i++) { if(head_word.eq(word_list(i))) { exist = true; alt_index = i; } } // if the head word is not available in the word_list yet // if(!exist) { // add the head word to the word_list // word_list.concat(head_word); num_grammars++; // also add the rule sequence to the rule_list // rule_list.concat(sequence); } // if the head word is already included in the word_list // else { // add the sequence rule part to the rule list as alternative // String tmp; // get existing rule in the rule line // String exist_rule(rule_list(alt_index)); Char end(exist_rule(rule_list(alt_index).length()-1)); // if no alternative relation exists in the rule line // ( and ) signs need to be added to surround the existing sequence rule // so that we add new alternative parallel to the existing sequence // if(!end.eq(')')) { tmp.assign(L" ("); tmp.concat(exist_rule); tmp.concat(L" )"); } else { tmp.assign(exist_rule); } // add the new sequence as alternative // tmp.concat(L" | ("); tmp.concat(sequence); tmp.concat(L" )"); // update the rule in the rule list // rule_list(alt_index).assign(tmp); } } // end: for(long i=0; i<pre_list.length(); i++) // close the input text file // read_lexicon_file.close(); // make sure each symbol in the input symbol list has a corresponding // grammar defined in the lexicon file // if(num_grammars != input_symbol_list_a.length()) { // output 
warning // Console::put(L"\nWarning: the number of grammars in the lexicon input file\ndoes not match the number of symbols in the LM input file.\n\n"); } Vector<String> words, rules; for(long i=0; i<input_symbol_list_a.length(); i++) { boolean found = false; for(long j=0; j<word_list.length(); j++) { if(input_symbol_list_a(i).eq(word_list(j))) { found = true; words.concat(word_list(j)); rules.concat(rule_list(j)); } } if(!found) { String output(L"symbol \""); output.concat(input_symbol_list_a(i)); output.concat(L"\""); return Error::handle(output, L"no corresponding grammar in lexicon file\n", Error::TEST, __FILE__, __LINE__); } } // update the number of lexicon grammars to fit the input symbol list // num_grammars = words.length(); // declare a vector of string to store JSGF grammar strings // Vector<String> grammars(num_grammars); // declare and initialize a string to store a single JSGF grammar // String grammar_str(L"grammar = {\n"); grammar_str.concat(L" #JSGF V1.0;\n"); grammar_str.concat(L" // Define the grammar name\n"); grammar_str.concat(L" grammar network.grammar."); for(long i=0; i<grammars.length(); i++) { grammars(i).assign(grammar_str); grammars(i).concat(words(i)); grammars(i).concat(L";\n\n // Define the rules\n public <"); grammars(i).concat(words(i)); grammars(i).concat(L"> = "); grammars(i).concat(L"<ISIP_JSGF_1_0_START>"); Char end(rules(i)(rules(i).length()-1)); // if the rule includes alternative // if(end.eq(L')')) { grammars(i).concat(L" ("); grammars(i).concat(rules(i)); grammars(i).concat(L" )"); } // if it includes only sequence // else { grammars(i).concat(rules(i)); } grammars(i).concat(L" /0/ <ISIP_JSGF_1_0_TERM>;\n"); grammars(i).concat(L"};\n\n"); } // write the grammars into the output Sof file // // define algorithm tag line // String algo_tag(L"algorithm = \"JSGF\";\n"); // set the size of the object to be written // long size = algo_tag.length(); if (def_nonsp) { size += nonsp_grammar.length(); } for(long i=0; 
i<grammars.length(); i++) { size += grammars(i).length(); } // write the lexicon model into the sof file // sof_a.put(L"JSGF", 1, size); sof_a.puts(algo_tag); if (def_nonsp) { sof_a.puts(nonsp_grammar); } for(long i=0; i<grammars.length(); i++) { sof_a.puts(grammars(i)); } // get the symbol list in this level // for(long i=0; i<grammars.length(); i++) { Vector<String> tmp, symbol_list; getSymbols(grammars(i), tmp); // check if any symbol is already in the symbol list // for(long i=0; i<tmp.length(); i++) { boolean exist = false; for(long j=0; j<output_symbol_list_a.length(); j++) { if(tmp(i).eq(output_symbol_list_a(j))) { exist = true; } } if(!exist) { symbol_list.concat(tmp(i)); } } output_symbol_list_a.concat(symbol_list); } // exit gracefully // return true;}// function: convert acoustic model input text file//boolean convertAcoustic(Filename& acoustic_file_a, Sof& sof_a, Vector<String>& input_symbol_list_a, Vector<String>& output_symbol_list_a) { // number of grammars contained in the input file // long num_grammars = 0; // open the input file in read mode // File read_acoustic_file; if (!read_acoustic_file.open(acoustic_file_a, File::READ_ONLY)) { Console::put(L"Error in opening acoustic model input file"); } // get the number of grammars in the input file // String str; while(read_acoustic_file.get(str)) { String sub; long pos(0); str.tokenize(sub, pos); if(sub.eq(L"#JSGF")) num_grammars++; } // close the input text file // read_acoustic_file.close(); // declare a vector of strings to contain the input text file // Vector<String> acoustic_text(num_grammars); // open the input file again // if (!read_acoustic_file.open(acoustic_file_a, File::READ_ONLY)) { Console::put(L"Error in opening acoustic model input file"); } // read the input text and add it line by line to the string vector // String str_lm, tmp_header; long header_counter = 0; for(long i=0; i<num_grammars; i++) { // add the grammar head line // acoustic_text(i).assign(L"grammar = {\n "); 
acoustic_text(i).concat(tmp_header); // get text line by line // while(read_acoustic_file.get(str_lm)) { // check if a new grammar is reached // (a grammar always starts with #JSGF header) // String sub; long pos(0); str_lm.tokenize(sub, pos); if(sub.eq(L"#JSGF")) { if(header_counter>0) { tmp_header.assign(str_lm); tmp_header.concat(L'\n'); break; } header_counter++; } acoustic_text(i).concat(str_lm); acoustic_text(i).concat(L'\n'); } // add the grammar end line // acoustic_text(i).concat(L"};\n\n"); } // end of for loop // close the input acoustic file // read_acoustic_file.close(); // get grammar names from the text grammars // Vector<String> grammar_name_list; getGrammarNames(grammar_name_list, acoustic_text); // useful variables // boolean default_model = false; boolean reserved_symbol = false; long def_model_index = 0; long reserved_word_index = 0; String reserved_word; // loop through the grammar name list for searching // for (long i = 0; i < grammar_name_list.length(); i++) { // 1: search the grammar defining ISIP default acoustic model // in the input grammars // if (grammar_name_list(i).eq(L"ISIP_JSGF_1_0_DEFAULT_ACOUSTIC_MODEL")) { default_model = true; def_model_index = i; } // 2: search the grammar defining ISIP user-defined reserved symbol // in the input grammars // if (grammar_name_list(i).eq(L"USER_RESERVED_SYMBOL")) { reserved_symbol = true; reserved_word_index = i; // get the reserved symbol // String sub, delim(L" };\n"); long pos = 0; // tokenize the grammar text // while(acoustic_text(i).tokenize(sub, pos, delim)) { // search "public" keyword // if(sub.eq(L"public")) { // get the token after "=" sign in the public rule line // acoustic_text(i).tokenize(sub, pos, delim); acoustic_text(i).tokenize(sub, pos, delim); acoustic_text(i).tokenize(sub, pos, delim); // the token is the user-defined reserved symbol // reserved_word.assign(sub); } } // end of the while loop } // end: if (grammar_name_list(i).eq(L"USER_RESERVED_SYMBOL")) } // make sure the 
reserved symbol is not used in the grammars // other than the default model grammar and the reservation grammar // if(reserved_symbol) { // check through all input grammars // for(long i = 0; i<acoustic_text.length(); i++) { Vector<String> tmp; // skip the two reserved default definition grammars // if((i != reserved_word_index) && (i != def_model_index)) { // get symbol list in the given grammar //
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -