📄 isip_model_creator.cc
Font size:
// function: check if the input grammar string is a definition for// ISIP reserved nonspeech symbols//boolean isNonSpeech(String& arg_a) { String sub, input(arg_a), delim(L" \n"); long pos = 0; // search "public" keyword // while(input.tokenize(sub, pos, delim)) { if(sub.eq(L"public")) { // check the next token // input.tokenize(sub, pos, delim); if(sub.eq(L"<ISIP_NON_SPEECH>")) { return true; } } } // the definition not found // return false;}// function: convert lexicon input text file//boolean convertLexicon(Filename& lexicon_file_a, Sof& sof_a, Vector<String>& input_symbol_list_a, Vector<String>& output_symbol_list_a) { // number of grammars contained in the input file // long num_grammars = 0; boolean def_nonsp = false; // open the input file in read mode // File read_lexicon_file; if (!read_lexicon_file.open(lexicon_file_a, File::READ_ONLY)) { Console::put(L"Error in opening lexicon input file"); } // declare variables // String str; Vector<String> nonsp_def, pre_list, word_list, rule_list; boolean JSGF_found = false; // read each line // while(read_lexicon_file.get(str)) { // look for the JSGF grammar that defines the nonspeech symbols // if (str.eq(L"#JSGF V1.0;")) { JSGF_found = true; } // after meeting the JSGF grammar, get the JSGF grammar that // defines the nonspeech symbols // if (JSGF_found) { nonsp_def.concat(str); } // before meeting the JSGF grammar, process lexicon // else { // pre-process the input lexicon lines and merge the same lines // boolean same = false; for(long i=0; i<pre_list.length(); i++) { if(str.eq(pre_list(i))) same = true; } if(!same) { pre_list.concat(str); } } } // create grammar line for the nonspeech JSGF grammar // String nonsp_grammar(L"grammar = {\n"); for (long i = 0; i < nonsp_def.length(); i++) { nonsp_grammar.concat(L" "); nonsp_grammar.concat(nonsp_def(i)); nonsp_grammar.concat(L"\n"); } nonsp_grammar.concat(L"};\n\n"); // check if non-speech grammar is available // if (isNonSpeech(nonsp_grammar)) { def_nonsp = true; 
} // process each lexicon line after pre-processing in the pre_list // for(long i=0; i<pre_list.length(); i++) { String head_word, symbol, sequence; long pos(0); String delim(L" "); String lex_str(pre_list(i)); // get the first word in the lexicon line // lex_str.tokenize(head_word, pos); // tokenize out each symbol sequentially from the lexicon line // while(lex_str.tokenize(symbol, pos, delim)) { if(symbol.ne(head_word)) { sequence.concat(L" /0/ "); sequence.concat(symbol); } } // check if the head_word already exists in the word list // boolean exist = false; long alt_index = 0; for(long i=0; i<word_list.length(); i++) { if(head_word.eq(word_list(i))) { exist = true; alt_index = i; } } // if the head word is not available in the word_list yet // if(!exist) { // add the head word to the word_list // word_list.concat(head_word); num_grammars++; // also add the rule sequence to the rule_list // rule_list.concat(sequence); } // if the head word is already included in the word_list // else { // add the sequence rule part to the rule list as alternative // String tmp; // get existing rule in the rule line // String exist_rule(rule_list(alt_index)); Char end(exist_rule(rule_list(alt_index).length()-1)); // if no alternative relation exists in the rule line // ( and ) signs need to be added to surround the existing sequence rule // so that we add new alternative parallel to the existing sequence // if(!end.eq(')')) { tmp.assign(L" ("); tmp.concat(exist_rule); tmp.concat(L" )"); } else { tmp.assign(exist_rule); } // add the new sequence as alternative // tmp.concat(L" | ("); tmp.concat(sequence); tmp.concat(L" )"); // update the rule in the rule list // rule_list(alt_index).assign(tmp); } } // end: for(long i=0; i<pre_list.length(); i++) // close the input text file // read_lexicon_file.close(); // make sure each symbol in the input symbol list has a corresponding // grammar defined in the lexicon file // if(num_grammars != input_symbol_list_a.length()) { // output 
warning // Console::put(L"\nWarning: the number of grammars in the lexicon input file\ndoes not match the number of symbols in the LM input file.\n\n"); } Vector<String> words, rules; for(long i=0; i<input_symbol_list_a.length(); i++) { boolean found = false; for(long j=0; j<word_list.length(); j++) { if(input_symbol_list_a(i).eq(word_list(j))) { found = true; words.concat(word_list(j)); rules.concat(rule_list(j)); } } if(!found) { String output(L"symbol \""); output.concat(input_symbol_list_a(i)); output.concat(L"\""); return Error::handle(output, L"no corresponding grammar in lexicon file\n", Error::TEST, __FILE__, __LINE__); } } // update the number of lexicon grammars to fit the input symbol list // num_grammars = words.length(); // declare a vector of string to store JSGF grammar strings // Vector<String> grammars(num_grammars); // declare and initialize a string to store a single JSGF grammar // String grammar_str(L"grammar = {\n"); grammar_str.concat(L" #JSGF V1.0;\n"); grammar_str.concat(L" // Define the grammar name\n"); grammar_str.concat(L" grammar network.grammar."); for(long i=0; i<grammars.length(); i++) { grammars(i).assign(grammar_str); grammars(i).concat(words(i)); grammars(i).concat(L";\n\n // Define the rules\n public <"); grammars(i).concat(words(i)); grammars(i).concat(L"> = "); grammars(i).concat(L"<ISIP_JSGF_1_0_START>"); Char end(rules(i)(rules(i).length()-1)); // if the rule includes alternative // if(end.eq(L')')) { grammars(i).concat(L" ("); grammars(i).concat(rules(i)); grammars(i).concat(L" )"); } // if it includes only sequence // else { grammars(i).concat(rules(i)); } grammars(i).concat(L" /0/ <ISIP_JSGF_1_0_TERM>;\n"); grammars(i).concat(L"};\n\n"); } // write the grammars into the output Sof file // // define algorithm tag line // String algo_tag(L"algorithm = \"JSGF\";\n"); // set the size of the object to be written // long size = algo_tag.length(); if (def_nonsp) { size += nonsp_grammar.length(); } for(long i=0; 
i<grammars.length(); i++) { size += grammars(i).length(); } // write the lexicon model into the sof file // sof_a.put(L"JSGF", 1, size); sof_a.puts(algo_tag); if (def_nonsp) { sof_a.puts(nonsp_grammar); } for(long i=0; i<grammars.length(); i++) { sof_a.puts(grammars(i)); } // get the symbol list in this level // for(long i=0; i<grammars.length(); i++) { Vector<String> tmp, symbol_list; getSymbols(grammars(i), tmp); // check if any symbol is already in the symbol list // for(long i=0; i<tmp.length(); i++) { boolean exist = false; for(long j=0; j<output_symbol_list_a.length(); j++) { if(tmp(i).eq(output_symbol_list_a(j))) { exist = true; } } if(!exist) { symbol_list.concat(tmp(i)); } } output_symbol_list_a.concat(symbol_list); } // exit gracefully // return true;}// function: convert acoustic model input text file//boolean convertAcoustic(Filename& acoustic_file_a, Sof& sof_a, Vector<String>& input_symbol_list_a, Vector<String>& output_symbol_list_a) { // number of grammars contained in the input file // long num_grammars = 0; // open the input file in read mode // File read_acoustic_file; if (!read_acoustic_file.open(acoustic_file_a, File::READ_ONLY)) { Console::put(L"Error in opening acoustic model input file"); } // get the number of grammars in the input file // String str; while(read_acoustic_file.get(str)) { String sub; long pos(0); str.tokenize(sub, pos); if(sub.eq(L"#JSGF")) num_grammars++; } // close the input text file // read_acoustic_file.close(); // declare a vector of strings to contain the input text file // Vector<String> acoustic_text(num_grammars); // open the input file again // if (!read_acoustic_file.open(acoustic_file_a, File::READ_ONLY)) { Console::put(L"Error in opening acoustic model input file"); } // read the input text and add it line by line to the string vector // String str_lm, tmp_header; long header_counter = 0; for(long i=0; i<num_grammars; i++) { // add the grammar head line // acoustic_text(i).assign(L"grammar = {\n "); 
acoustic_text(i).concat(tmp_header); // get text line by line // while(read_acoustic_file.get(str_lm)) { // check if a new grammar is reached // (a grammar always starts with #JSGF header) // String sub; long pos(0); str_lm.tokenize(sub, pos); if(sub.eq(L"#JSGF")) { if(header_counter>0) { tmp_header.assign(str_lm); tmp_header.concat(L'\n'); break; } header_counter++; } acoustic_text(i).concat(str_lm); acoustic_text(i).concat(L'\n'); } // add the grammar end line // acoustic_text(i).concat(L"};\n\n"); } // end of for loop // close the input acoustic file // read_acoustic_file.close(); // get grammar names from the text grammars // Vector<String> grammar_name_list; getGrammarNames(grammar_name_list, acoustic_text); // useful variables // boolean default_model = false; boolean reserved_symbol = false; long def_model_index = 0; long reserved_word_index = 0; String reserved_word; // loop through the grammar name list for searching // for (long i = 0; i < grammar_name_list.length(); i++) { // 1: search the grammar defining ISIP default acoustic model // in the input grammars // if (grammar_name_list(i).eq(L"ISIP_JSGF_1_0_DEFAULT_ACOUSTIC_MODEL")) { default_model = true; def_model_index = i; } // 2: search the grammar defining ISIP user-defined reserved symbol // in the input grammars // if (grammar_name_list(i).eq(L"USER_RESERVED_SYMBOL")) { reserved_symbol = true; reserved_word_index = i; // get the reserved symbol // String sub, delim(L" };\n"); long pos = 0; // tokenize the grammar text // while(acoustic_text(i).tokenize(sub, pos, delim)) { // search "public" keyword // if(sub.eq(L"public")) { // get the token after "=" sign in the public rule line // acoustic_text(i).tokenize(sub, pos, delim); acoustic_text(i).tokenize(sub, pos, delim); acoustic_text(i).tokenize(sub, pos, delim); // the token is the user-defined reserved symbol // reserved_word.assign(sub); } } // end of the while loop } // end: if (grammar_name_list(i).eq(L"USER_RESERVED_SYMBOL")) } // make sure the 
reserved symbol is not used in the grammars // other than the default model grammar and the reservation grammar // if(reserved_symbol) { // check through all input grammars // for(long i = 0; i<acoustic_text.length(); i++) { Vector<String> tmp; // skip the two reserved default definition grammars // if((i != reserved_word_index) && (i != def_model_index)) { // get symbol list in the given grammar //
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -