📄 isip_model_creator.cc
字号:
getSymbols(acoustic_text(i), tmp); // check if the reserved word is used by each symbol // for(long j = 0; j < tmp.length(); j++) { String sub_str, delim(L" _0123456789"); long pos = 0; tmp(j).tokenize(sub_str, pos, delim); // if any grammar illegally uses the reserved word // output the grammar and return error message // if(sub_str.eq(reserved_word)) { String output_grammar(acoustic_text(i)); Console::put(L"\n\nThe following grammar is illegal in using reserved symbol\n\n"); Console::put(output_grammar); return Error::handle(L"acoustic input file", L"the user-reserved symbol has been illegally used in other grammar definitions\n", Error::TEST, __FILE__, __LINE__); } } // end: for(long j = 0; j < tmp.length(); j++) { } // end: if((i != reserved_word_index) && (i != def_model_index)) } // end: for(long i = 0; i<acoustic_text.length(); i++) } // make sure each input symbol in the upper level has a corresponding // grammar in this level // Vector<String> grammars; long reserved_word_counter = 1; for (long i = 0; i < input_symbol_list_a.length(); i++) { boolean found = false; for(long j=0; j<grammar_name_list.length(); j++) { if(input_symbol_list_a(i).eq(grammar_name_list(j)) || grammar_name_list(j).eq(L"ISIP_JSGF_1_0_START") || grammar_name_list(j).eq(L"ISIP_JSGF_1_0_TERM")) { found = true; grammars.concat(acoustic_text(j)); } } if(!found) { // if no corresponding grammar defined, use the user-defined // default acoustic model grammar // if(default_model) { String message(L"Note: symbol \""); message.concat(input_symbol_list_a(i)); message.concat(L"\" uses the user-defined default acoustic model.\n"); Console::put(message); // create a grammar for the given symbol according to the default // String tmp_grammar; if(reserved_symbol) { defaultGrammar(tmp_grammar, input_symbol_list_a(i), acoustic_text(def_model_index), reserved_word, reserved_word_counter); } else { // if user does not define the reserved symbol word // return Error::handle(L"substitution for default acoustic model", L"user has not yet defined the reserved symbol for the substitution.\nPlease add a JSGF grammar to define the reserved symbol in the input acoustic text file!\n", Error::TEST, __FILE__, __LINE__); } grammars.concat(tmp_grammar); } // if neither corresponding grammar nor default model grammar // can be found, return error message // else { String output(L"symbol \""); output.concat(input_symbol_list_a(i)); output.concat(L"\""); return Error::handle(output, L"neither corresponding grammar nor default model defined in the acoustic file\n", Error::TEST, __FILE__, __LINE__); } } } // then store the input acoustic models in the input sof file // // declare strings to contain the grammars of definitions for // ISIP graph starting and stoping points // String start_def, term_def; // vector of string to store acoustic grammars // Vector<String> acoustic_grammar; // declare the line of algorithm tag // String algo_tag(L"algorithm = \"JSGF\";\n"); // set the size of the object to be written // long size_1 = algo_tag.length(); long size_2 = algo_tag.length(); // boolean variables to indicate if there are definitions // for graph ending points // boolean def_start = false; boolean def_term = false; // loop throught all the grammars in the input acoustic text file // for(long i=0; i < grammars.length(); i++) { if(isStart(grammars(i))) { start_def.assign(grammars(i)); size_1 += grammars(i).length(); def_start = true; } else if(isTerm(grammars(i))) { term_def.assign(grammars(i)); size_1 += grammars(i).length(); def_term = true; } else { // this is acoustic grammar // acoustic_grammar.concat(grammars(i)); size_2 += grammars(i).length(); } } // write the definitions of graph starting and stoping points // into the sof file, if they are available // if(def_start && def_term) { sof_a.put(L"JSGF", 100, size_1); sof_a.puts(algo_tag); sof_a.puts(start_def); sof_a.puts(term_def); } // write the acoustic model into the sof file // sof_a.put(L"JSGF", 2, size_2); sof_a.puts(algo_tag); for(long i=0; i<acoustic_grammar.length(); i++) { sof_a.puts(acoustic_grammar(i)); } // get the symbol list in this level // for(long i=0; i<acoustic_grammar.length(); i++) { Vector<String> tmp, symbol_list; getSymbols(acoustic_grammar(i), tmp); // check if any symbol is already in the symbol list // for(long i=0; i<tmp.length(); i++) { boolean exist = false; for(long j=0; j<output_symbol_list_a.length(); j++) { if(tmp(i).eq(output_symbol_list_a(j))) { exist = true; } } if(!exist) { symbol_list.concat(tmp(i)); } } output_symbol_list_a.concat(symbol_list); } // exit gracefully // return true; }// function: create an acoustic grammar based on the user-defined// default acoustic model grammar//boolean defaultGrammar(String& grammar_a, String& symbol_a, String& def_model_a, String& reserved_word_a, long& reserved_word_counter_a) { // process the rule definition in the default model // String rule; String sub, delim(L" };\n"); long pos = 0; boolean rule_reached = false; // preprocess the input model string by picking up the actual model grammar // surrounded by { and } // String def_model; def_model_a.substr(def_model, 12, def_model_a.length() - 12 - 4); // then tokenize the actual model grammar string // delim.assign(L" ;\n"); while(def_model.tokenize(sub, pos, delim)) { // search "public" keyword // if(sub.eq(L"public")) { // get to the token after "=" sign in the public rule line // def_model.tokenize(sub, pos, delim); def_model.tokenize(sub, pos, delim); rule_reached = true; } // process following symbols // if(rule_reached) { delim.assign(L" \n"); while(def_model.tokenize(sub, pos, delim)) { // add space between each two adjacent tokens // rule.concat(L" "); // discard ";", "+" or "*" sign for further symbol check // String tmp; Char end(sub(sub.length() - 1)); boolean discard = false; boolean semicolon = false; // if a semi-colon is found at the end of the symbol // if (end.eq(L';')) { sub.substr(tmp, 0, sub.length() - 1); sub.assign(tmp); end.assign(sub(sub.length() - 1)); semicolon = true; } // if "*" or "+" is found at the end of the symbol // if(end.eq(L'*') || end.eq(L'+')) { sub.substr(tmp, 0, sub.length() - 1); sub.assign(tmp); discard = true; } // if a symbol is met, substitute it as the default symbol // if(isSymbol(sub)) { sub.assign(reserved_word_a); sub.concat(L"_"); sub.concat(reserved_word_counter_a); reserved_word_counter_a++; } // if discarding already happened, add the discarded // "*" or "+" sign back to the sub-string // if(discard) { sub.concat(end); } // if semicolon has been met and discarded, add the discarded // ";" back to tmp with an additional new-line character // if(semicolon) { sub.concat(L";\n "); } // add the rule component to the new rule line // rule.concat(sub); } } } // end of the outer while loop // drop the space at the end of the rule string // String tmp_rule; rule.substr(tmp_rule, 0, rule.length() - 1); rule.assign(tmp_rule); // create a substituted grammar for the input symbol // grammar_a.assign(L"grammar = {\n #JSGF V1.0;\n"); grammar_a.concat(L" // Define the grammar name\n"); grammar_a.concat(L" grammar network.grammar."); grammar_a.concat(symbol_a); grammar_a.concat(L";\n\n"); grammar_a.concat(L" // Define the rules using the default model\n"); grammar_a.concat(L" public <"); grammar_a.concat(symbol_a); grammar_a.concat(L"> ="); grammar_a.concat(rule); grammar_a.concat(L"};\n\n"); // exit gracefully // return true; }// function: get grammar names from given grammars//boolean getGrammarNames(Vector<String>& grammar_name_list_a, Vector<String>& grammar_list_a) { for(long i=0; i<grammar_list_a.length(); i++) { String sub, delim(L" \n;"); long pos = 0; String tmp(grammar_list_a(i)); // tokenize the input grammar // while(tmp.tokenize(sub, pos, delim)) { // search "public" keyword // if(sub.eq(L"public")) { // get the folowing public rulename // tmp.tokenize(sub, pos, delim); // take off the surrounding < and > signs // String grammar_name; sub.substr(grammar_name, 1, sub.length()-2); // add it to the list // grammar_name_list_a.concat(grammar_name); } } // end of the while loop } // end of the for loop // exit gracefully // return true;}// function: initialize statistical models based on given symbols//boolean initializeStat(Sof& sof_a, Vector<String>& symbol_list_a, long& num_features_a) { // number of symbols in the input symbol list // long num_symbols = 0; // declare a hash table // HashTable<SearchSymbol, Long> symbol_hash; // insert each symbol and its index into the hash table // for(long i = 0; i < symbol_list_a.length(); i++) { SearchSymbol symbol; Long* index; symbol.assign(symbol_list_a(i)); index = new Long(i); symbol_hash.insert(symbol, index); // get the number of symbols // num_symbols++; } // write the hash table into the output sof file // symbol_hash.write(sof_a, 2, L"symbol_hashtable"); // initialize statistical models // // declare variables // Vector<MixtureModel> mix_models(num_symbols); Vector<StatisticalModel> stat_models(num_symbols); VectorFloat weights(1), mean(num_features_a); MatrixFloat cov; String mean_str, value_str; // set Gaussian model // create mean and value strings // for (long i = 0; i < num_features_a - 1; i++) { mean_str.concat(L"0.0, "); value_str.concat(L"1.0, "); } mean_str.concat(L"0"); value_str.concat(L"1.0"); // set mean // mean.assign(mean_str); // set covariance // cov.assign(num_features_a, num_features_a, value_str, Integral::DIAGONAL); // set Gaussian // GaussianModel gau_model; gau_model.setMean(mean); gau_model.setCovariance(cov); // set weights for mixture model // weights.assign(L"1"); // set each stat models // for (long i = 0; i < num_symbols; i++) { // set mixture model // mix_models(i).setWeights(weights); mix_models(i).add(gau_model); // set statistical model // stat_models(i).setType(StatisticalModel::MIXTURE_MODEL); stat_models(i).assign(mix_models(i)); } // write the statistical models into the output sof file // stat_models.write(sof_a, 2, L"stat_models"); // exit gracefully // return true;}// function: read transcription input text file and output it into a sof file//boolean convertTrans(Filename& trans_file_a, Sof& sof_a) { // declare a string vector to store the transcription information // Vector<String> trans; // open the input file in read mode // File read_trans_file; if (!read_trans_file.open(trans_file_a, File::READ_ONLY)) { Console::put(L"Error in opening transcription input file"); } // read the string lines // String input_line; while(read_trans_file.get(input_line)) { trans.concat(input_line); } // close the input text file // read_trans_file.close(); // write the transcription into the output sof file // trans.write(sof_a, 0); // exit gracefully // return true;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -