isip_model_creator.cc
// file: $isip/util/speech/isip_model_creator/isip_model_creator.cc
// version: $Id: isip_model_creator.cc,v 1.11 2002/07/16 03:10:00 wang Exp $
//

// isip include files
//
#include "isip_model_creator.h"

// isip_model_creator
// This utility program converts four input text files:
//  1. language model file in JSGF
//  2. lexicon file
//  3. acoustic model file in JSGF
//  4. transcription file
// It then outputs three SOF files:
//  1. a language model SOF file containing the converted first three inputs
//  2. a statistical model SOF file containing initialized statistical models
//  3. a transcription SOF file containing a vector of transcription strings
//
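// Example (illustrative sketch only, not part of the original source):
// based on the parsing logic in convertLM() below, the language model
// input text file holds one JSGF grammar per "#JSGF" header line -- the
// word-level grammar, optionally preceded by grammars defining the ISIP
// graph start/terminal vertices and a nonspeech grammar.  The grammar
// names and rule bodies below are hypothetical, chosen only to show the
// expected layout of a three-grammar input:
//
//   #JSGF V1.0;
//   grammar start;
//   public <ISIP_JSGF_1_0_START> = SILENCE;
//
//   #JSGF V1.0;
//   grammar term;
//   public <ISIP_JSGF_1_0_TERM> = SILENCE;
//
//   #JSGF V1.0;
//   grammar words;
//   public <words> = HELLO | WORLD;
//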
// function prototypes
//
boolean getSymbols(String grammar_a, Vector<String>& symbol_list_a);
boolean isSymbol(const String& arg_a);
boolean convertLM(Filename& lm_file_a, Sof& sof_a, Vector<String>& symbol_list_a);
boolean isStart(String& arg_a);
boolean isTerm(String& arg_a);
boolean isNonSpeech(String& arg_a);
boolean convertLexicon(Filename& lexicon_file_a, Sof& sof_a, Vector<String>& input_symbol_list_a, Vector<String>& output_symbol_list_a);
boolean convertAcoustic(Filename& acoustic_file_a, Sof& sof_a, Vector<String>& input_symbol_list_a, Vector<String>& output_symbol_list_a);
boolean defaultGrammar(String& grammar_a, String& symbol_a, String& def_model_a, String& reserved_word_a, long& reserved_word_counter_a);
boolean getGrammarNames(Vector<String>& grammar_name_list_a, Vector<String>& grammar_list_a);
boolean initializeStat(Sof& sof_a, Vector<String>& symbol_list_a, long& num_features_a);
boolean convertTrans(Filename& trans_file_a, Sof& sof_a);

// main application
//
int main(int argc, const char** argv) {

  // setup the command line
  //
  Sdb sdb;
  CommandLine cmdl(sdb);
  cmdl.setUsage(
#include "usage_message.text"
  );
  cmdl.setHelp(
#include "help_message.text"
  );
  cmdl.setIdent("$Revision: 1.11 $",
                "$Name: isip_r00_n11 $",
                "$Date: 2002/07/16 03:10:00 $");

  // add a command line option for the language model file
  //
  Filename lm_file;
  Filename def_lm_input_file(L"lm_model.text");
  cmdl.addOptionParam(lm_file, OPTION_LM_FILE, def_lm_input_file);

  // add a command line option for the lexicon file
  //
  Filename lexicon_file;
  Filename def_lexicon_input_file(L"lexicon.text");
  cmdl.addOptionParam(lexicon_file, OPTION_LEXICON_FILE, def_lexicon_input_file);

  // add a command line option for the acoustic model file
  //
  Filename acoustic_file;
  Filename def_acoustic_input_file(L"acoustic_model.text");
  cmdl.addOptionParam(acoustic_file, OPTION_ACOUSTIC_FILE, def_acoustic_input_file);

  // add a command line option for the transcription file
  //
  Filename trans_file;
  Filename def_trans_input_file(L"trans.text");
  cmdl.addOptionParam(trans_file, OPTION_TRANSCRIPTION_FILE, def_trans_input_file);

  // add a command line option for the output language model file
  //
  Filename output_lm_file;
  Filename def_lm_output_file(L"lm_model.sof");
  cmdl.addOptionParam(output_lm_file, OPTION_OUTPUT_LM_MODEL, def_lm_output_file);

  // add a command line option for the output statistical model file
  //
  Filename output_stat_file;
  Filename def_stat_output_file(L"stat_model.sof");
  cmdl.addOptionParam(output_stat_file, OPTION_OUTPUT_STAT_MODEL, def_stat_output_file);

  // add a command line option for the output transcription file
  //
  Filename output_trans_file;
  Filename def_trans_output_file(L"trans.sof");
  cmdl.addOptionParam(output_trans_file, OPTION_OUTPUT_TRANSCRIPTION, def_trans_output_file);

  // parse the command line
  //
  if (!cmdl.parse(argc, argv)) {
    cmdl.printUsage();
  }

  // read the input model files and convert them into the output LM sof file
  //

  // declare and open output Sof file for storing the converted models
  //
  Sof lm_sof;
  lm_sof.open(output_lm_file, File::WRITE_ONLY, File::TEXT);

  // declare symbol lists
  //
  Vector<String> lm_symbol_list;
  Vector<String> lexicon_symbol_list;
  Vector<String> acoustic_symbol_list;

  // read language model text file
  //
  convertLM(lm_file, lm_sof, lm_symbol_list);

  // read lexicon text file
  //
  convertLexicon(lexicon_file, lm_sof, lm_symbol_list, lexicon_symbol_list);

  // read acoustic model text file
  //
  convertAcoustic(acoustic_file, lm_sof, lexicon_symbol_list, acoustic_symbol_list);

  // close the output sof file
  //
  lm_sof.close();

  // set the number of features
  //
  long num_feature = 1;

  // declare and open output Sof file for storing the statistical models
  //
  Sof stat_sof;
  stat_sof.open(output_stat_file, File::WRITE_ONLY, File::TEXT);

  // initialize statistical models
  //
  initializeStat(stat_sof, acoustic_symbol_list, num_feature);

  // close the output sof file
  //
  stat_sof.close();

  // declare and open output Sof file for storing the transcription
  //
  Sof trans_sof;
  trans_sof.open(output_trans_file, File::WRITE_ONLY, File::TEXT);

  // read the transcription text file
  //
  convertTrans(trans_file, trans_sof);

  // close the output sof file
  //
  trans_sof.close();

  // exit gracefully
  //
  return Integral::exit();
}

// function implementations
//

// function: get a list of symbols in a given search level in the sof file
//
boolean getSymbols(String grammar_a, Vector<String>& symbol_list_a) {

  String sub, delim(L" \n;+*");
  long pos = 0;
  boolean rule_reached = false;

  // tokenize the input grammar
  //
  while (grammar_a.tokenize(sub, pos, delim)) {

    // search "public" keyword
    //
    if (sub.eq(L"public")) {
      rule_reached = true;
    }

    // starting from the public rule, get symbols
    //
    if (rule_reached) {

      // if the sub string is a symbol
      //
      if (isSymbol(sub) && sub.ne(L"public")) {

        // check if it's already in the symbol list
        //
        boolean exist = false;
        for (long i = 0; i < symbol_list_a.length(); i++) {
          if (sub.eq(symbol_list_a(i))) {
            exist = true;
          }
        }
        if (!exist) {

          // add the symbol to the symbol list
          //
          symbol_list_a.concat(sub);
        }
      }
    }
  }  // end of the while loop

  // exit gracefully
  //
  return true;
}

// function: check if a string is a symbol
//
boolean isSymbol(const String& arg_a) {

  for (long i = 0; i < arg_a.length(); i++) {
    Char tmp = arg_a(i);
    if (!(tmp.isAlpha() || tmp.isDigit() || tmp.eq(L'!') || tmp.eq(L'_'))) {
      return false;
    }
  }

  // exit gracefully
  //
  return true;
}
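// Example (illustrative sketch only, not part of the original source):
// getSymbols() tokenizes a grammar on the delimiters " \n;+*" and keeps
// only tokens that pass isSymbol(), i.e. strings built from letters,
// digits, '!' and '_'.  For a hypothetical rule line such as
//
//   public <words> = HELLO | WORLD | !SIL;
//
// the rule name "<words>", "=", and "|" are rejected, "public" is skipped
// explicitly, and the collected symbols are: HELLO, WORLD, !SIL.
//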
// function: convert language model input text file
//
boolean convertLM(Filename& lm_file_a, Sof& sof_a, Vector<String>& symbol_list_a) {

  // number of grammars contained in the input file
  //
  long num_grammars = 0;

  // open the input lm_file in read mode
  //
  File read_lm_file;
  if (!read_lm_file.open(lm_file_a, File::READ_ONLY)) {
    Console::put(L"Error in opening language model input file");
  }

  // get the number of grammars in the input file
  //
  String str;
  while (read_lm_file.get(str)) {
    String sub;
    long pos(0);
    str.tokenize(sub, pos);
    if (sub.eq(L"#JSGF")) {
      num_grammars++;
    }
  }

  // close the input text file
  //
  read_lm_file.close();

  // check if the number of grammars is correct: this LM input file should
  // include one model grammar, two graph default vertices definitions and
  // a grammar for nonspeech symbols, but the vertices definitions and the
  // nonspeech definitions are optional
  //
  if (!(num_grammars == 4 || num_grammars == 3 || num_grammars == 1)) {
    return Error::handle(lm_file_a, L"invalid number of grammars\n",
                         Error::TEST, __FILE__, __LINE__);
  }

  // declare a vector of strings to contain the input text file
  //
  Vector<String> lm_text(num_grammars);

  // open the input file again
  //
  if (!read_lm_file.open(lm_file_a, File::READ_ONLY)) {
    Console::put(L"Error in opening language model input file");
  }

  // read the input text and add it line by line to the string vector
  //
  String str_lm, tmp_header;
  long header_counter = 0;

  for (long i = 0; i < num_grammars; i++) {

    // add the grammar head line
    //
    lm_text(i).assign(L"grammar = {\n");
    lm_text(i).concat(tmp_header);

    // get text line by line
    //
    while (read_lm_file.get(str_lm)) {

      // check if a new grammar is reached
      // (a grammar always starts with a #JSGF header)
      //
      String sub;
      long pos(0);
      str_lm.tokenize(sub, pos);
      if (sub.eq(L"#JSGF")) {
        if (header_counter > 0) {
          tmp_header.assign(str_lm);
          tmp_header.concat(L'\n');
          break;
        }
        header_counter++;
      }
      lm_text(i).concat(str_lm);
      lm_text(i).concat(L'\n');
    }

    // add the grammar end line
    //
    lm_text(i).concat(L"};\n\n");
  }  // end of the for loop

  // close the input lm_file
  //
  read_lm_file.close();

  // start storing the input language models in the output sof file:
  // declare strings to contain the grammars for the default ISIP graph
  // vertices, the nonspeech grammars, and the lm grammars
  //
  String start_def, term_def, nonsp_def, lm_grammar;

  // declare the line of the algorithm tag
  //
  String algo_tag(L"algorithm = \"JSGF\";\n");

  // set the size of the objects to be written
  //
  long size_1 = algo_tag.length();
  long size_2 = algo_tag.length();

  // boolean variables to indicate if there are definitions for the graph
  // start/terminal points and the nonspeech symbols
  //
  boolean def_start = false;
  boolean def_term = false;
  boolean def_nonsp = false;

  // loop through all the grammars in the input LM text file
  //
  for (long i = 0; i < lm_text.length(); i++) {
    if (isStart(lm_text(i))) {
      start_def.concat(lm_text(i));
      def_start = true;
    }
    else if (isTerm(lm_text(i))) {
      term_def.concat(lm_text(i));
      def_term = true;
    }
    else if (isNonSpeech(lm_text(i))) {
      nonsp_def.concat(lm_text(i));
      def_nonsp = true;
    }
    else {

      // this is an LM grammar
      //
      lm_grammar.concat(lm_text(i));
    }
  }

  // set the sizes
  //
  size_1 += start_def.length() + term_def.length();
  size_2 += nonsp_def.length() + lm_grammar.length();

  // write the definitions of the graph starting and stopping points
  // into the sof file
  //
  if (def_start && def_term) {
    sof_a.put(L"JSGF", 100, size_1);
    sof_a.puts(algo_tag);
    sof_a.puts(start_def);
    sof_a.puts(term_def);
  }

  // write the language model into the sof file
  //
  sof_a.put(L"JSGF", 0, size_2);
  sof_a.puts(algo_tag);
  if (def_nonsp) {
    sof_a.puts(nonsp_def);
  }
  sof_a.puts(lm_grammar);

  // get the symbol list in this level
  //
  getSymbols(lm_grammar, symbol_list_a);

  // exit gracefully
  //
  return true;
}
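// Example (illustrative sketch only, not part of the original source):
// for a three-grammar input like the one sketched near the top of this
// file, convertLM() writes two text objects into the output Sof file --
// one at tag 100 holding the start/terminal vertex grammars and one at
// tag 0 holding the (optional nonspeech and) word grammars.  Ignoring
// whatever framing Sof::put() itself adds, the text passed to puts() for
// the tag-0 object is roughly:
//
//   algorithm = "JSGF";
//   grammar = {
//   #JSGF V1.0;
//   grammar words;
//   public <words> = HELLO | WORLD;
//   };
//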
// function: check if the input grammar string is a definition for
// ISIP default graph starting point
//
boolean isStart(String& arg_a) {

  String sub, input(arg_a), delim(L" \n");
  long pos = 0;

  // search "public" keyword
  //
  while (input.tokenize(sub, pos, delim)) {
    if (sub.eq(L"public")) {

      // check the next token
      //
      input.tokenize(sub, pos, delim);
      if (sub.eq(L"<ISIP_JSGF_1_0_START>")) {

        // found the start symbol definition
        //
        return true;
      }
    }
  }

  // start symbol definition not found
  //
  return false;
}

// function: check if the input grammar string is a definition for
// ISIP default graph terminal point
//
boolean isTerm(String& arg_a) {

  String sub, input(arg_a), delim(L" \n");
  long pos = 0;

  // search "public" keyword
  //
  while (input.tokenize(sub, pos, delim)) {
    if (sub.eq(L"public")) {

      // check the next token
      //
      input.tokenize(sub, pos, delim);
      if (sub.eq(L"<ISIP_JSGF_1_0_TERM>")) {

        // found the terminal symbol definition
        //
        return true;
      }
    }
  }

  // terminal symbol definition not found
  //
  return false;
}
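// Example (illustrative sketch only, not part of the original source):
// isStart() and isTerm() above simply scan a converted grammar block for
// the token "public" immediately followed by the reserved rule name, so
// hypothetical grammars containing lines such as
//
//   public <ISIP_JSGF_1_0_START> = SILENCE;
//   public <ISIP_JSGF_1_0_TERM> = SILENCE;
//
// are routed into the tag-100 Sof object written by convertLM() above.
//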