isip_network_converter.cc
// file: $isip/util/asr/isip_network_converter.cc
// version: $Id: isip_network_converter.cc,v 1.9 2003/05/12 22:28:49 huang Exp $
//

// isip include files
//
#include <LanguageModel.h>
#include "isip_network_converter.h"

// isip_network_converter: language model converter utility
//
// this is the language model converter utility. it can be used to
// convert the output of the network builder into the model file that
// can be used by the isip_recognizer.
//
int main(int argc, const char** argv) {

  // set up the sdb
  //
  Sdb sdb;
  CommandLine cmdl(sdb);
  cmdl.setUsage(
#include "usage_message.text"
		);
  cmdl.setHelp(
#include "help_message.text"
	       );
  cmdl.setIdent("$Revision: 1.9 $",
		"$Name: isip_r00_n11 $",
		"$Date: 2003/05/12 22:28:49 $");

  // add a command line option for the input format
  //
  String input_format(L"NATIVE");
  cmdl.addOptionParam(input_format, OPTION_INPUT_FORMAT, L"NATIVE");

  // add a command line option for the output format
  //
  String output_format(L"JSGF");
  cmdl.addOptionParam(output_format, OPTION_OUTPUT_FORMAT, L"JSGF");

  // add a command line option for the output type
  //
  String output_type(L"TEXT");
  cmdl.addOptionParam(output_type, OPTION_OUTPUT_TYPE, L"TEXT");

  // add a command line option for the debug level
  //
  DebugLevel debug_level;
  cmdl.addOptionParam(debug_level, OPTION_DEBUG_LEVEL, DEBUG_LEVEL_DEFAULT);

  // parse the command line
  //
  if (!cmdl.parse(argc, argv)) {
    cmdl.printUsage();
  }

  // input file and output files
  //
  Filename input_lm_file;
  Filename output_lm_file;
  Filename output_sm_file;

  // get the input file
  //
  if (!sdb.gotoFirst()) {
    cmdl.printUsage();
  }
  sdb.getName(input_lm_file);

  // get the output language model file
  //
  if (!sdb.gotoNext()) {
    cmdl.printUsage();
  }
  sdb.getName(output_lm_file);

  // get the output statistical model file
  //
  if (!sdb.gotoNext()) {
    cmdl.printUsage();
  }
  sdb.getName(output_sm_file);

  // print the command line
  //
  cmdl.printCommandLine();
  cmdl.printVersion();

  // check the input file arguments
  //
  if (input_lm_file.eq(NULL)) {
    String msg(L"Error: no input file specified");
    Console::put(msg);
    Error::handle(PROG_NAME, L"main", Error::NO_PARAM_FILE,
		  __FILE__, __LINE__);
  }

  if (output_lm_file.eq(NULL)) {
    String msg(L"Error: no output file specified");
    Console::put(msg);
    Error::handle(PROG_NAME, L"main", Error::NO_PARAM_FILE,
		  __FILE__, __LINE__);
  }

  // set the options to upper case
  //
  input_format.toUpper();
  output_format.toUpper();
  output_type.toUpper();

  //Console::put(input_format);
  //Console::put(output_format);
  //Console::put(output_type);

  if (input_format.eq(output_format)) {
    String msg(L"input and output formats are the same");
    Console::put(msg);
    Error::handle(PROG_NAME, L"main", Error::ARG, __FILE__, __LINE__);
  }

  // set the output type
  //
  LanguageModel::OUTPUT_TYPE out_type = LanguageModel::TEXT;

  if (output_type.eq(L"TEXT") || output_type.eq(L"text")) {
    out_type = LanguageModel::TEXT;
  }
  else if (output_type.eq(L"BINARY") || output_type.eq(L"binary")) {
    out_type = LanguageModel::BINARY;
  }
  else {
    cmdl.printUsage();
  }

  // input is the native java network builder format
  //
  if (input_format.eq(L"NATIVE") && output_format.eq(L"DIGRAPH")) {
    nativeToModel(input_lm_file, output_lm_file, output_sm_file,
		  LanguageModel::NATIVE, out_type);
  }
  else if (input_format.eq(L"NATIVE") && output_format.eq(L"JSGF")) {
    nativeToModel(input_lm_file, output_lm_file, output_sm_file,
		  LanguageModel::JSGF, out_type);
  }
  else if (input_format.eq(L"DIGRAPH") || input_format.eq(L"JSGF")) {

    if (output_format.eq(L"NATIVE")) {
      modelToNative(input_lm_file, output_lm_file);
    }
    else if (output_format.eq(L"JSGF")) {
      modelToModel(input_lm_file, output_lm_file,
		   LanguageModel::JSGF, out_type);
    }
    else if (output_format.eq(L"DIGRAPH")) {
      modelToModel(input_lm_file, output_lm_file,
		   LanguageModel::NATIVE, out_type);
    }
    else {
      cmdl.printUsage();
    }
  }
  else {
    cmdl.printUsage();
  }

  // exit gracefully
  //
  return Integral::exit();
}

// method: modelToModel
//
// arguments:
//  Filename& input_file_name_a: (input) file name
//  Filename& output_file_name_a: (output) file name
//  LanguageModel::OUTPUT_FORMAT out_format_a: (input) output model format
//  LanguageModel::OUTPUT_TYPE out_type_a: (input) output file type
//
// return: a boolean value indicating status
//
// this method loads a language model and stores it again in the
// requested output format and type
//
boolean modelToModel(Filename& input_file_name_a,
		     Filename& output_file_name_a,
		     LanguageModel::OUTPUT_FORMAT out_format_a,
		     LanguageModel::OUTPUT_TYPE out_type_a) {

  // the search levels, sub-graphs are maintained at each SearchLevel
  //
  Vector<SearchLevel> search_levels(1);

  // use the language model class to load the input model
  //
  LanguageModel lm_model_01;
  lm_model_01.load(input_file_name_a, search_levels);

  // use the language model class to output different types
  //
  LanguageModel lm_model_02;
  lm_model_02.store(output_file_name_a, search_levels,
		    out_format_a, out_type_a);

  // exit gracefully
  //
  return true;
}

// method: modelToNative
//
// arguments:
//  Filename& input_file_name_a: (input) file name
//  Filename& output_file_name_a: (output) file name
//
// return: a boolean value indicating status
//
// this method loads a language model and writes it back out in the
// native network builder (hash table) format
//
boolean modelToNative(Filename& input_file_name_a,
		      Filename& output_file_name_a) {

  // the search levels, sub-graphs are maintained at each SearchLevel
  //
  Vector<SearchLevel> search_levels(1);

  // use the language model class to load the input model
  //
  LanguageModel lm_model;
  lm_model.load(input_file_name_a, search_levels);

  // store the number of levels
  //
  HashTable<String, String> hash_data;
  String hash_key;

  long num_of_levels = search_levels.length();
  String num_levels;
  num_levels.assign(num_of_levels);
  hash_key.assign(IO_CDATA_LEVEL_SIZE);
  hash_data.insert(hash_key, &num_levels);

  // read each level
  //
  for (long i = 0; i < num_of_levels; i++) {

    // set the prefix
    //
    String curr_level;
    curr_level.assign((long)i);
    String level_prefix;
    level_prefix.assign(IO_SEARCHLEVEL_PREFIX);
    level_prefix.concat(curr_level);

    // read one level
    //
    Vector<SearchSymbol> graph_name;
    if (i == 0) {
      graph_name.concat(LEVEL_TREE_TOP_GRAMMAR);
    }
    else {
      if (search_levels(i - 1).getContextMap().length() > 0) {
	convertContextMapping(search_levels(i - 1).getContextMap(),
			      graph_name);
      }
      else {
	graph_name = search_levels(i - 1).getSymbolTable();
      }
    }
    storeSearchLevel(search_levels(i), hash_data, graph_name, level_prefix);
  }

  // write to the file
  //
  File output_file;
  if (!output_file.open(output_file_name_a, File::WRITE_ONLY)) {
    return Error::handle(output_file_name_a, L"open error", Error::TEST,
			 __FILE__, __LINE__);
  }

  // set the string
  //
  String output;
  //writeHashTable(hash_data, output, L"test");
  writeHashTable(hash_data, output_file, L"test");
  //Console::put(output);
  //output_file.put(output);
  output_file.close();

  // exit gracefully
  //
  return true;
}

// method: nativeToModel
//
// arguments:
//  Filename& input_file_name_a: (input) file name
//  Filename& output_file_name_a: (output) language model file name
//  Filename& output_sm_file_a: (output) statistical model file name
//  LanguageModel::OUTPUT_FORMAT out_format_a: (input) output model format
//  LanguageModel::OUTPUT_TYPE out_type_a: (input) output file type
//
// return: a boolean value indicating status
//
// this method reads the native network builder data into a hash table
// and converts it to the requested model format
//
boolean nativeToModel(Filename& input_file_name_a,
		      Filename& output_file_name_a,
		      Filename& output_sm_file_a,
		      LanguageModel::OUTPUT_FORMAT out_format_a,
		      LanguageModel::OUTPUT_TYPE out_type_a) {

  // read the file line by line and parse the input data:
  // the data will be stored in a HashTable
  //
  HashTable<String, String> hash_data;
  readHashTable(input_file_name_a, hash_data);

  // read the number of levels
  //
  long num_of_levels = 0;
  hash_data.get(IO_CDATA_LEVEL_SIZE)->get(num_of_levels);
  Long level_num(num_of_levels);
  //level_num.debug(L"level_num");

  // the search levels, sub-graphs are maintained at each SearchLevel
  //
  Vector<SearchLevel> search_levels(num_of_levels);

  // read each level
  //
  for (long i = 0; i < num_of_levels; i++) {

    // set the prefix
    //
    String curr_level;
    curr_level.assign((long)i);
    String level_prefix;
    level_prefix.assign(IO_SEARCHLEVEL_PREFIX);
    level_prefix.concat(curr_level);

    // read one level
    //
    readSearchLevel(search_levels(i), hash_data, level_prefix);
  }

  // use the language model class to output different types
  //
  LanguageModel lm_model;
  lm_model.store(output_file_name_a, search_levels, out_format_a, out_type_a);

  // write the statistical model file
  //
  if (!output_sm_file_a.eq(NULL)) {

    // print messages
    //
    String msg(L"store the statistical file");
    Console::put(msg);

    // store the sm symbols
    //
    Sof sof_sm;
    sof_sm.open(output_sm_file_a, File::WRITE_ONLY);

    // read each level
    //
    for (long i = 0; i < num_of_levels; i++) {

      // set the prefix
      //
      String curr_level;
      curr_level.assign((long)i);
      String level_prefix;
      level_prefix.assign(IO_SEARCHLEVEL_PREFIX);
      level_prefix.concat(curr_level);

      // write the symbols to the sm file
      //
      Vector<SearchSymbol> sm_symbols;
      HashTable<SearchSymbol, Long> sm_table;

      // read one level
      //
      readSMSymbols(sm_symbols, hash_data, level_prefix);

      for (long j = 0; j < sm_symbols.length(); j++) {

	// no multiple copies
	//
	if (sm_table.get(sm_symbols(j)) == NULL) {
	  Long index(j);

	  // put the data into the hash table
	  //
	  sm_table.insert(sm_symbols(j), &index);
	}
      }

      // debug message
      //
      //sm_table.debug(L"sm_table");

      // write the hash table to the output sof file
      //
      if (!sm_table.isEmpty()) {
	if (!sm_table.write(sof_sm, i, SearchLevel::PARAM_STAT_HASH)) {
	  return Error::handle(L"isip_network_converter",
			       L"store symbols with statistical models",
			       Error::ARG, __FILE__, __LINE__);
	}
      }
    }

    // close the sm file
    //
    sof_sm.close();
  }

  // exit gracefully
  //
  return true;
}

// method: readHashTable
//
// arguments:
//  Filename&: (input) file name
//  HashTable&: (output) the data will be put into the hash table
//
// return: a boolean value indicating status
//
// this method reads the data into a hash table
//
boolean readHashTable(Filename& input_file_name,
		      HashTable<String, String>& hash_table_a) {

  // set local variables
  //
  File input_file;
  String line;

  input_file.open(input_file_name, File::READ_ONLY);

  while (input_file.get(line)) {

    // parse the line
    //
    line.trim();
    if (line.firstChr(COMMENT_CHAR) == 0) {
      continue;
    }
    else {

      // find the parameter name and the value
      //
      long count = line.countTokens(DELIMITER_CHAR);
      if (count != 2) {
	return Error::handle(L"read", L"parsing error in model file",
			     Error::ARG, __FILE__, __LINE__);
      }

      long pos = 0;
      Vector<String> values(count);

      // get each word by tokenizing using multiple spaces as a delimiter
      //
      for (long i = 0; i < count; i++) {
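The listing above is cut off partway through readHashTable(), but the main conversion path is complete. For reference, the following is a minimal sketch of the load/store round trip that modelToModel() performs, written against the same ISIP foundation classes (Filename, Vector<SearchLevel>, LanguageModel) used above. The file names are placeholders, and the Filename::assign(unichar*) call is an assumption modeled on the String::assign() usage elsewhere in this utility; only the LanguageModel::load() and store() calls mirror signatures that actually appear in the code.

// file: convert_sketch.cc (hypothetical example, not part of this utility)
//
#include <LanguageModel.h>

int main(int argc, const char** argv) {

  // placeholder file names: an input model (e.g. JSGF or DIGRAPH) and
  // the converted output model
  //
  Filename input_file;
  input_file.assign(L"input_grammar.sof");
  Filename output_file;
  output_file.assign(L"converted_grammar.sof");

  // the search levels: sub-graphs are maintained at each SearchLevel
  //
  Vector<SearchLevel> search_levels(1);

  // load the input model with one LanguageModel object, then store it
  // as a text-mode JSGF model with a second, mirroring the pattern in
  // modelToModel() above
  //
  LanguageModel lm_model_01;
  lm_model_01.load(input_file, search_levels);

  LanguageModel lm_model_02;
  lm_model_02.store(output_file, search_levels,
		    LanguageModel::JSGF, LanguageModel::TEXT);

  // exit gracefully
  //
  return Integral::exit();
}

In normal use the same conversion is driven from the command line instead: main() above expects three positional file arguments (input model, output language model, output statistical model) plus the format and type options. The literal option flag strings come from usage_message.text, which is not part of this listing.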