📄 ngrm_02.cc
字号:
// file: $isip/class/stat/NGramModel/ngrm_02.cc// version: $Id: ngrm_02.cc,v 1.3 2002/07/30 05:03:25 zheng Exp $//// isip include files//#include "NGramModel.h"#include <Console.h>// method: diagnose//// arguments:// Integral::DEBUG level: (input) debug level for diagnostics//// return: a boolean value indicating status//boolean NGramModel::diagnose(Integral::DEBUG level_a) { //--------------------------------------------------------------------- // // 0. preliminaries // //--------------------------------------------------------------------- // output the class name // if (level_a > Integral::NONE) { String output(L"diagnosing class "); output.concat(CLASS_NAME); output.concat(L": "); Console::put(output); Console::increaseIndention(); } //-------------------------------------------------------------------- // // 1. required public methods // //-------------------------------------------------------------------- // set indentation // if (level_a > Integral::NONE) { Console::put(L"testing required public methods...\n"); Console::increaseIndention(); } // test destructor/constructor(s) // NGramModel ngrm0; NGramModel ngrm1(ngrm0); if (!ngrm1.eq(ngrm0)) { return Error::handle(name(), L"copy constructor", Error::TEST, __FILE__, __LINE__); } // test large allocation construction and deletion // if (level_a > Integral::BRIEF) { Console::put(L"testing large chunk memory allocation and deletion:\n"); // set the memory to a strange block size so we can hopefully catch any // frame overrun errors // NGramModel::setGrowSize((long)500); NGramModel* pft = new NGramModel(); for (long j = 1; j <= 100; j++) { NGramModel** pfts = new NGramModel*[j * 100]; // create the objects // for (long i = 0; i < j * 100; i++) { pfts[i] = new NGramModel(); } // delete objects // for (long i = (j * 100) - 1; i >= 0; i--) { delete pfts[i]; } delete [] pfts; } delete pft; } // test the i/o methods // NGramNode node2, node3; NGramModel rd_ngrm1, rd_ngrm2; HashTable<Long, NGramNode>* ht1 = ngrm0.getGramHash(); ht1->setCapacity(3); ht1->insert(2, &node2); ht1->insert(3, &node3); // we need binary and text sof files // String tmp_filename0; Integral::makeTemp(tmp_filename0); String tmp_filename1; Integral::makeTemp(tmp_filename1); // open files in write mode // Sof tmp_file0; tmp_file0.open(tmp_filename0, File::WRITE_ONLY, File::TEXT); Sof tmp_file1; tmp_file1.open(tmp_filename1, File::WRITE_ONLY, File::BINARY); if (level_a > Integral::DETAILED) { ngrm0.debug(L"ngrm0"); } // write to sof file // ngrm0.write(tmp_file0, 0); ngrm0.write(tmp_file1, 0); // close the files // tmp_file0.close(); tmp_file1.close(); // open the files in read mode // tmp_file0.open(tmp_filename0); tmp_file1.open(tmp_filename1); // read the value back // rd_ngrm1.read(tmp_file0, 0); if (!rd_ngrm1.eq(ngrm0)) { return Error::handle(name(), L"read", Error::TEST, __FILE__, __LINE__); } rd_ngrm2.read(tmp_file1, 0); if (!rd_ngrm2.eq(ngrm0)) { return Error::handle(name(), L"read", Error::TEST, __FILE__, __LINE__); } // close and delete the temporary files // tmp_file0.close(); tmp_file1.close(); File::remove(tmp_filename0); File::remove(tmp_filename1); // reset indentation // if (level_a > Integral::NONE) { Console::decreaseIndention(); } //-------------------------------------------------------------------------- // // 2. class-specific public methods: // algorithm methods // //-------------------------------------------------------------------------- // set indentation // if (level_a > Integral::NONE) { Console::put(L"testing class-specific public methods: computational methods...\n"); Console::increaseIndention(); } // build a hash table which convert String to Long // String all_symbol(L"!SENT_START !SENT_END ZERO ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE OH"); String symbol; long pos = 0; long symbol_index = 0; Long Symbol_index; Vector<String> symbol_table; HashTable<String, Long> symbol_hash; Sof sof; String file_name; // create a symbol table and a hash table // while(all_symbol.tokenize(symbol, pos)) { symbol.trim(); if (!symbol.eq(String::DEF_VALUE)) { symbol_table.setLength(symbol_index + 1); symbol_table(symbol_index).assign(symbol); Symbol_index.assign(symbol_index); symbol_hash.insert(symbol, &Symbol_index); symbol_index++; } } // open the language model file // file_name.assign(L"../../../doc/examples/data/models/tidigits_trigram.arpa"); if (!sof.open(file_name, File::READ_ONLY)) { return Error::handle(file_name, L"open", Error::FILE_NOTFND, __FILE__, __LINE__); } // configure ngram object and load a trigram file // NGramModel ngrm; ngrm.setOrder(3); ngrm.load(sof, 1, symbol_table); sof.close(); // test eq method // NGramModel ngrm_tmp; ngrm_tmp.assign(ngrm); if (!ngrm_tmp.eq(ngrm)) { return Error::handle(name(), L"eq", Error::TEST, __FILE__, __LINE__); } // test getScore // String symb1(L"ONE"), symb2(L"TWO"), symb3(L"FOUR"); VectorLong index(2); Float exp_res; // verify a bigram (ONE TWO) probability // index(0) = *symbol_hash.get(symb1); index(1) = *symbol_hash.get(symb2); Float score = ngrm.getScore(index); exp_res = -1.17367 * Integral::LN10; if (!score.almostEqual(exp_res)) { exp_res.debug(L"expected result for \"ONE TWO\""); return Error::handle(name(), L"getScore", Error::TEST, __FILE__, __LINE__); } // verify a trigram (ONE FOUR TWO) probability // index.setLength(3); index(1) = *symbol_hash.get(symb3); index(2) = *symbol_hash.get(symb2); score = ngrm.getScore(index); exp_res = -1.065393 * Integral::LN10; if (!score.almostEqual(exp_res)) { exp_res.debug(L"expected result for \"ONE FOUR TWO\""); return Error::handle(name(), L"getScore", Error::TEST, __FILE__, __LINE__); } // verify a trigram (ONE TWO OH) backoff probability // index(0) = *symbol_hash.get(symb1); index(1) = *symbol_hash.get(symb2); index(2) = *symbol_hash.get(L"OH"); score = ngrm.getScore(index); exp_res = 0.04061788 * Integral::LN10 + -1.20412 * Integral::LN10; if (!score.almostEqual(exp_res)) { exp_res.debug(L"expected result for \"ONE TWO OH\""); return Error::handle(name(), L"getScore", Error::TEST, __FILE__, __LINE__); } // verify a trigram (OH ZERO THREE) backoff probability // index(0) = *symbol_hash.get(L"OH"); index(1) = *symbol_hash.get(L"ZERO"); index(2) = *symbol_hash.get(L"THREE"); score = ngrm.getScore(index); exp_res = -0.906405 * Integral::LN10 + -1.187113 * Integral::LN10; if (!score.almostEqual(exp_res)) { exp_res.debug(L"expected result for \"OH ZERO THREE\""); return Error::handle(name(), L"getScore", Error::TEST, __FILE__, __LINE__); } // write the instance of the object into the Sof file // file_name.assign(L"trigram.sof"); if (!sof.open(file_name, File::WRITE_ONLY)) { return Error::handle(file_name, L"open", Error::FILE_NOTFND, __FILE__, __LINE__); } // store it to source ngram file, and then read it back. // both should match. // ngrm.store(sof, 1); sof.close(); // load it back // NGramModel new_ngrm; new_ngrm.setOrder(3); sof.open(file_name, File::READ_ONLY); new_ngrm.load(sof, 1, symbol_table); sof.close(); // compare both // if (!ngrm.eq(new_ngrm)) { ngrm.debug(L"ngrm"); new_ngrm.debug(L"new_ngrm"); return Error::handle(name(), L"getScore", Error::TEST, __FILE__, __LINE__); } // remove files // if (level_a < Integral::ALL) { File::remove(file_name); } // test the i/o methods // file_name.assign(L"ngram.sof"); Sof tmp_file2; // open files in write mode // tmp_file0.open(tmp_filename0, File::WRITE_ONLY, File::TEXT); tmp_file1.open(tmp_filename1, File::WRITE_ONLY, File::BINARY); tmp_file2.open(file_name, File::WRITE_ONLY, File::TEXT); // write to sof file // ngrm.write(tmp_file0, 0); ngrm.write(tmp_file1, 0); ngrm.write(tmp_file2, 0); // close the files // tmp_file0.close(); tmp_file1.close(); tmp_file2.close(); // open the files in read mode // tmp_file0.open(tmp_filename0); tmp_file1.open(tmp_filename1); // read the value back // rd_ngrm1.clear(Integral::RESET); rd_ngrm1.read(tmp_file0, 0); if (!rd_ngrm1.eq(ngrm)) { return Error::handle(name(), L"read", Error::TEST, __FILE__, __LINE__); } rd_ngrm1.clear(Integral::RESET); rd_ngrm2.read(tmp_file1, 0); if (!rd_ngrm2.eq(ngrm)) { return Error::handle(name(), L"read", Error::TEST, __FILE__, __LINE__); } // close and delete the temporary files // tmp_file0.close(); tmp_file1.close(); // remove temp files // File::remove(tmp_filename0); File::remove(tmp_filename1); if (level_a < Integral::ALL) { File::remove(file_name); } // reset indentation // if (level_a > Integral::NONE) { Console::decreaseIndention(); } // -------------------------------------------------------------------- // // 3. print completion message // // -------------------------------------------------------------------- // reset indentation // if (level_a > Integral::NONE) { Console::decreaseIndention(); } if (level_a > Integral::NONE) { String output(L"diagnostics passed for class "); output.concat(name()); output.concat(L"\n"); Console::put(output); } // exit gracefully // return true;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -