📄 ngmp_05.cc
          prefix_node = history_node;
        }

        if (!break_flag) {

          // print debugging information
          //
          if (debug_level_d >= Integral::DETAILED) {
            buffer.debug(L"buffer");
          }

          // read the current word
          //
          pos = 0;
          buffer.tokenize(buf, pos, delimiter);

          // print debugging information
          //
          if (debug_level_d >= Integral::DETAILED) {
            buf.debug(L"buf");
          }

          buffer.deleteRange(0, pos);
          buffer.trimLeft();

          // get the corresponding word from the lexicon
          //
          symbol_index = symbol_hash.get(buf);
          if (symbol_index == NULL) {
            flag = false;
            buf.debug(L"err_symbol");
            return Error::handle(name(), L"load", ERR_SYMBOL,
                                 __FILE__, __LINE__, Error::WARNING);
          }

          // read the backoff score if any
          //
          if (buffer.get(backoff)) {

            // as the score is in log10 form, convert it to log_e
            //
            backoff *= Integral::LN10;
          }
          else {
            backoff = 0;
          }

          // if the current word exists in the lexicon
          //
          if (flag) {

            // configure an n-gram node
            //
            ngrm_node.setIndex(*symbol_index);
            ngrm_node.setLmScore(lmscore);
            ngrm_node.setBackoff(backoff);

            // put the node in the list
            //
            symbol_list.insert(&ngrm_node);

            // increment the number of items for this prefix
            //
            prefix_items++;
          }
        }
      }                // end else this is data line
    }                  // end while

    // we may need to clean up after an ngram read
    //
    // see if the prefix has changed. if so, then dump the current ngram
    // data
    //

    // if there are items for this prefix then add them
    //
    if (prefix_items > 0) {

      // store the items in the list to hash table
      //
      num_gram_read(curr_order) += symbol_list.length();
      storeToHash(gram_hash_a, symbol_list, prefix_node);
    }

    // reset variables
    //
    prefix_items = 0;

    // reset the prefix node
    //
    prefix_node = (NGramNode*)NULL;

    // clear the prefix
    //
    prefix_changed = true;

    // check the number of ngram
    //
    for (long order = 0; order < order_d; order++) {
      if (gram_size(order) != num_gram_read(order)) {
        gram_size.debug(L"gram_size");
        num_gram_read.debug(L"num_gram_read");
        return Error::handle(name(), L"load", ERR_NUM_GRAM,
                             __FILE__, __LINE__);
      }
    }
  }                    // end of NGRAM_ARPA format

  // other format
  //
  else {
    return Error::handle(name(), L"load", ERR_FORMAT, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: storeToHash
//
// arguments:
//  HashTable<Long, NGramNode>& gram_hash: (output) target gram hash table
//  SingleLinkedList<NGramNode>& list: (input) ngram node list
//  NGramNode* prefix_node: (input) the upper order ngram node
//
// return: a logical_1 indicating status
//
// this method stores a list of ngram nodes into hash table
//
boolean NGramParser::storeToHash(HashTable<Long, NGramNode>& gram_hash_a,
                                 SingleLinkedList<NGramNode>& list_a,
                                 NGramNode* prefix_node_a) {

  // local variables
  //
  NGramNode* node;
  Long count;
  HashTable<Long, NGramNode>* hash;

  // allocate memory for hash pointer
  //
  if (prefix_node_a == NULL) {

    // point to the unigram hash table
    //
    hash = &gram_hash_a;
  }
  else {
    hash = new HashTable<Long, NGramNode>;
    prefix_node_a->setNextGram(hash);
  }

  // find the first ngram node from list
  //
  list_a.gotoFirst();
  node = list_a.getFirst();

  // store the nodes from list to hash table
  //
  while (node != NULL) {
    hash->insert(node->getIndex(), node);
    count++;
    node = list_a.getNext();
    list_a.gotoNext();
  }

  // clean list
  //
  list_a.clear(Integral::FREE);

  // exit gracefully
  //
  return true;
}
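// ---------------------------------------------------------------------------
// illustrative sketch (not part of ngmp_05.cc): the nested table layout that
// storeToHash() builds, re-expressed with a standard container in place of
// the toolkit's HashTable / NGramNode classes. each node, keyed by its
// symbol index, owns the hash table of the next-higher-order grams that
// extend its prefix, which is what prefix_node_a->setNextGram(hash) wires up
// above. the names and types below are assumptions made for illustration.
//
#include <unordered_map>

struct GramNodeSketch {
  double lm_score;                                      // log-e probability
  double backoff;                                       // log-e backoff weight
  std::unordered_map<long, GramNodeSketch>* next_gram;  // continuations of
                                                        // this prefix, keyed
                                                        // by symbol index;
                                                        // NULL at the
                                                        // highest order
};

// the unigram table plays the role of gram_hash_a: symbol index -> node.
// bigrams hang off each unigram node's next_gram, trigrams off the bigram
// nodes, and so on.
typedef std::unordered_map<long, GramNodeSketch> UnigramHashSketch;
// ---------------------------------------------------------------------------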
// method: store
//
// arguments:
//  Sof& sof: (input) output ngram source file
//  const HashTable<Long, NGramNode>& gram_hash: (input) input gram hash table
//  const Vector<String>& symbol_table: (input) symbol table mapping index
//  const long& tag: (input) sof object instance tag
//  const String& name: (input) sof object instance name
//
// return: a boolean indicating status
//
// this method stores the ngram probabilities to an Sof file
//
boolean NGramParser::store(Sof& sof_a,
                           const HashTable<Long, NGramNode>& gram_hash_a,
                           const Vector<String>& symbol_table_a,
                           const long& tag_a,
                           const String& name_a) const {

  // local variables
  //
  VectorLong gram_size(order_d);
  const HashTable<Long, NGramNode>* hash;
  Vector<NGramNode> vec;
  const NGramNode* node = (NGramNode*)NULL;
  long len;
  Vector<VectorLong> indices[order_d];
  VectorLong keys;
  Vector<Long> tmp_keys;
  String symbol;

  // initialize values
  //
  gram_size.assign((long)0);

  // read unigrams
  //
  indices[0].setLength(1);                       // order = 1
  gram_hash_a.keys(tmp_keys);                    // get all the unigrams
  len = tmp_keys.length();
  indices[0](0).setLength(len);
  for (long n = 0; n < len; n++)
    indices[0](0)(n) = tmp_keys(n);
  gram_size(0) = len;

  // read higher order grams
  //
  for (long order = 1; order < order_d; order++) {

    // initialize variables
    //
    long lower = order - 1;
    indices[order].setLength(order + 1);         // order + 1 index columns

    // go through all the nodes in the lower order grams
    //
    for (long i = 0; i < gram_size(lower); i++) {

      // find the hash table for the gram
      //
      hash = &gram_hash_a;                       // initialize hash pointer
      for (long j = 0; j < order; j++) {
        node = hash->get(indices[lower](j)(i));
        hash = node->getNextGram();
      }

      // find the next gram for this node
      //
      if (hash == NULL) {
        continue;
      }

      hash->keys(tmp_keys);
      len = tmp_keys.length();
      keys.setLength(len);
      gram_size(order) += len;
      for (long n = 0; n < len; n++)
        keys(n) = tmp_keys(n);

      long old_len = indices[order](0).length();

      // assign the indices of lower gram
      //
      for (long k = 0; k < order; k++) {
        indices[order](k).setLength(old_len + len);
        indices[order](k).setRange(old_len, len, indices[lower](k)(i));
      }

      // assign the index of new gram
      //
      indices[order](order).concat(keys);
    }
  }

  /*
  if (debug_level_d >= Integral::DETAILED) {
    gram_size.debug(L"gram_size");
  }
  */
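  // at this point indices[order] holds (order + 1) parallel index columns:
  // for the i-th gram of that order, indices[order](j)(i) is the symbol
  // index of its j-th word, so the gram can be re-located below by walking
  // the nested hash tables with those indices. gram_size(order) counts how
  // many grams of each order were collected and supplies the "ngram N=..."
  // counts written in the header.
  //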
  // declare local variables
  //
  long obj_size;

  if (sof_a.isText()) {
    obj_size = Sof::ANY_SIZE;
  }
  else {
    printf("no binary write\n");
    exit(1);
  }

  // put the object into the sof file's index
  //
  if (!sof_a.put(name_a, tag_a, obj_size)) {
    return false;
  }

  // declare output variables
  //
  String output, value;

  // output source file format
  //
  output.assign(L"format = \"NGRAM_ARPA\";\n\n");
  sof_a.puts(output);

  // output "\data\"
  //
  sof_a.puts(L"\\data\\\n");

  // output ngram numbers
  //
  for (long i = 1; i <= order_d; i++) {
    output.assign(L"ngram ");
    value.assign(i);
    output.concat(value);
    output.concat(L"=");
    value.assign(gram_size(i - 1));
    output.concat(value);
    output.concat(L"\n");
    sof_a.puts(output);
  }

  // output a blank line
  //
  sof_a.puts(L"\n");

  // output ngram probabilities
  //
  for (long order = 0; order < order_d; order++) {

    // output "\N-grams:"
    //
    output.assign(L"\\");
    value.assign(order + 1);
    output.concat(value);
    output.concat(L"-grams:\n");
    sof_a.puts(output);

    // output probabilities from ngram nodes
    //
    len = indices[order](0).length();
    for (long i = 0; i < len; i++) {

      // find the hash table for the gram
      //
      hash = &gram_hash_a;
      for (long j = 0; j <= order; j++) {
        node = hash->get(indices[order](j)(i));
        hash = node->getNextGram();
      }

      if (node == NULL) {
        return Error::handle(name(), L"store", ERR_SYMBOL,
                             __FILE__, __LINE__);
      }

      // output probability
      //
      output.assign(node->getLmScore() / Integral::LN10);
      output.concat(L" \t");

      // output N-symbols
      //
      for (long j = 0; j <= order; j++) {
        // search_level_a.getSymbol((SearchSymbol&)value, indices[order](j)(i));
        symbol = symbol_table_a(indices[order](j)(i));
        output.concat(symbol);
        output.concat(L" ");
      }

      // output backoff probability
      //
      if (node->getBackoff() != 0) {
        output.concat(L"\t");
        value.assign(node->getBackoff() / Integral::LN10);
        output.concat(value);
      }

      // output to file
      //
      output.concat(L"\n");
      sof_a.puts(output);
    }
    sof_a.puts(L"\n");
  }

  // output "\end\" mark
  //
  sof_a.puts(L"\\end\\");

  // exit gracefully
  //
  return true;
}
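// ---------------------------------------------------------------------------
// illustrative sketch (not part of ngmp_05.cc): the shape of the text that
// the sof_a.puts() calls in store() emit for a bigram model; the Sof object
// header written by sof_a.put() is not shown, and the numeric values are
// made up. column one is log10(P(word | history)) and the optional trailing
// column is the log10 backoff weight; the parser keeps scores internally in
// log-e and converts with Integral::LN10, as seen in load() and store()
// above.
//
static const char example_arpa_output[] =
  "format = \"NGRAM_ARPA\";\n"
  "\n"
  "\\data\\\n"
  "ngram 1=3\n"
  "ngram 2=2\n"
  "\n"
  "\\1-grams:\n"
  "-0.3010 \t<s> \t-0.2218\n"
  "-0.4771 \tthe \t-0.3010\n"
  "-0.6990 \t</s> \n"
  "\n"
  "\\2-grams:\n"
  "-0.1761 \t<s> the \n"
  "-0.3010 \tthe </s> \n"
  "\n"
  "\\end\\";
// ---------------------------------------------------------------------------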