📄 pdt_05.cc
字号:
if ((j == (vec_len/2)) && ((vec_ss(j).eq(SearchSymbol::NO_LEFT_CONTEXT)) || (vec_ss(j).eq(SearchSymbol::NO_RIGHT_CONTEXT)))) { valid = false; } // check for condition 4 // if ((j == (vec_len/2)) && (contextless_symbol_table_a.contains(&vec_ss(j)))) { valid = false; } } // if valid context, accumulate the context // if (valid) { // add the valid-context // long context_len = valid_contexts_a.length(); valid_contexts_a.setLength(context_len + (long)1); valid_contexts_a(context_len).assign(all_contexts_a(i)); } } // reset the capacity of the valid-contexts vector // capacity = valid_contexts_a.length(); valid_contexts_a.setCapacity(capacity); // exit gracefully // return status;}// method: getUnseenContexts//// arguments:// Vector<ContextMap>& seen_contexts: (input) contexts seen// Vector<ContextMap>& valid_contexts: (input) all valid contexts// Vector<ContextMap>& unseen_contexts: (output) contexts unseen//// return: a boolean value indicating status//// this method gets all the contexts that are not in the input// contexts//boolean PhoneticDecisionTree::getUnseenContexts(Vector<ContextMap>& seen_contexts_a, Vector<ContextMap>& valid_contexts_a, Vector<ContextMap>& unseen_contexts_a) { // local variables // boolean status = true; // set the capacity of the unseen-contexts vector // long capacity = valid_contexts_a.length(); unseen_contexts_a.setCapacity(capacity); // loop-over all the valid-contexts and accumulate the unseen-contexts // for (long i = 0; i < valid_contexts_a.length(); i++) { // local variables // boolean seen = false; // get the context // Vector<SearchSymbol> valid_vec_ss = valid_contexts_a(i).getContext(); // add this valid context-map to unseen-context-maps if it doesn't // exists in seen-contexts // for (long j = 0; j < seen_contexts_a.length(); j++) { // get the context // Vector<SearchSymbol> seen_vec_ss = seen_contexts_a(j).getContext(); if (seen_vec_ss.eq(valid_vec_ss)) { seen = true; } } if (!seen) { long len = unseen_contexts_a.length(); unseen_contexts_a.setLength(len + 1); unseen_contexts_a(len).assign(valid_contexts_a(i)); } } // reset the capacity of the valid-contexts vector // capacity = unseen_contexts_a.length(); unseen_contexts_a.setCapacity(capacity); // exit gracefully // return status;}// method: updateLowerLevel//// arguments:// Vector<ContextMap>& context_map: (input) contexts seen// Vector<ContextMap>& unseen_context_map: (input) contexts unseen// Vector<DiGraph<SearchNode> >& sub_graphs: (input) sub-graphsat lowest-level// Vector<SearchSymbol>& symbol_table: (input) symbol-table at lowest-level// HashTable<SearchSymbol, Long>& symbol_hash: (input) mapping-table at// the lowest-level//// return: a boolean value indicating status//// this method update the lowest level for unseen-contexts//boolean PhoneticDecisionTree::updateLowerLevel(Vector<ContextMap>& context_map_a, Vector<ContextMap>& unseen_context_map_a, Vector<DiGraph<SearchNode> >& sub_graphs_a, Vector<SearchSymbol>& symbol_table_a, HashTable<SearchSymbol,Long>& symbol_hash_a) { // update the capacity of the context_map_out, sub-graphs // long len_context_map = context_map_a.length(); long len_unseen_context_map = unseen_context_map_a.length(); long cap_context_map = len_context_map + len_unseen_context_map; context_map_a.setCapacity(cap_context_map); long len_sub_graphs = sub_graphs_a.length(); long cap_sub_graphs = len_sub_graphs + len_unseen_context_map; sub_graphs_a.setCapacity(cap_sub_graphs); // update the capacity of the symbol-table // DiGraph<SearchNode> temp_graph_copy; sub_graphs_a(len_sub_graphs -1).setAllocationMode(DstrBase::SYSTEM); temp_graph_copy.assign(sub_graphs_a(len_sub_graphs - 1)); sub_graphs_a(len_sub_graphs -1).setAllocationMode(DstrBase::USER); long len_symbols_per_graph = 0; for (boolean morea = temp_graph_copy.gotoFirst(); morea; morea = temp_graph_copy.gotoNext()) { len_symbols_per_graph++; } long cap_symbol_table = symbol_table_a.length() + (len_symbols_per_graph * len_unseen_context_map); symbol_table_a.setCapacity(cap_symbol_table); // loop-over all the unseen context-maps // for (long i = 0; i < unseen_context_map_a.length(); i++) { // local variables // long curr_index = 0; long symbol_index = 0; long tmp_index = 0; long len = 0; Long val; ContextMap context; DiGraph<SearchNode> graph_copy; // add the unseen context and its index to the context-maps // curr_index = sub_graphs_a.length(); context = unseen_context_map_a(i); context.setContextIndex((ulong)curr_index); len = context_map_a.length(); context_map_a.setLength(len + 1); context_map_a(len).assign(context); // update the context-indices of the unseen-contexts also // unseen_context_map_a(i).assign(context); // create a copy of the subgraph corresponding to the central contex symbol // and append the copy to the vector of subgraphs // sub_graphs_a(curr_index -1).setAllocationMode(DstrBase::SYSTEM); graph_copy.assign(sub_graphs_a(curr_index - 1)); graph_copy.setAllocationMode(DstrBase::USER); sub_graphs_a(curr_index -1).setAllocationMode(DstrBase::USER); sub_graphs_a.concat(graph_copy); // loop over each vertex in the subgraph of the appended copy // SearchSymbol ss; for (boolean more = sub_graphs_a(curr_index).gotoFirst(); more; more = sub_graphs_a(curr_index).gotoNext()) { // retrieve the symbol index corresponding to the current vertex // symbol_index = sub_graphs_a(curr_index).getCurr()->getItem()->getSymbolId(); // create a new search symbol and append it to the symbol table // ss.assign(L"S_"); tmp_index = (long)symbol_table_a.length() + 1; ss.concat(tmp_index); while (symbol_table_a.contains(&ss)) { ss.assign(L"S_"); ss.concat(++tmp_index); } symbol_table_a.concat(ss); val.assign(symbol_table_a.length() - 1); symbol_hash_a.insert(ss, &val); sub_graphs_a(curr_index).getCurr()->getItem()->setSymbolId((long)val); } } // exit gracefully // return true;}// method: markNode//// arguments:// TreeNode* node: (input) input node that will be marked as non-existing// boolean& flag_a: (input) flag that will be set at the node//// return: a boolean value indicating status//// this method marks the input node//boolean PhoneticDecisionTree::markNode(TreeNode* node_a, boolean& flag_a) { // local variables // PhoneticDecisionTreeNode* pdt_node = (PhoneticDecisionTreeNode*)NULL; Data data; DataPoint datapoint; boolean res = true; // check the input node // if (node_a == (TreeNode*)NULL) { return Error::handle(name(), L"markNode - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get the data points on the node // pdt_node = node_a->getItem(); // check the data // if (pdt_node == (PhoneticDecisionTreeNode*)NULL) { return Error::handle(name(), L"markNode - NULL DATA", Error::ARG, __FILE__, __LINE__); } // mark this node as non-existing // res = pdt_node->setFlagExists(flag_a); // exit gracefully // return res; }// method: updateTypicalIndex//// arguments:// TreeNode* start_node: (input) input start_node//// TreeNode* best_node: (input) input best candidate node that will// whose typical-index will be updated//// return: a boolean value indicating status//// this method updates the typical-index of the best-node to the// typical-index of the start-node. this is needed for the test mode//boolean PhoneticDecisionTree::updateTypicalIndex(TreeNode* start_node_a, TreeNode* best_node_a) { // local variables // PhoneticDecisionTreeNode* start_pdt_node = (PhoneticDecisionTreeNode*)NULL; PhoneticDecisionTreeNode* best_pdt_node = (PhoneticDecisionTreeNode*)NULL; Long typical_index = -1; DataPoint datapoint; boolean res = true; // check the nodes // if (start_node_a == (TreeNode*)NULL) { return Error::handle(name(), L"updateTypicalIndex - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } if (best_node_a == (TreeNode*)NULL) { return Error::handle(name(), L"updateTypicalIndex - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get the typical-index of the statistical-model at the start-node // start_pdt_node = start_node_a->getItem(); // check the data // if (start_pdt_node == (PhoneticDecisionTreeNode*)NULL) { return Error::handle(name(), L"updateTypicalIndex - NULL DATA", Error::ARG, __FILE__, __LINE__); } typical_index = start_pdt_node->getTypicalIndex(); // update the typical-index of the statistical-model at the best-node // best_pdt_node = best_node_a->getItem(); // check the data // if (best_pdt_node == (PhoneticDecisionTreeNode*)NULL) { return Error::handle(name(), L"updateTypicalIndex - NULL DATA", Error::ARG, __FILE__, __LINE__); } res = best_pdt_node->setTypicalIndex(typical_index); // exit gracefully // return res; }// method: reindexSubTree//// arguments:// Treenode* node: (input) input node// long& index: (input/output) value of index//// return: a boolean value indicating status//// this method reindexes the statistical-models at the leaf-nodes of// the sub-tree under the input node//boolean PhoneticDecisionTree::reindexSubTree(TreeNode* node_a, long& index_a) { // define local variable // TreeNode* node = (TreeNode*)NULL; boolean res = false; SingleLinkedList<TreeNode> leaf_nodes(DstrBase::USER); // check the input node // node = node_a; if (node == (TreeNode*)NULL) { return Error::handle(name(), L"reindexSubTree - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get all the leaf-nodes below the input node // res = getLeafNodes(*node, leaf_nodes); // loop-over all the leaf-nodes and reindex the statistical models // for (boolean more = leaf_nodes.gotoFirst(); more; more = leaf_nodes.gotoNext()) { // local variables // TreeNode* temp_node = (TreeNode*)NULL; // get the leaf-node // temp_node = leaf_nodes.getCurr(); // check the node // if (temp_node == (TreeNode*)NULL) { return Error::handle(name(), L"reindexTrain - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // local variables // PhoneticDecisionTreeNode* pdt_node = (PhoneticDecisionTreeNode*)NULL;; Data data; DataPoint datapoint; double max_scale = (double)-1000; DataPoint typical_datapoint; StatisticalModel typical_stat_model; // loop-over all the data at this leaf-node and find the typical // statistical-model to which all the rest of the // statistical-models at this node will be tied to. the model with // the highest scale or lowest variance is a typical model // // get all the data points on the node // pdt_node = temp_node->getItem(); // check the node // if (pdt_node == (PhoneticDecisionTreeNode*)NULL) { return Error::handle(name(), L"getStatTrain - NULL PDT VERTEX", Error::ARG, __FILE__, __LINE__); } data = pdt_node->getDataPoints(); for (boolean morea = data.gotoFirst(); morea; morea = data.gotoNext()) { // local variables // double scale; StatisticalModel datapoint_stat_model; Long temp_index; // get the inverse scale for this datapoint // datapoint = *(data.getCurr()); datapoint_stat_model = datapoint.second(); scale = computeScale(datapoint_stat_model); scale = 2.0 * scale; // get the index of this statistical-model // temp_index = datapoint.first(); // find the typical statistical model // if ( scale > max_scale) { max_scale = scale; typical_datapoint = datapoint; typical_stat_model = datapoint_stat_model; } } // end of for-loop over all datapoints at a leafnode // save the new typical-index, actual-index and statistical-model // that represents this leaf-node to this leaf node so that we can // later retrive it. tupical-index gets update during merging but // actual index remains the same // pdt_node->setTypicalIndex((Long)index_a); pdt_node->setActualIndex((Long)index_a); pdt_node->setTypicalStatModel(typical_stat_model); // increment the index // index_a++; } // end of for loop over leaf-nodes // exit gracefully // return res;}// method: getCentralSymbols//// arguments:// Vector<SearchSymbol>& symbol_table: (input) symbol-table// Vector<SearchSymbol>& contextless_symbol_table: (input) contextless// symbol-table// SingleLinkedList<String>& central_symbols: (output) central symbols//// return: a boolean value indicating status//// this method get the central symbols excluding the contextless// symbols, NO_LEFT_CONTEXT, and NO_RIGHT_CONTEXT//boolean PhoneticDecisionTree::getCentralSymbols(Vector<SearchSymbol>& symbol_table
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -