📄 pdt_05.cc
// file: $isip/class/pr/PhoneticDecisionTree/pdt_05.cc
// version: $Id: pdt_05.cc,v 1.11 2002/11/03 05:53:33 parihar Exp $
//

// isip include files
//
#include "PhoneticDecisionTree.h"

// method: runDecisionTree
//
// arguments:
//  none
//
// return: a boolean value indicating status
//
// this method runs the decision tree using the specified runmode and
// stopmode.
//
boolean PhoneticDecisionTree::runDecisionTree() {

  // local variables
  //
  boolean res;

  // runmode: TRAIN && stopmode: THRESH
  //
  if ((runmode_d == TRAIN) && (stopmode_d == THRESH)) {

    // construct the root node and insert it in the graph
    //
    BiGraphVertex<PhoneticDecisionTreeNode>* rootnode =
      insertVertex(&pdt_rootnode_d);

    // connect the start node to the root node
    //
    insertArc(getStart(), rootnode, false, 0);

    // train the tree
    //
    res = trainDecisionTree();
  }

  // runmode: TEST && stopmode: THRESH
  //
  else if ((runmode_d == TEST) && (stopmode_d == THRESH)) {

    // classify the data in test mode
    //
    res = true;
  }

  // error: unknown mode
  //
  else {
    return Error::handle(name(), L"runDecisionTree", ERR,
                         __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return res;
}

// method: trainDecisionTree
//
// arguments:
//  none
//
// return: a boolean value indicating status
//
// this method creates (trains) the decision tree on the basis of the
// specified algorithm and implementation
//
boolean PhoneticDecisionTree::trainDecisionTree() {

  // local variables
  //
  boolean res;

  // algorithm: ML && implementation: DEFAULT
  //
  if ((algorithm_d == ML) && (implementation_d == DEFAULT)) {

    // local variables
    //
    TreeNode* root_node = (TreeNode*)NULL;
    SingleLinkedList<TreeNode> leaf_nodes(DstrBase::USER);

    // first classify the root node into the children on the basis of
    // central symbol (first-attribute is central-monophone in the
    // model)
    //
    root_node = getFirst();

    // check the node
    //
    if (root_node == (TreeNode*)NULL) {
      return Error::handle(name(), L"trainDecisionTree - NULL VERTEX",
                           Error::ARG, __FILE__, __LINE__);
    }
    attributes_d.gotoFirst();
    Attribute* attribute = attributes_d.getCurr();
    res = classifyData(root_node, *attribute);

    // classify the leaf-nodes of the tree to the children on the basis
    // of position of the state (datapoint) in the model-topology
    // (second-attribute is state-position)
    //
    // get all the leaf nodes below the node
    //
    res = getLeafNodes(*root_node, leaf_nodes);

    // loop over all the leaf nodes and classify them on the basis of
    // state-position
    //
    attributes_d.gotoNext();
    attribute = attributes_d.getCurr();

    for (boolean more = leaf_nodes.gotoFirst(); more;
         more = leaf_nodes.gotoNext()) {

      // local variables
      //
      TreeNode* temp_node = (TreeNode*)NULL;

      // get the leaf-node
      //
      temp_node = leaf_nodes.getCurr();

      // check the node
      //
      if (temp_node == (TreeNode*)NULL) {
        return Error::handle(name(), L"trainDecisionTree - NULL VERTEX",
                             Error::ARG, __FILE__, __LINE__);
      }
      res = classifyData(temp_node, *attribute);
    }

    // first split each leaf node as one decision tree, reindex the
    // leaf-nodes and then merge it as a sub-tree. loop over all the
    // nodes one-by-one and split each tree at a time
    //
    long index = 0;
    leaf_nodes.clear(Integral::RESET);
    res = getLeafNodes(*root_node, leaf_nodes);

    for (boolean more = leaf_nodes.gotoFirst(); more;
         more = leaf_nodes.gotoNext()) {

      // local variables
      //
      TreeNode* temp_node = (TreeNode*)NULL;

      // get the leaf-node
      //
      temp_node = leaf_nodes.getCurr();

      // check the node
      //
      if (temp_node == (TreeNode*)NULL) {
        return Error::handle(name(), L"trainDecisionTree - NULL VERTEX",
                             Error::ARG, __FILE__, __LINE__);
      }

      // split the sub-tree
      //
      res = splitSubTree(temp_node);

      // reindex the statistical-models on the leaf-nodes of the
      // sub-tree
      //
      res = reindexSubTree(temp_node, index);

      // merge the sub-tree
      //
      res = mergeSubTree(temp_node);
    }
  }

  // error: unknown mode
  //
  else {
    return Error::handle(name(), L"trainDecisionTree", ERR,
                         __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return res;
}

// method: splitSubTree
//
// arguments:
//  TreeNode* node: (input) input node
//
// return: a boolean value indicating status
//
// this method splits the input node as a sub-tree until the threshold
// conditions are met
//
boolean PhoneticDecisionTree::splitSubTree(TreeNode* node_a) {

  // define local variables
  //
  boolean res = false;
  boolean split = true;

  // check the node
  //
  if (node_a == (TreeNode*)NULL) {
    return Error::handle(name(), L"splitSubTree - NULL VERTEX",
                         Error::ARG, __FILE__, __LINE__);
  }

  // continue to split the tree until we can't find a best leaf-node
  // to split that satisfies the threshold conditions
  //
  while (split) {

    // define local variables
    //
    Attribute attribute;
    TreeNode* best_node = (TreeNode*)NULL;
    float max_inc_likelihood = (float)0;
    split = false;
    SingleLinkedList<TreeNode> leaf_nodes(DstrBase::USER);

    // get all the leaf nodes below the node
    //
    res = getLeafNodes(*node_a, leaf_nodes);

    // find the best attribute at each current leaf-node and find the
    // best candidate leaf-node for the split
    //
    for (boolean more = leaf_nodes.gotoFirst(); more;
         more = leaf_nodes.gotoNext()) {

      // define local variables
      //
      TreeNode* child_node = (TreeNode*)NULL;
      float inc_likelihood = (float)0;
      Attribute best_attribute;
      boolean att = false;
      child_node = leaf_nodes.getCurr();

      // check the node
      //
      if (child_node == (TreeNode*)NULL) {
        return Error::handle(name(), L"splitSubTree - NULL VERTEX",
                             Error::ARG, __FILE__, __LINE__);
      }

      // find the best attribute at the current leaf-node and its
      // likelihood increment
      //
      att = findBestAttribute(child_node, best_attribute, inc_likelihood);

      // update the best node to split at a level
      //
      if (att && (inc_likelihood > max_inc_likelihood)) {
        max_inc_likelihood = inc_likelihood;
        best_node = child_node;
        attribute = best_attribute;
        split = true;
        res = true;
      }
    }

    // classify into children only if there is an attribute that
    // satisfies the threshold conditions
    //
    if (split) {
      res = classifyData(best_node, attribute);
    }
  }

  // exit gracefully
  //
  return res;
}

// method: findBestAttribute
//
// arguments:
//  TreeNode* node: (input) input node
//  Attribute& best_attribute: (output) best attribute to split the node
//  float& inc_likelihood: (output) increase in likelihood if the node is split
//
// return: a boolean value indicating status
//
// this method computes the best attribute and its corresponding
// increase in the likelihood in order to split the input node.
//
boolean PhoneticDecisionTree::findBestAttribute(TreeNode* node_a,
                                                Attribute& best_attribute_a,
                                                float& inc_likelihood_a) {

  // local variables
  //
  float likelihood = (float)0;
  boolean res;

  // check the node
  //
  if (node_a == (TreeNode*)NULL) {
    return Error::handle(name(), L"findBestAttribute - NULL VERTEX",
                         Error::ARG, __FILE__, __LINE__);
  }

  // compute the likelihood of the node
  //
  res = computeLikelihoodNode(node_a, likelihood);

  // loop over all the attributes and find the one with maximum
  // likelihood
  //
  for (boolean k = attributes_d.gotoFirst(); k; k = attributes_d.gotoNext()) {

    // local variables
    //
    Attribute* attribute = (Attribute*)NULL;
    Attribute temp_attribute;
    boolean att;
    float split_likelihood = (float)0;
    float inc_likelihood = (float)0;

    // get the attribute
    //
    attribute = attributes_d.getCurr();

    // check the attribute
    //
    if (attribute == (Attribute*)NULL) {
      return Error::handle(name(), L"findBestAttribute - NULL ATTRIBUTE",
                           Error::ARG, __FILE__, __LINE__);
    }
    temp_attribute = *attribute;

    // compute the likelihood after splitting this node on the
    // current attribute. the return flag will be false if the
    // node is a pure node given the attribute
    //
    att = computeLikelihoodSplitNode(node_a, temp_attribute,
                                     split_likelihood);

    // compute the increase in likelihood due to splitting the node on
    // the current attribute
    //
    inc_likelihood = split_likelihood - likelihood;

    // the best attribute is valid only when
    //  1) the split is valid (input node is not pure given this attribute)
    //  2) the increase in likelihood is greater than the maximum
    //     increase in likelihood seen so far
    //  3) the state-occupancies of the split nodes are greater than the
    //     num_occ_threshold
    //
    if (att && (inc_likelihood > inc_likelihood_a) &&
        isSplitOccupancyBelowThreshold(node_a, temp_attribute)) {
      inc_likelihood_a = inc_likelihood;
      best_attribute_a = temp_attribute;
    }
  }

  // the best attribute is valid only when the split meets the
  // threshold condition: the increase in likelihood must be greater
  // than the split_threshold
  //
  if (inc_likelihood_a > split_threshold_d)
    res = true;
  else
    res = false;

  // exit gracefully
  //
  return res;
}

// method: computeLikelihoodNode
//
// arguments:
//  TreeNode* node: (input) input node
//  float& likelihood: (output) likelihood at the input node
//
// return: a boolean value indicating status
//
// this method computes the likelihood at the input node.
//
boolean PhoneticDecisionTree::computeLikelihoodNode(TreeNode* node_a,
                                                    float& likelihood_a) {

  // local variables
  //
  float likelihood = (float)0;
  float sum_num_occ;
  float det_pooled_covar;
  boolean res;

  // check the node
  //
  if (node_a == (TreeNode*)NULL) {
    return Error::handle(name(), L"computeLikelihoodNode - NULL VERTEX",
                         Error::ARG, __FILE__, __LINE__);
  }

  // get the sum of occupancies at this node
  //
  res = computeSumOccupancy(node_a, sum_num_occ);

  // get the data in a single linked list
  //
  PhoneticDecisionTreeNode* pdt_node = node_a->getItem();
  Data& data = pdt_node->getDataPoints();

  // get the first datapoint in the triple
  //
  DataPoint* datapoint = data.getFirst();

  // get the datapoint statistical model
  //
  StatisticalModel& datapoint_stat_model = datapoint->second();

  // compute the likelihood only when sum_num_occ is non-zero. this is
  // necessary for the likelihood computation to have a valid division
  // assuming a gaussian distribution. note that the likelihood
  // computation may be different for any other distribution
  //
  if (sum_num_occ != (float)0) {

    // compute the likelihood only for a single-mixture gaussian
    // distribution, else error
    //
    // reference Eq 6
    //  J. Zhao, et al., "Tutorial for Decision Tree-Based
    //  State-Tying For Acoustic Modeling", pp. 6, June, 1999.
    //
    //  L = -0.5 * (n * (1 + ln(2*pi)) + ln(|C|)) * sum_num_occ
    //
    //  where n = number of features
    //        sum_num_occ = sigma ( state_num_occ(s) )
    //                        s
    //  where s = statistical models at this node
    //
    // compute only if the underlying model is a MixtureModel, else
    // error
    //
    if (datapoint_stat_model.getType() ==
        StatisticalModel::MIXTURE_MODEL) {

      // local variables
      //
      StatisticalModel* mixture;

      // get the mixtures as a SingleLinkedList of StatisticalModels
      //
      MixtureModel& mixture_model = datapoint_stat_model.getMixtureModel();
      SingleLinkedList<StatisticalModel>& mixtures = mixture_model.getModels();

      // get the first mixture as a StatisticalModel
      //
      mixture = mixtures.getFirst();

      // check if the distribution is gaussian and single mixture
      //
      if ((mixture->getType() == StatisticalModel::GAUSSIAN_MODEL) &&
          (mixtures.length() == (long)1)) {

        // compute the determinant of the pooled covariance
        //
        res = computeDeterminantPooledCovariance(node_a, det_pooled_covar);

        // get the number of features from the dimension of the mean of
        // the datapoint statistical model set before this if statement
        //
        // local variables
        //
        VectorFloat mean;
        long num_features;

        // get the mean of the statistical model. note that the
        // underlying distributions are actually gaussian in this case
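The listing cuts off inside computeLikelihoodNode, but the quantity it is assembling is Eq. 6 from the Zhao et al. tutorial cited in the comments: L = -0.5 * (n * (1 + ln(2*pi)) + ln(|C|)) * sum_num_occ, and findBestAttribute accepts a question only when the resulting increase in likelihood exceeds the split threshold. The sketch below is a minimal standalone illustration of that formula and of the gain test, not the ISIP implementation; the names (nodeLogLikelihood, splitGain), the plain double types, and all numeric values are hypothetical stand-ins for the classes and thresholds used in pdt_05.cc.

// standalone sketch (not ISIP code): node log-likelihood and split gain
// for single-mixture gaussian decision-tree state-tying, following
// Eq. 6 of J. Zhao, et al., "Tutorial for Decision Tree-Based
// State-Tying For Acoustic Modeling", 1999.
//
#include <cmath>
#include <cstdio>

static const double kTwoPi = 6.283185307179586;

// L = -0.5 * (n * (1 + ln(2*pi)) + ln(|C|)) * sum_num_occ
//
//  n            : number of features
//  det_pooled_C : determinant of the pooled covariance at the node
//  sum_num_occ  : total state occupancy of the models pooled at the node
//
static double nodeLogLikelihood(long n, double det_pooled_C,
                                double sum_num_occ) {
  return -0.5 * (n * (1.0 + std::log(kTwoPi)) + std::log(det_pooled_C))
              * sum_num_occ;
}

// increase in log-likelihood if a node is split by a question into a
// "yes" child and a "no" child; a split is accepted only if this gain
// exceeds the split threshold
//
static double splitGain(double L_parent, double L_yes, double L_no) {
  return (L_yes + L_no) - L_parent;
}

int main() {
  // toy numbers (hypothetical): 39 features, pooled covariance
  // determinants, and state occupancies for a parent node and the two
  // children produced by a candidate question
  //
  long n = 39;
  double L_parent = nodeLogLikelihood(n, 1.0e-3, 1000.0);
  double L_yes = nodeLogLikelihood(n, 8.0e-4, 600.0);
  double L_no = nodeLogLikelihood(n, 6.0e-4, 400.0);

  double gain = splitGain(L_parent, L_yes, L_no);
  double split_threshold = 100.0;   // arbitrary threshold for the example

  std::printf("gain = %.2f, split = %s\n", gain,
              (gain > split_threshold) ? "yes" : "no");
  return 0;
}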