📄 pdt_05.cc
字号:
// res = datapoint_stat_model.getMean(mean); // get the dimensionality of the mean. it is equal to the num // of features // num_features = mean.length(); // compute likelihood assuming gaussian distribution // float log_det = Integral::log(det_pooled_covar); double temp = Integral::log(Integral::TWO_PI); likelihood = -0.5 * (num_features * (1 + temp) + log_det) * sum_num_occ; } // error: unknown distribution and multiple mixture models // else { return Error::handle(name(), L"computeLikelihoodNode", ERR, __FILE__, __LINE__); } } // error: only MixtureModel supported // else { return Error::handle(name(), L"computeLikelihoodNode", ERR, __FILE__, __LINE__); } } // end if sum_num_occ != 0 likelihood_a = likelihood; // exit gracefully // return res; }// method: computeDeterminantPooledCovariance//// arguments:// float& det_pooled_covariance: (output) dereminant of the pooled covariance// at the input node// TreeNode* node: (input) input node//// return: a boolean value indicating status//// this method computes the likelihood at the input node.//// reference Eq 7// J. Zhao, et al, "Tutorial for Decision Tree-Based// State-Tying For Acoustic Modeling", pp. 6, June, 1999.//// note that in this implementation we assume the mean as a row// vector while the reference equation assumes the mean to be a column// vector.//// |C| = determinant(C)//// C = (a / sum_num_occ) - b//// where a = sigma ( num_occ(s) ( cov(s) + transpose(mean(s)) * mean(s) ) )// s//// sum_num_occ = sigma ( state_num_occ(s) )// s//// b = transpose(c) * c//// c = (sigma ( num_occ(s) * mean(s) )) / sum_num_occ ;// s//// where s = statistical models at this node// boolean PhoneticDecisionTree::computeDeterminantPooledCovariance(TreeNode* node_a, float& det_pooled_covariance_a) { // local variables // float sum_num_occ; MatrixFloat a; MatrixFloat b; VectorFloat c; MatrixFloat pooled_covar; boolean diagonal = true; boolean res = true; // check the node // if (node_a == (TreeNode*)NULL) { return Error::handle(name(), L"computeDeterminantPooledCovariance - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get the data in singlelinked list // PhoneticDecisionTreeNode* pdt_node = node_a->getItem(); Data& data = pdt_node->getDataPoints(); // loop over datapoints and compute the pooled_covariance // for (boolean i = data.gotoFirst(); i; i = data.gotoNext()) { // local variables // double datapoint_num_occ; VectorFloat mean; MatrixFloat covar; MatrixFloat mprod1; VectorFloat vprod1; MatrixFloat mprod2; MatrixFloat msum1; // get the datapoint in triple // DataPoint* datapoint = data.getCurr(); // get the statistical model and the occupancy // StatisticalModel& datapoint_stat_model = datapoint->second(); datapoint_num_occ = datapoint_stat_model.getOccupancy(); // get the mean and covariance of this mixture // res = datapoint_stat_model.getMean(mean); res = datapoint_stat_model.getCovariance(covar); // check if the covariance is non-diagonal // if (!covar.isDiagonal()) { diagonal = false; } // intermediate computations to compute matrices "a" and "c" // res = mprod1.outerProduct(mean, mean); res = msum1.add(covar, mprod1); res = msum1.mult(datapoint_num_occ); a.setDimensions(msum1); res = a.add(msum1); res = vprod1.assign(mean); res = vprod1.mult(datapoint_num_occ); c.setLength(vprod1.length()); res = c.add(vprod1); } // end of for loop for states on this node // get the sum of occupancies at this node // res = computeSumOccupancy(node_a, sum_num_occ); // compute the pooled covariance matrix and its determinant // res = a.div(sum_num_occ); res = c.div(sum_num_occ); res = b.outerProduct(c, c); res = pooled_covar.sub(a,b); // set the pooled covariance to diagonal if all the datapoints had // diagonal covariance. this is an assumptiom though the equations // don't generate a diagonal even if all the input data points have // diagonal covariances // if (diagonal) { // local variables // MatrixFloat temp; temp.setDimensions(pooled_covar); temp.setDiagonal(pooled_covar); pooled_covar.assign(temp); } // compute the determinant of the pooled covariance matrix // det_pooled_covariance_a = pooled_covar.determinant(); // exit gracefully // return res;}// method: computeLikelihoodSplitNode//// arguments:// float& split_likelihood: (output) likelihood if the node is split// TreeNode* node: (input) input node// Attribute& attribute: (input) input attribute that's used for// splitting the input node//// return: a boolean value indicating status//// this method computes the likelihood if the input node is split// using the input attribute.//boolean PhoneticDecisionTree::computeLikelihoodSplitNode(TreeNode* node_a, Attribute& attribute_a, float& split_likelihood_a) { // local variables // boolean res = true; split_likelihood_a = (float)0; // check the node // if (node_a == (TreeNode*)NULL) { return Error::handle(name(), L"computeLikelihoodSplitNode - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get the name and values of this attribute // String& attr_name = attribute_a.first(); SingleLinkedList<String>& attr_values = attribute_a.second(); // get the data points on the node // PhoneticDecisionTreeNode* pdt_node = node_a->getItem(); Data& data = pdt_node->getDataPoints(); // get the first datapoint from the singlelinked list // DataPoint* datapoint = data.getFirst(); // check if this attribute is on the first item of the current node, // else return false // HashTable<String, String>& datapoint_attr = datapoint->third(); // return error if the attribute is missing on a datapoint // if (!datapoint_attr.containsKey(attr_name)) { return Error::handle(name(), L"computeLikelihoodSplitNode", ERR, __FILE__, __LINE__); } // loop over all the values for this attribute, compute likelihood // for each and then add them // for (boolean l = attr_values.gotoFirst(); l; l = attr_values.gotoNext()) { // local variables // TreeNode node; PhoneticDecisionTreeNode child_pdt_node; Data child_data; float likelihood = (float)0; // get the value of the current attribute // String* value = attr_values.getCurr(); // loop over data and count the number of each value // for (boolean j = data.gotoFirst(); j; j = data.gotoNext()) { // get the data point in triple // datapoint = data.getCurr(); // get the attribute value in hashtable // HashTable<String, String>& datapoint_attr = datapoint->third(); // check if this datapoint has this value for the current // attribute and add this to the singlelinked list // if(value->eq(*datapoint_attr.get(attr_name))) { child_data.insert(datapoint); } } // set the singlelinked list of data at this child node // child_pdt_node.setDataPoints(child_data); node.setItem(&child_pdt_node); // compute the likelihood for the child node only if there is // data on the node // if (!child_data.isEmpty()) { if (!computeLikelihoodNode(&node, likelihood)) { return Error::handle(name(), L"computeLikelihoodSplitNode", ERR, __FILE__, __LINE__); } } else res = false; // do other computations to get the split likelihood // split_likelihood_a += likelihood; } // exit gracefully // return res;}// method: classifyData//// arguments:// TreeNode* node: (input) input node// Attribute& attribute: (input) input attribute that's used for// splitting the input node//// return: a boolean value indicating status//// this method classifies the input node using the input attribute and// adds the splitted nodes as the children nodes to the input node in// the decisiontree. this method also adds the best question at the// parent-node//boolean PhoneticDecisionTree::classifyData(TreeNode* node_a, Attribute& attribute_a) { // local variables // PhoneticDecisionTreeNode* pdt_node = (PhoneticDecisionTreeNode*)NULL; // check the node // if (node_a == (TreeNode*)NULL) { return Error::handle(name(), L"classifyData - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get the name and values for the attribute // String& attr_name = attribute_a.first(); SingleLinkedList<String>& attr_values = attribute_a.second(); // get the data on this node // pdt_node = node_a->getItem(); // check the node // if (pdt_node == (PhoneticDecisionTreeNode*)NULL) { return Error::handle(name(), L"classifyData - NULL PDTNODE", Error::ARG, __FILE__, __LINE__); } Data& data = pdt_node->getDataPoints(); // save the best attribute at this node // pdt_node->setBestAttribute(attr_name); // loop over attribute values and set the data in the child node // for each attribute value // for(boolean more = attr_values.gotoFirst(); more; more = attr_values.gotoNext()) { // local variables // PhoneticDecisionTreeNode child_pdt_node; Data data_child; // get the value of the current attribute // String* attr_value = attr_values.getCurr(); // loop over the data and accumulate the data with this attribute // value // for (boolean j = data.gotoFirst(); j; j = data.gotoNext()) { // get the data point in triple // DataPoint* datapoint = data.getCurr(); // get the hash table of attributes for this datapoint // HashTable<String, String>& datapoint_attr = datapoint->third(); if (attr_value->eq(*datapoint_attr.get(attr_name))) { // copy the datapoint and insert into the corresponding child // node // data_child.insert(datapoint); } } // return error if the input node is PURE // if (data_child.length() <= 0) { return Error::handle(name(), L"classifyData - PURE-NODE CANNOT BE CLASSIFIED", Error::ARG, __FILE__, __LINE__); } // add this node to the graph and make connections // child_pdt_node.setDataPoints(data_child); TreeNode* node_child = insertVertex(&child_pdt_node); insertArc(node_a, node_child, true); } // exit gracefully // return true;}// method: computeSumOccupancy//// arguments:// double& sum_num_occ: (output) sum of the occupancies of all the// datapoints(statistical models) at the input node//// TreeNode* node: (input) input node//// return: a boolean value indicating status//// this method sums the occupancies of all the datapoints.//boolean PhoneticDecisionTree::computeSumOccupancy(TreeNode* node_a, float& sum_num_occ_a) { // local variables // double datapoint_num_occ; sum_num_occ_a = (double)0; // check the node // if (node_a == (TreeNode*)NULL) { return Error::handle(name(), L"computeSumOccupancy - NULL VERTEX", Error::ARG, __FILE__, __LINE__); } // get the data in singlelinked list // PhoneticDecisionTreeNode* pdt_node = node_a->getItem(); Data& data = pdt_node->getDataPoints(); // loop over data and compute the sum of the occupancies of all the // datapoints on the node // for (boolean j = data.gotoFirst(); j; j = data.gotoNext()) { // get the datapoint in triple // DataPoint* datapoint = data.getCurr(); // get datapoint statistical model // StatisticalModel& datapoint_stat_model = datapoint->second(); // get the occupancy // datapoint_num_occ = (double)0; datapoint_num_occ = datapoint_stat_model.getOccupancy(); // add this occupancy to the sum // sum_num_occ_a += datapoint_num_occ; } // exit gracefully // return true;}// method: isSplitOccupancyBelowThreshold//// arguments:// TreeNode* node: (input) input node// Attribute& attribute: (input) input attribute used to split the input node//// return: a boolean value indicating status//// this method checks if the occupancies for each of the child nodes// generated by classifying the input node meets the threshold// conditions.//boolean PhoneticDecisionTree::isSplitOccupancyBelowThreshold(TreeNode* node_a, Attribute& attribute_a) { // local variables // boolean res = true;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -