📄 pdt_05.cc
  //
  else {
    return Error::handle(name(), L"classifyDataPoint", ERR,
                         __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return index;
}

// method: findClass
//
// arguments:
//  TreeNode* node: (input) input node, below which the input datapoint will
//                  be classified to any of the leaf-nodes
//  DataPoint& datapoint: (input) input data-point that will be classified
//
// return: a Long value (index) indicating the class
//
// this method classifies the data
//
Long PhoneticDecisionTree::findClass(TreeNode* node_a,
                                     DataPoint& datapoint_a) {

  // local variables
  //
  Long index = -1;

  // get the PhoneticDecisionTreeNode
  //
  PhoneticDecisionTreeNode* pdt_node = node_a->getItem();

  // iterate through this function only if this is not a leaf-node
  //
  if (node_a->gotoFirstChild()) {

    // get the best-attribute-name at this node
    //
    String& best_attr_name = pdt_node->getBestAttribute();

    // get the value of the best-attribute-name at this datapoint
    //
    String* best_attr_value = datapoint_a.third().get(best_attr_name);

    // get all the attribute-values corresponding to this attribute-name
    //
    Attribute* attribute = (Attribute*)NULL;

    // loop over all the attributes and find the corresponding attribute
    //
    for (boolean more = attributes_d.gotoFirst(); more;
         more = attributes_d.gotoNext()) {

      // get the attribute-values corresponding to the best-attribute-name
      //
      if (attributes_d.getCurr()->first().eq(best_attr_name)) {
        attribute = attributes_d.getCurr();
        break;
      }
    }

    // make sure the attribute is defined
    //
    if (attribute == (Attribute*)NULL) {
      return Error::handle(name(), L"findClass - null attribute",
                           Error::ARG, __FILE__, __LINE__);
    }

    // loop over all the children-nodes of this node and classify the
    // datapoint to the one that has the same attribute-value. continue
    // to iterate till we hit any of the leaf-nodes
    //
    // get all the child nodes of this node
    //
    DoubleLinkedList<BiGraphArc<PhoneticDecisionTreeNode> >* children;
    BiGraphArc<PhoneticDecisionTreeNode>* child;
    BiGraphVertex<PhoneticDecisionTreeNode>* child_node;
    children = node_a->getChildren();

    // loop over all the children
    //
    attribute->second().gotoFirst();
    for (boolean moreb = children->gotoFirst(); moreb;
         moreb = children->gotoNext()) {

      // get the attr-value corresponding to the child-node. note that
      // there is a one-to-one correspondence between them
      //
      String* temp_attr_value;
      temp_attr_value = attribute->second().getCurr();
      attribute->second().gotoNext();

      // if the attr-value at this datapoint matches, we have got the
      // corresponding child node
      //
      if (temp_attr_value->eq(*best_attr_value)) {
        child = children->getCurr();
        child_node = child->getVertex();

        // call this function recursively
        //
        index = findClass(child_node, datapoint_a);
      }
    }
  }

  // else this node is a leaf node, and so return the typical index
  //
  else {

    // return the typical index at this leaf-node
    //
    index = pdt_node->getTypicalIndex();

    // error: invalid statistical-model index
    //
    if (index == PhoneticDecisionTreeNode::DEF_TYPICAL_INDEX) {
      return Error::handle(name(), L"classifyData", ERR,
                           __FILE__, __LINE__);
    }

    // exit gracefully
    //
    return index;
  }

  // exit gracefully
  //
  return index;
}
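// [illustrative sketch, not part of the original file] findClass() above
// walks the tree recursively: at each internal node it looks up the value
// of the node's best attribute in the datapoint and follows the child
// whose position matches that value's position in the attribute's value
// list. a minimal standalone version of that descent, using hypothetical
// std:: containers in place of the toolkit's TreeNode/DataPoint classes:
//
#include <cstddef>
#include <map>
#include <string>
#include <vector>

namespace pdt_sketch {

  struct ExampleNode {
    std::string best_attribute;             // question asked at this node
    std::vector<std::string> attr_values;   // one entry per child (1:1)
    std::vector<ExampleNode*> children;     // same order as attr_values
    long typical_index;                     // only meaningful at leaves
  };

  // descend until a leaf is reached, then return its typical index
  //
  static long findClassSketch(const ExampleNode* node,
                              const std::map<std::string,
                                             std::string>& datapoint) {

    if (node->children.empty()) {
      return node->typical_index;
    }

    const std::string& value = datapoint.at(node->best_attribute);

    for (std::size_t i = 0; i < node->attr_values.size(); i++) {
      if (node->attr_values[i] == value) {
        return findClassSketch(node->children[i], datapoint);
      }
    }

    // no matching child: signal an error, as the real code does via
    // Error::handle()
    //
    return -1;
  }
}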
// method: findTypicalIndex
//
// arguments:
//  TreeNode* node: (input) input node
//
// return: an index that indicates the typical model at this node
//
Long PhoneticDecisionTree::findTypicalIndex(TreeNode* node_a) {

  // local variables
  //
  TreeNode* node = (TreeNode*)NULL;

  // get the node
  //
  node = node_a;

  // check the node
  //
  if (node == (TreeNode*)NULL) {
    return Error::handle(name(), L"findTypicalNode - NULL VERTEX",
                         Error::ARG, __FILE__, __LINE__);
  }

  // local variables
  //
  Long typical_index = (Long)-1;
  PhoneticDecisionTreeNode pdt_node;
  Data data;
  DataPoint datapoint;
  double max_scale = (double)-1000;

  // loop over all the data at this leaf-node and find the typical
  // statistical-model to which all the rest of the statistical-models
  // at this node will be tied. the model with the highest scale or
  // lowest variance is a typical model
  //
  // get all the data points on the node
  //
  pdt_node = *(node->getItem());
  data = pdt_node.getDataPoints();

  for (boolean morea = data.gotoFirst(); morea; morea = data.gotoNext()) {

    // local variables
    //
    double scale;
    StatisticalModel datapoint_stat_model;
    Long temp_index;

    // get the inverse scale for this datapoint
    //
    datapoint = *(data.getCurr());
    datapoint_stat_model = datapoint.second();
    scale = computeScale(datapoint_stat_model);
    scale = 2.0 * scale;

    // get the index of this statistical-model
    //
    temp_index = datapoint.first();

    // find the typical statistical model index
    //
    if (scale > max_scale) {
      max_scale = scale;
      typical_index = temp_index;
    }
  }

  // exit gracefully
  //
  return typical_index;
}

// method: computeScale
//
// arguments:
//  StatisticalModel stat_model: (input) input statistical model
//
// return: this method returns the scale of the input StatisticalModel
//         (GaussianModel) with a single mixture
//
double PhoneticDecisionTree::computeScale(StatisticalModel& stat_model_a) {

  // temporary variables
  //
  double scale;
  VectorFloat mean;
  MatrixFloat covariance;

  // compute only if the underlying model is a MixtureModel, else error
  //
  if (stat_model_a.getType() == StatisticalModel::MIXTURE_MODEL) {

    // local variables
    //
    MixtureModel mixture_model;
    SingleLinkedList<StatisticalModel> mixtures;
    StatisticalModel mixture;

    // get the mixtures as a SingleLinkedList of StatisticalModels
    //
    mixture_model = stat_model_a.getMixtureModel();
    mixtures = mixture_model.getModels();

    // get the first mixture as a StatisticalModel
    //
    mixture = *(mixtures.getFirst());

    // check if the distribution is gaussian with a single mixture
    //
    if ((mixture.getType() == StatisticalModel::GAUSSIAN_MODEL) &&
        (mixtures.length() == (long)1)) {

      // get the number of features from the dimensions of the mean of
      // the statistical model
      //
      // local variables
      //
      VectorFloat mean;
      MatrixFloat covariance;

      // get the mean and covariance of the statistical model. note
      // that the underlying distribution is actually gaussian in
      // this case
      //
      stat_model_a.getMean(mean);
      stat_model_a.getCovariance(covariance);

      // check the arguments
      //
      long len_mean = mean.length();
      long len_cov = covariance.getNumRows();
      if ((len_mean != len_cov) || (len_mean <= 0) || (len_cov <= 0)) {
        return false;
      }

      // compute the scale factor from its components
      //
      double det = Integral::log(covariance.determinant());
      double tmp = Integral::log(Integral::TWO_PI);
      scale = (double)0.5 * ((double)len_mean * tmp + det);
    }

    // error: unknown distribution or multiple mixture models
    //
    else {
      return Error::handle(name(), L"computeScale", ERR,
                           __FILE__, __LINE__);
    }
  }

  // error: only MixtureModel is supported
  //
  else {
    return Error::handle(name(), L"computeScale", ERR,
                         __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return scale;
}
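// [illustrative sketch, not part of the original file] the scale computed
// by computeScale() above is the constant term of a gaussian
// log-likelihood, 0.5 * (n * log(2*pi) + log|Sigma|); findTypicalIndex()
// then doubles it and keeps the datapoint with the largest value. a
// minimal standalone version for a diagonal covariance, where the
// determinant is just the product of the diagonal entries (names here
// are hypothetical, not toolkit classes):
//
#include <cmath>
#include <cstddef>
#include <vector>

namespace pdt_sketch {

  static double computeScaleSketch(const std::vector<double>& diag_cov) {

    // log|Sigma| for a diagonal covariance is the sum of the logs of
    // the diagonal entries
    //
    double log_det = 0.0;
    for (std::size_t i = 0; i < diag_cov.size(); i++) {
      log_det += std::log(diag_cov[i]);
    }

    // 0.5 * (n * log(2*pi) + log|Sigma|)
    //
    const double two_pi = 2.0 * std::acos(-1.0);
    double n = (double)diag_cov.size();
    return 0.5 * (n * std::log(two_pi) + log_det);
  }
}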
// method: createContexts
//
// arguments:
//  Vector<SearchSymbol>& symbols: (input) input symbols
//  long& length: (input) length of the contexts
//  Vector<ContextMap>& all_contexts: (output) all contexts
//
// return: a boolean value indicating status
//
// this method creates all possible contexts
//
boolean PhoneticDecisionTree::createContexts(Vector<SearchSymbol>& symbols_a,
                                             long& length_a,
                                             Vector<ContextMap>& all_contexts_a) {

  // local variable
  //
  boolean status = true;

  // set the capacity of the all-contexts vector
  //
  long capacity = (long)Integral::pow((double)symbols_a.length(), length_a);
  all_contexts_a.setCapacity(capacity);

  // loop over the context-length. we'll add the symbols for a certain
  // context level in each iteration
  //
  for (long i = 0; i < length_a; i++) {

    // call the function that appends the contexts at each context-level
    //
    status = appendContextLevel(symbols_a, i, all_contexts_a);
  }

  // exit gracefully
  //
  return status;
}

// method: appendContextLevel
//
// arguments:
//  Vector<SearchSymbol>& symbols: (input) input symbols
//  long& level: (input) level of the contexts
//  Vector<ContextMap>& all_contexts: (output) appended contexts
//
// return: a boolean value indicating status
//
// this method appends the symbols to the contexts at any given
// context-level
//
boolean PhoneticDecisionTree::appendContextLevel(Vector<SearchSymbol>& symbols_a,
                                                 long& level_a,
                                                 Vector<ContextMap>& all_contexts_a) {

  // local variables
  //
  boolean status = true;
  Vector<ContextMap> temp_all_contexts;

  // set the capacity of the temp-all-contexts vector
  //
  long capacity = (long)Integral::pow((double)symbols_a.length(),
                                      (level_a + 1));
  temp_all_contexts.setCapacity(capacity);

  // loop over all the existing contexts, remove them and add new
  // contexts built from the existing ones
  //
  long all_contexts_len = all_contexts_a.length();

  // increment all_contexts_len to 1 if the level is zero. this means
  // that no contexts exist yet
  //
  if (level_a == (long)0) {
    all_contexts_len++;
  }

  for (long j = 0; j < all_contexts_len; j++) {

    // get the current context, only if the context exists
    //
    Vector<SearchSymbol> vec_ss;
    if (level_a != (long)0) {
      vec_ss = all_contexts_a(j).getContext();
    }

    // loop over all the symbols and add each of them to this context.
    // this will increase the context-length
    //
    for (long i = 0; i < symbols_a.length(); i++) {

      // append the context with the symbol
      //
      ContextMap context;
      Vector<SearchSymbol> temp_vec_ss;
      temp_vec_ss.assign(vec_ss);
      temp_vec_ss.concat(symbols_a(i));
      context.setContext(temp_vec_ss);

      // add the context (partial or full) to the all-contexts vector
      //
      long context_len = temp_all_contexts.length();
      temp_all_contexts.setLength(context_len + (long)1);
      temp_all_contexts(context_len).assign(context);
    }
  }

  // assign the contexts
  //
  all_contexts_a.clear();
  all_contexts_a.assign(temp_all_contexts);

  // exit gracefully
  //
  return status;
}
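// [illustrative sketch, not part of the original file] createContexts()
// and appendContextLevel() together enumerate every context of the given
// length, i.e. all N^length combinations for N symbols: each call to
// appendContextLevel() extends every partial context by every symbol. a
// minimal standalone version using std::vector<std::string> in place of
// Vector<SearchSymbol> and ContextMap (names hypothetical):
//
#include <cstddef>
#include <string>
#include <vector>

namespace pdt_sketch {

  static std::vector<std::vector<std::string> >
  createContextsSketch(const std::vector<std::string>& symbols,
                       long length) {

    // start with a single empty context and extend it one level at a time
    //
    std::vector<std::vector<std::string> > contexts(1);

    for (long level = 0; level < length; level++) {

      std::vector<std::vector<std::string> > extended;
      extended.reserve(contexts.size() * symbols.size());

      // append every symbol to every existing partial context
      //
      for (std::size_t j = 0; j < contexts.size(); j++) {
        for (std::size_t i = 0; i < symbols.size(); i++) {
          std::vector<std::string> ctx = contexts[j];
          ctx.push_back(symbols[i]);
          extended.push_back(ctx);
        }
      }
      contexts.swap(extended);
    }

    // for N symbols this returns N^length contexts
    //
    return contexts;
  }
}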
// method: validateContexts
//
// arguments:
//  Vector<SearchSymbol>& contextless_symbol_table: (input) contextless
//                                                  symbol-table
//  Vector<ContextMap>& all_contexts: (input) all contexts
//  Vector<ContextMap>& valid_contexts: (output) all valid contexts
//
// return: a boolean value indicating status
//
// this method removes all the contexts that are non-allowable. the
// non-allowable contexts are:
//  1. NO_LEFT_CONTEXT can't occur as a right symbol in the context
//  2. NO_RIGHT_CONTEXT can't occur as a left symbol in the context
//  3. neither of these can occur as the central symbol
//  4. the central symbol can't be a contextless symbol
//
boolean PhoneticDecisionTree::validateContexts(Vector<SearchSymbol>& contextless_symbol_table_a,
                                               Vector<ContextMap>& all_contexts_a,
                                               Vector<ContextMap>& valid_contexts_a) {

  // local variable
  //
  boolean status = true;

  // set the capacity of the valid-contexts vector
  //
  long capacity = all_contexts_a.length();
  valid_contexts_a.setCapacity(capacity);

  // loop over all the contexts and accumulate the valid contexts
  //
  for (long i = 0; i < all_contexts_a.length(); i++) {

    // get the context and check all of the 4 invalid conditions
    //
    boolean valid = true;
    Vector<SearchSymbol> vec_ss = all_contexts_a(i).getContext();
    long vec_len = vec_ss.length();

    for (long j = 0; j < vec_len; j++) {

      // check for condition 1
      //
      if ((j < (vec_len / 2)) &&
          (vec_ss(j).eq(SearchSymbol::NO_RIGHT_CONTEXT))) {
        valid = false;
      }

      // check for condition 2
      //
      if ((j > (vec_len / 2)) &&
          (vec_ss(j).eq(SearchSymbol::NO_LEFT_CONTEXT))) {
        valid = false;
      }

      // check for condition 3
      //
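// [illustrative sketch, not part of the original file] the body of
// validateContexts() is cut off above. the following is a minimal
// standalone predicate for the four rules listed in its header comment,
// using plain std::string symbols and hypothetical marker values in place
// of SearchSymbol::NO_LEFT_CONTEXT / NO_RIGHT_CONTEXT; it is a sketch of
// the stated rules, not the original implementation:
//
#include <algorithm>
#include <string>
#include <vector>

namespace pdt_sketch {

  static bool isValidContextSketch(const std::vector<std::string>& context,
                                   const std::vector<std::string>& contextless,
                                   const std::string& no_left,
                                   const std::string& no_right) {

    long len = (long)context.size();
    long center = len / 2;

    for (long j = 0; j < len; j++) {

      // 1. the no-left-context marker cannot occur right of center
      //
      if ((j > center) && (context[j] == no_left)) {
        return false;
      }

      // 2. the no-right-context marker cannot occur left of center
      //
      if ((j < center) && (context[j] == no_right)) {
        return false;
      }
    }

    // 3. neither marker may be the central symbol
    //
    if ((context[center] == no_left) || (context[center] == no_right)) {
      return false;
    }

    // 4. the central symbol may not be a contextless symbol
    //
    if (std::find(contextless.begin(), contextless.end(),
                  context[center]) != contextless.end()) {
      return false;
    }

    return true;
  }
}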