📄 ridor.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Ridor.java * Copyright (C) 2001 Xin Xu * */package weka.classifiers.rules;import java.io.*;import java.util.*;import weka.core.*;import weka.classifiers.*;/** * The implementation of a RIpple-DOwn Rule learner. * * It generates the default rule first and then the exceptions for the default rule * with the least (weighted) error rate. Then it generates the "best" exceptions for * each exception and iterates until pure. Thus it performs a tree-like expansion of * exceptions and the leaf has only default rule but no exceptions. <br> * The exceptions are a set of rules that predict the class other than class in default * rule. IREP is used to find out the exceptions. <p> * There are five inner classes defined in this class. <br> * The first is Ridor_node, which implements one node in the Ridor tree. It's basically * composed of a default class and a set of exception rules to the default class.<br> * The second inner class is RidorRule, which implements a single exception rule * using REP.<br> * The last three inner classes are only used in RidorRule. They are Antd, NumericAntd * and NominalAntd, which all implement a single antecedent in the RidorRule. <br> * The Antd class is an abstract class, which has two subclasses, NumericAntd and * NominalAntd, to implement the corresponding abstract functions. These two subclasses * implement the functions related to a antecedent with a nominal attribute and a numeric * attribute respectively.<p> * * * @author: Xin XU (xx5@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class Ridor extends Classifier implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler { /** The number of folds to split data into Grow and Prune for IREP */ private int m_Folds = 3; /** The number of shuffles performed on the data for randomization */ private int m_Shuffle = 1; /** Random object for randomization */ private Random m_Random = null; /** The seed to perform randomization */ private int m_Seed = 1; /** Whether use error rate on all the data */ private boolean m_IsAllErr = false; /** Whether use majority class as default class */ private boolean m_IsMajority = false; /** The root of Ridor */ private Ridor_node m_Root = null; /** The class attribute of the data */ private Attribute m_Class; /** Statistics of the data */ private double m_Cover, m_Err; /** The minimal number of instance weights within a split*/ private double m_MinNo = 2.0; /** * Private class implementing the single node of Ridor. * It consists of a default class label, a set of exceptions to the default rule * and the exceptions to each exception */ private class Ridor_node implements Serializable { /** The default class label */ private double defClass = Double.NaN; /** The set of exceptions of the default rule. Each element also has its own exceptions and the consequent of each rule is determined by its exceptions */ private RidorRule[] rules = null; /** The exceptions of the exception rules */ private Ridor_node[] excepts = null; /** The level of this node */ private int level; /** "Get" member functions */ public double getDefClass() { return defClass; } public RidorRule[] getRules() { return rules; } public Ridor_node[] getExcepts() { return excepts; } /** * Builds a ripple-down manner rule learner. * * @param dataByClass the divided data by their class label. The real class * labels of the instances are all set to 0 * @param lvl the level of the parent node * @exception Exception if ruleset of this node cannot be built */ public void findRules(Instances[] dataByClass, int lvl) throws Exception { Vector finalRules = null; int clas = -1; double[] isPure = new double[dataByClass.length]; int numMajority = 0; level = lvl + 1; for(int h=0; h < dataByClass.length; h++){ isPure[h] = dataByClass[h].sumOfWeights(); if(Utils.grOrEq(isPure[h], m_Folds)) numMajority++; // Count how many class labels have enough instances } if(numMajority <= 1){ // The data is pure or not enough defClass = (double)Utils.maxIndex(isPure); return; } double total = Utils.sum(isPure); if(m_IsMajority){ defClass = (double)Utils.maxIndex(isPure); Instances data = new Instances(dataByClass[(int)defClass]); int index = data.classIndex(); for(int j=0; j<data.numInstances(); j++) data.instance(j).setClassValue(1); // Set one class as default for(int k=0; k < dataByClass.length; k++) // Merge into one dataset if(k != (int)defClass){ if(data.numInstances() >= dataByClass[k].numInstances()) data = append(data, dataByClass[k]); else data = append(dataByClass[k], data); } data.setClassIndex(index); // Position new class label double classCount = total - isPure[(int)defClass]; finalRules = new Vector(); buildRuleset(data, classCount, finalRules); if(finalRules.size() == 0) // No good rules built return; } else{ double maxAcRt = isPure[Utils.maxIndex(isPure)] / total; // Find default class for(int i=0; i < dataByClass.length; i++){ if(isPure[i] >= m_Folds){ Instances data = new Instances(dataByClass[i]); int index = data.classIndex(); for(int j=0; j<data.numInstances(); j++) data.instance(j).setClassValue(1); // Set one class as default for(int k=0; k < dataByClass.length; k++) // Merge into one dataset if(k != i){ if(data.numInstances() >= dataByClass[k].numInstances()) data = append(data, dataByClass[k]); else data = append(dataByClass[k], data); } data.setClassIndex(index); // Position new class label /* Build a set of rules */ double classCount = data.sumOfWeights() - isPure[i]; Vector ruleset = new Vector(); double wAcRt = buildRuleset(data, classCount, ruleset); if(Utils.gr(wAcRt, maxAcRt)){ finalRules = ruleset; maxAcRt = wAcRt; clas = i; } } } if(finalRules == null){ // No good rules found, set majority class as default defClass = (double)Utils.maxIndex(isPure); return; } defClass = (double)clas; } /* Store the exception rules and default class in this node */ int size = finalRules.size(); rules = new RidorRule[size]; excepts = new Ridor_node[size]; for(int l=0; l < size; l++) rules[l] = (RidorRule)finalRules.elementAt(l); /* Build exceptions for each exception rule */ Instances[] uncovered = dataByClass; if(level == 1) // The error of default rule m_Err = total - uncovered[(int)defClass].sumOfWeights(); uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0); for(int m=0; m < size; m++){ /* The data covered by this rule, they are also deducted from the original data */ Instances[][] dvdData = divide(rules[m], uncovered); Instances[] covered = dvdData[0]; // Data covered by the rule //uncovered = dvdData[1]; // Data not covered by the rule excepts[m] = new Ridor_node(); excepts[m].findRules(covered, level);// Find exceptions on the covered data } } /** * Private function to build a rule set and return the weighted avg of accuracy * rate of rules in the set. * * @param insts the data used to build ruleset * @param classCount the counts of the instances with the predicted class but not * yet covered by the ruleset * @param ruleset the ruleset to be built * @return the weighted accuracy rate of the ruleset * @exception if the rules cannot be built properly */ private double buildRuleset(Instances insts, double classCount, Vector ruleset) throws Exception { Instances data = new Instances(insts); double wAcRt = 0; // The weighted accuracy rate of this ruleset double total = data.sumOfWeights(); while( classCount >= m_Folds ){ // Data is not pure RidorRule bestRule = null; double bestWorthRate= -1; // The best worth achieved by double bestWorth = -1; // randomization of the data RidorRule rule = new RidorRule(); rule.setPredictedClass(0); // Predict the classes other than default if(m_Random == null) m_Random = new Random(m_Seed); for(int j = 0; j < m_Shuffle; j++){ if(m_Shuffle > 1) data.randomize(m_Random); rule.buildClassifier(data); double wr, w; // Worth rate and worth if(m_IsAllErr){ wr = (rule.getWorth()+rule.getAccuG()) / (rule.getCoverP()+rule.getCoverG()); w = rule.getWorth() + rule.getAccuG(); } else{ wr = rule.getWorthRate(); w = rule.getWorth(); } if(Utils.gr(wr, bestWorthRate) || (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){ bestRule = rule; bestWorthRate = wr; bestWorth = w; } } if (bestRule == null) throw new Exception("Something wrong here inside findRule()!"); if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds())) break; // No more good rules generated Instances newData = new Instances(data); data = new Instances(newData, 0);// Empty the data classCount = 0; double cover = 0; // Coverage of this rule on whole data for(int l=0; l<newData.numInstances(); l++){ Instance datum = newData.instance(l); if(!bestRule.isCover(datum)){// Data not covered by the previous rule data.add(datum); if(Utils.eq(datum.classValue(), 0)) classCount += datum.weight(); // The predicted class in the data } else cover += datum.weight(); } wAcRt += computeWeightedAcRt(bestWorthRate, cover, total); ruleset.addElement(bestRule); } /* The weighted def. accuracy */ double wDefAcRt = (data.sumOfWeights()-classCount) / total; wAcRt += wDefAcRt; return wAcRt; } /** * Private function to combine two data * * @param data1 the data to which data2 is appended * @param data2 the data to be appended to data1 * @return the merged data */ private Instances append(Instances data1, Instances data2){ Instances data = new Instances(data1); for(int i=0; i<data2.numInstances(); i++) data.add(data2.instance(i)); return data; } /** * Compute the weighted average of accuracy rate of a certain rule * Each rule is weighted by its coverage proportion in the whole data. * So the accuracy rate of one ruleset is actually * * (worth rate) * (coverage proportion) * * coverage of the rule on the whole data * where coverage proportion = ----------------------------------------- * the whole data size fed into the ruleset * * @param worthRt the worth rate * @param cover the coverage of the rule on the whole data * @param total the total data size fed into the ruleset * @return the weighted accuracy rate of this rule */ private double computeWeightedAcRt(double worthRt, double cover, double total){ return (worthRt * (cover/total)); } /** * Builds an array of data according to their true class label * Each bag of data is filtered through the rule specified and * is totally covered by this rule. * Both the data covered and uncovered by the rule will be returned * by the procedure. * * @param rule the rule covering the data * @param dataByClass the array of data to be covered by the rule * @return the arrays of data both covered and not covered by the rule */ private Instances[][] divide(RidorRule rule, Instances[] dataByClass){ int len = dataByClass.length; Instances[][] dataBags = new Instances[2][len]; for(int i=0; i < len; i++){ Instances[] dvdData = rule.coveredByRule(dataByClass[i]); dataBags[0][i] = dvdData[0]; // Covered by the rule dataBags[1][i] = dvdData[1]; // Not covered by the rule } return dataBags; } /** * The size of the certain node of Ridor, i.e. the * number of rules generated within and below this node * * @return the size of this node */ public int size(){ int size = 0; if(rules != null){ for(int i=0; i < rules.length; i++) size += excepts[i].size(); // The children's size size += rules.length; // This node's size } return size; } /** * Prints the all the rules of one node of Ridor. * * @return a textual description of one node of Ridor */ public String toString(){ StringBuffer text = new StringBuffer(); if(level == 1) text.append(m_Class.name() + " = " + m_Class.value((int)getDefClass())+ " ("+m_Cover+"/"+m_Err+")\n"); if(rules != null){ for(int i=0; i < rules.length; i++){ for(int j=0; j < level; j++) text.append(" "); String cl = m_Class.value((int)(excepts[i].getDefClass())); text.append(" Except " + rules[i].toString(m_Class.name(), cl)+ "\n" + excepts[i].toString()); } } return text.toString(); } } /** * This class implements a single rule that predicts the 2-class distribution. * * A rule consists of antecedents "AND"ed together and the consequent (class value) * for the classification. In this case, the consequent is the distribution of * the available classes (always 2 classes) in the dataset. * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to select * an antecedent and Reduced Error Prunning (REP) is used to prune the rule. * */ private class RidorRule implements WeightedInstancesHandler{ /** The internal representation of the class label to be predicted*/ private double m_Class = -1; /** The class attribute of the data*/ private Attribute m_ClassAttribute; /** The vector of antecedents of this rule*/ protected FastVector m_Antds = null; /** The worth rate of this rule, in this case, accuracy rate in the pruning data*/ private double m_WorthRate = 0; /** The worth value of this rule, in this case, accurate # in pruning data*/ private double m_Worth = 0; /** The sum of weights of the data covered by this rule in the pruning data */ private double m_CoverP = 0; /** The accurate and covered data of this rule in the growing data */ private double m_CoverG = 0, m_AccuG = 0; /** The access functions for parameters */ public void setPredictedClass(double cl){ m_Class = cl; } public double getPredictedClass(){ return m_Class; } /** * Builds a single rule learner with REP dealing with 2 classes. * This rule learner always tries to predict the class with label * m_Class. * * @param instances the training data * @exception Exception if classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -