📄 ridor.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Ridor.java * Copyright (C) 2001 Xin Xu * */package weka.classifiers.rules;import weka.classifiers.Classifier;import weka.core.AdditionalMeasureProducer;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.UnsupportedClassTypeException;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.io.Serializable;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * The implementation of a RIpple-DOwn Rule learner.<br/> * <br/> * It generates a default rule first and then the exceptions for the default rule with the least (weighted) error rate. Then it generates the "best" exceptions for each exception and iterates until pure. Thus it performs a tree-like expansion of exceptions.The exceptions are a set of rules that predict classes other than the default. IREP is used to generate the exceptions.<br/> * <br/> * For more information about Ripple-Down Rules, see:<br/> * <br/> * Brian R. Gaines, Paul Compton (1995). Induction of Ripple-Down Rules Applied to Modeling Large Databases. J. Intell. Inf. Syst.. 5(3):211-228. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @article{Gaines1995, * author = {Brian R. Gaines and Paul Compton}, * journal = {J. Intell. Inf. Syst.}, * number = {3}, * pages = {211-228}, * title = {Induction of Ripple-Down Rules Applied to Modeling Large Databases}, * volume = {5}, * year = {1995}, * PDF = {http://pages.cpsc.ucalgary.ca/~gaines/reports/ML/JIIS95/JIIS95.pdf} * } * </pre> * <p/> <!-- technical-bibtex-end --> * * There are five inner classes defined in this class. <br> * The first is Ridor_node, which implements one node in the Ridor tree. It's basically * composed of a default class and a set of exception rules to the default class.<br> * The second inner class is RidorRule, which implements a single exception rule * using REP.<br> * The last three inner classes are only used in RidorRule. They are Antd, NumericAntd * and NominalAntd, which all implement a single antecedent in the RidorRule. <br> * The Antd class is an abstract class, which has two subclasses, NumericAntd and * NominalAntd, to implement the corresponding abstract functions. These two subclasses * implement the functions related to a antecedent with a nominal attribute and a numeric * attribute respectively.<p> * <!-- options-start --> * Valid options are: <p/> * * <pre> -F <number of folds> * Set number of folds for IREP * One fold is used as pruning set. * (default 3)</pre> * * <pre> -S <number of shuffles> * Set number of shuffles to randomize * the data in order to get better rule. * (default 10)</pre> * * <pre> -A * Set flag of whether use the error rate * of all the data to select the default class * in each step. If not set, the learner will only use the error rate in the pruning data</pre> * * <pre> -M * Set flag of whether use the majority class as * the default class in each step instead of * choosing default class based on the error rate * (if the flag is not set)</pre> * * <pre> -N <min. weights> * Set the minimal weights of instances * within a split. * (default 2.0)</pre> * <!-- options-end --> * * @author Xin XU (xx5@cs.waikato.ac.nz) * @version $Revision: 1.17 $ */public class Ridor extends Classifier implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -7261533075088314436L; /** The number of folds to split data into Grow and Prune for IREP */ private int m_Folds = 3; /** The number of shuffles performed on the data for randomization */ private int m_Shuffle = 1; /** Random object for randomization */ private Random m_Random = null; /** The seed to perform randomization */ private int m_Seed = 1; /** Whether use error rate on all the data */ private boolean m_IsAllErr = false; /** Whether use majority class as default class */ private boolean m_IsMajority = false; /** The root of Ridor */ private Ridor_node m_Root = null; /** The class attribute of the data */ private Attribute m_Class; /** Statistics of the data */ private double m_Cover, m_Err; /** The minimal number of instance weights within a split*/ private double m_MinNo = 2.0; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "The implementation of a RIpple-DOwn Rule learner.\n\n" + "It generates a default rule first and then the exceptions for the default rule " + "with the least (weighted) error rate. Then it generates the \"best\" exceptions for " + "each exception and iterates until pure. Thus it performs a tree-like expansion of " + "exceptions." + "The exceptions are a set of rules that predict classes other than the default. " + "IREP is used to generate the exceptions.\n\n" + "For more information about Ripple-Down Rules, see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "Brian R. Gaines and Paul Compton"); result.setValue(Field.YEAR, "1995"); result.setValue(Field.TITLE, "Induction of Ripple-Down Rules Applied to Modeling Large Databases"); result.setValue(Field.JOURNAL, "J. Intell. Inf. Syst."); result.setValue(Field.VOLUME, "5"); result.setValue(Field.NUMBER, "3"); result.setValue(Field.PAGES, "211-228"); result.setValue(Field.PDF, "http://pages.cpsc.ucalgary.ca/~gaines/reports/ML/JIIS95/JIIS95.pdf"); return result; } /** * Private class implementing the single node of Ridor. * It consists of a default class label, a set of exceptions to the default rule * and the exceptions to each exception */ private class Ridor_node implements Serializable { /** for serialization */ static final long serialVersionUID = -581370560157467677L; /** The default class label */ private double defClass = Double.NaN; /** The set of exceptions of the default rule. Each element also has its own exceptions and the consequent of each rule is determined by its exceptions */ private RidorRule[] rules = null; /** The exceptions of the exception rules */ private Ridor_node[] excepts = null; /** The level of this node */ private int level; /** * Gets the default class label * * @return the default class label */ public double getDefClass() { return defClass; } /** * Gets the set of exceptions * * @return the set of exceptions */ public RidorRule[] getRules() { return rules; } /** * Gets the exceptions of the exceptions rules * * @return the exceptions of the exceptions rules */ public Ridor_node[] getExcepts() { return excepts; } /** * Builds a ripple-down manner rule learner. * * @param dataByClass the divided data by their class label. The real class * labels of the instances are all set to 0 * @param lvl the level of the parent node * @throws Exception if ruleset of this node cannot be built */ public void findRules(Instances[] dataByClass, int lvl) throws Exception { Vector finalRules = null; int clas = -1; double[] isPure = new double[dataByClass.length]; int numMajority = 0; level = lvl + 1; for(int h=0; h < dataByClass.length; h++){ isPure[h] = dataByClass[h].sumOfWeights(); if(Utils.grOrEq(isPure[h], m_Folds)) numMajority++; // Count how many class labels have enough instances } if(numMajority <= 1){ // The data is pure or not enough defClass = (double)Utils.maxIndex(isPure); return; } double total = Utils.sum(isPure); if(m_IsMajority){ defClass = (double)Utils.maxIndex(isPure); Instances data = new Instances(dataByClass[(int)defClass]); int index = data.classIndex(); for(int j=0; j<data.numInstances(); j++) data.instance(j).setClassValue(1); // Set one class as default for(int k=0; k < dataByClass.length; k++) // Merge into one dataset if(k != (int)defClass){ if(data.numInstances() >= dataByClass[k].numInstances()) data = append(data, dataByClass[k]); else data = append(dataByClass[k], data); } data.setClassIndex(index); // Position new class label double classCount = total - isPure[(int)defClass]; finalRules = new Vector(); buildRuleset(data, classCount, finalRules); if(finalRules.size() == 0) // No good rules built return; } else{ double maxAcRt = isPure[Utils.maxIndex(isPure)] / total; // Find default class for(int i=0; i < dataByClass.length; i++){ if(isPure[i] >= m_Folds){ Instances data = new Instances(dataByClass[i]); int index = data.classIndex(); for(int j=0; j<data.numInstances(); j++) data.instance(j).setClassValue(1); // Set one class as default for(int k=0; k < dataByClass.length; k++) // Merge into one dataset if(k != i){ if(data.numInstances() >= dataByClass[k].numInstances()) data = append(data, dataByClass[k]); else data = append(dataByClass[k], data); } data.setClassIndex(index); // Position new class label /* Build a set of rules */ double classCount = data.sumOfWeights() - isPure[i]; Vector ruleset = new Vector(); double wAcRt = buildRuleset(data, classCount, ruleset); if(Utils.gr(wAcRt, maxAcRt)){ finalRules = ruleset; maxAcRt = wAcRt; clas = i; } } } if(finalRules == null){ // No good rules found, set majority class as default defClass = (double)Utils.maxIndex(isPure); return; } defClass = (double)clas; } /* Store the exception rules and default class in this node */ int size = finalRules.size(); rules = new RidorRule[size]; excepts = new Ridor_node[size]; for(int l=0; l < size; l++) rules[l] = (RidorRule)finalRules.elementAt(l); /* Build exceptions for each exception rule */ Instances[] uncovered = dataByClass; if(level == 1) // The error of default rule m_Err = total - uncovered[(int)defClass].sumOfWeights(); uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0); for(int m=0; m < size; m++){ /* The data covered by this rule, they are also deducted from the original data */ Instances[][] dvdData = divide(rules[m], uncovered); Instances[] covered = dvdData[0]; // Data covered by the rule //uncovered = dvdData[1]; // Data not covered by the rule excepts[m] = new Ridor_node(); excepts[m].findRules(covered, level);// Find exceptions on the covered data } } /** * Private function to build a rule set and return the weighted avg of accuracy * rate of rules in the set. * * @param insts the data used to build ruleset * @param classCount the counts of the instances with the predicted class but not * yet covered by the ruleset * @param ruleset the ruleset to be built * @return the weighted accuracy rate of the ruleset * @throws Exception if the rules cannot be built properly */ private double buildRuleset(Instances insts, double classCount, Vector ruleset) throws Exception { Instances data = new Instances(insts); double wAcRt = 0; // The weighted accuracy rate of this ruleset double total = data.sumOfWeights(); while( classCount >= m_Folds ){ // Data is not pure RidorRule bestRule = null; double bestWorthRate= -1; // The best worth achieved by double bestWorth = -1; // randomization of the data RidorRule rule = new RidorRule(); rule.setPredictedClass(0); // Predict the classes other than default for(int j = 0; j < m_Shuffle; j++){ if(m_Shuffle > 1) data.randomize(m_Random); rule.buildClassifier(data); double wr, w; // Worth rate and worth if(m_IsAllErr){ wr = (rule.getWorth()+rule.getAccuG()) / (rule.getCoverP()+rule.getCoverG()); w = rule.getWorth() + rule.getAccuG(); } else{ wr = rule.getWorthRate(); w = rule.getWorth(); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -