📄 ridor.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Ridor.java
* Copyright (C) 2001 Xin Xu
*
*/
package weka.classifiers.rules;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.AdditionalMeasureProducer;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
* The implementation of a RIpple-DOwn Rule learner.
*
* It generates the default rule first and then the exceptions for the default rule
* with the least (weighted) error rate. Then it generates the "best" exceptions for
* each exception and iterates until pure. Thus it performs a tree-like expansion of
* exceptions and the leaf has only default rule but no exceptions. <br>
* The exceptions are a set of rules that predict the class other than class in default
* rule. IREP is used to find out the exceptions. <p>
* There are five inner classes defined in this class. <br>
* The first is Ridor_node, which implements one node in the Ridor tree. It's basically
* composed of a default class and a set of exception rules to the default class.<br>
* The second inner class is RidorRule, which implements a single exception rule
* using REP.<br>
* The last three inner classes are only used in RidorRule. They are Antd, NumericAntd
* and NominalAntd, which all implement a single antecedent in the RidorRule. <br>
* The Antd class is an abstract class, which has two subclasses, NumericAntd and
* NominalAntd, to implement the corresponding abstract functions. These two subclasses
* implement the functions related to a antecedent with a nominal attribute and a numeric
* attribute respectively.<p>
*
*
* @author: Xin XU (xx5@cs.waikato.ac.nz)
* @version $Revision$
*/
public class Ridor extends Classifier
implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler {
/** The number of folds to split data into Grow and Prune for IREP */
private int m_Folds = 3;
/** The number of shuffles performed on the data for randomization */
private int m_Shuffle = 1;
/** Random object for randomization */
private Random m_Random = null;
/** The seed to perform randomization */
private int m_Seed = 1;
/** Whether use error rate on all the data */
private boolean m_IsAllErr = false;
/** Whether use majority class as default class */
private boolean m_IsMajority = false;
/** The root of Ridor */
private Ridor_node m_Root = null;
/** The class attribute of the data */
private Attribute m_Class;
/** Statistics of the data */
private double m_Cover, m_Err;
/** The minimal number of instance weights within a split*/
private double m_MinNo = 2.0;
/**
* Returns a string describing classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "The implementation of a RIpple-DOwn Rule learner. "
+ "It generates a default rule first and then the exceptions for the default rule "
+ "with the least (weighted) error rate. Then it generates the \"best\" exceptions for "
+ "each exception and iterates until pure. Thus it performs a tree-like expansion of "
+ "exceptions."
+ "The exceptions are a set of rules that predict classes other than the default. "
+ "IREP is used to generate the exceptions.";
}
/**
* Private class implementing the single node of Ridor.
* It consists of a default class label, a set of exceptions to the default rule
* and the exceptions to each exception
*/
private class Ridor_node implements Serializable {
/** The default class label */
private double defClass = Double.NaN;
/** The set of exceptions of the default rule.
Each element also has its own exceptions and the consequent of each rule
is determined by its exceptions */
private RidorRule[] rules = null;
/** The exceptions of the exception rules */
private Ridor_node[] excepts = null;
/** The level of this node */
private int level;
/** "Get" member functions */
public double getDefClass() { return defClass; }
public RidorRule[] getRules() { return rules; }
public Ridor_node[] getExcepts() { return excepts; }
/**
* Builds a ripple-down manner rule learner.
*
* @param dataByClass the divided data by their class label. The real class
* labels of the instances are all set to 0
* @param lvl the level of the parent node
* @exception Exception if ruleset of this node cannot be built
*/
public void findRules(Instances[] dataByClass, int lvl) throws Exception {
Vector finalRules = null;
int clas = -1;
double[] isPure = new double[dataByClass.length];
int numMajority = 0;
level = lvl + 1;
for(int h=0; h < dataByClass.length; h++){
isPure[h] = dataByClass[h].sumOfWeights();
if(Utils.grOrEq(isPure[h], m_Folds))
numMajority++; // Count how many class labels have enough instances
}
if(numMajority <= 1){ // The data is pure or not enough
defClass = (double)Utils.maxIndex(isPure);
return;
}
double total = Utils.sum(isPure);
if(m_IsMajority){
defClass = (double)Utils.maxIndex(isPure);
Instances data = new Instances(dataByClass[(int)defClass]);
int index = data.classIndex();
for(int j=0; j<data.numInstances(); j++)
data.instance(j).setClassValue(1); // Set one class as default
for(int k=0; k < dataByClass.length; k++) // Merge into one dataset
if(k != (int)defClass){
if(data.numInstances() >= dataByClass[k].numInstances())
data = append(data, dataByClass[k]);
else data = append(dataByClass[k], data);
}
data.setClassIndex(index); // Position new class label
double classCount = total - isPure[(int)defClass];
finalRules = new Vector();
buildRuleset(data, classCount, finalRules);
if(finalRules.size() == 0) // No good rules built
return;
}
else{
double maxAcRt = isPure[Utils.maxIndex(isPure)] / total;
// Find default class
for(int i=0; i < dataByClass.length; i++){
if(isPure[i] >= m_Folds){
Instances data = new Instances(dataByClass[i]);
int index = data.classIndex();
for(int j=0; j<data.numInstances(); j++)
data.instance(j).setClassValue(1); // Set one class as default
for(int k=0; k < dataByClass.length; k++) // Merge into one dataset
if(k != i){
if(data.numInstances() >= dataByClass[k].numInstances())
data = append(data, dataByClass[k]);
else data = append(dataByClass[k], data);
}
data.setClassIndex(index); // Position new class label
/* Build a set of rules */
double classCount = data.sumOfWeights() - isPure[i];
Vector ruleset = new Vector();
double wAcRt = buildRuleset(data, classCount, ruleset);
if(Utils.gr(wAcRt, maxAcRt)){
finalRules = ruleset;
maxAcRt = wAcRt;
clas = i;
}
}
}
if(finalRules == null){ // No good rules found, set majority class as default
defClass = (double)Utils.maxIndex(isPure);
return;
}
defClass = (double)clas;
}
/* Store the exception rules and default class in this node */
int size = finalRules.size();
rules = new RidorRule[size];
excepts = new Ridor_node[size];
for(int l=0; l < size; l++)
rules[l] = (RidorRule)finalRules.elementAt(l);
/* Build exceptions for each exception rule */
Instances[] uncovered = dataByClass;
if(level == 1) // The error of default rule
m_Err = total - uncovered[(int)defClass].sumOfWeights();
uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0);
for(int m=0; m < size; m++){
/* The data covered by this rule, they are also deducted from the original data */
Instances[][] dvdData = divide(rules[m], uncovered);
Instances[] covered = dvdData[0]; // Data covered by the rule
//uncovered = dvdData[1]; // Data not covered by the rule
excepts[m] = new Ridor_node();
excepts[m].findRules(covered, level);// Find exceptions on the covered data
}
}
/**
* Private function to build a rule set and return the weighted avg of accuracy
* rate of rules in the set.
*
* @param insts the data used to build ruleset
* @param classCount the counts of the instances with the predicted class but not
* yet covered by the ruleset
* @param ruleset the ruleset to be built
* @return the weighted accuracy rate of the ruleset
* @exception if the rules cannot be built properly
*/
private double buildRuleset(Instances insts, double classCount, Vector ruleset)
throws Exception {
Instances data = new Instances(insts);
double wAcRt = 0; // The weighted accuracy rate of this ruleset
double total = data.sumOfWeights();
while( classCount >= m_Folds ){ // Data is not pure
RidorRule bestRule = null;
double bestWorthRate= -1; // The best worth achieved by
double bestWorth = -1; // randomization of the data
RidorRule rule = new RidorRule();
rule.setPredictedClass(0); // Predict the classes other than default
for(int j = 0; j < m_Shuffle; j++){
if(m_Shuffle > 1)
data.randomize(m_Random);
rule.buildClassifier(data);
double wr, w; // Worth rate and worth
if(m_IsAllErr){
wr = (rule.getWorth()+rule.getAccuG()) /
(rule.getCoverP()+rule.getCoverG());
w = rule.getWorth() + rule.getAccuG();
}
else{
wr = rule.getWorthRate();
w = rule.getWorth();
}
if(Utils.gr(wr, bestWorthRate) ||
(Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){
bestRule = rule;
bestWorthRate = wr;
bestWorth = w;
}
}
if (bestRule == null)
throw new Exception("Something wrong here inside findRule()!");
if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds()))
break; // No more good rules generated
Instances newData = new Instances(data);
data = new Instances(newData, 0);// Empty the data
classCount = 0;
double cover = 0; // Coverage of this rule on whole data
for(int l=0; l<newData.numInstances(); l++){
Instance datum = newData.instance(l);
if(!bestRule.isCover(datum)){// Data not covered by the previous rule
data.add(datum);
if(Utils.eq(datum.classValue(), 0))
classCount += datum.weight(); // The predicted class in the data
}
else cover += datum.weight();
}
wAcRt += computeWeightedAcRt(bestWorthRate, cover, total);
ruleset.addElement(bestRule);
}
/* The weighted def. accuracy */
double wDefAcRt = (data.sumOfWeights()-classCount) / total;
wAcRt += wDefAcRt;
return wAcRt;
}
/**
* Private function to combine two data
*
* @param data1 the data to which data2 is appended
* @param data2 the data to be appended to data1
* @return the merged data
*/
private Instances append(Instances data1, Instances data2){
Instances data = new Instances(data1);
for(int i=0; i<data2.numInstances(); i++)
data.add(data2.instance(i));
return data;
}
/**
* Compute the weighted average of accuracy rate of a certain rule
* Each rule is weighted by its coverage proportion in the whole data.
* So the accuracy rate of one ruleset is actually
*
* (worth rate) * (coverage proportion)
*
* coverage of the rule on the whole data
* where coverage proportion = -----------------------------------------
* the whole data size fed into the ruleset
*
* @param worthRt the worth rate
* @param cover the coverage of the rule on the whole data
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -