ridor.java
Part of the Java source code for the ALPHAMINER data mining software (page 1 of 4)
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Ridor.java
 *    Copyright (C) 2001 Xin Xu
 *
 */

package weka.classifiers.rules;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.AdditionalMeasureProducer;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * The implementation of a RIpple-DOwn Rule learner.
 *
 * It generates the default rule first and then the exceptions for the default rule
 * with the least (weighted) error rate.  Then it generates the "best" exceptions for
 * each exception and iterates until pure.  Thus it performs a tree-like expansion of
 * exceptions and each leaf has only a default rule and no exceptions. <br>
 * The exceptions are a set of rules that predict a class other than the class in the
 * default rule.  IREP is used to find the exceptions. <p>
 * There are five inner classes defined in this class. <br>
 * The first is Ridor_node, which implements one node in the Ridor tree.  It's basically
 * composed of a default class and a set of exception rules to the default class.<br>
 * The second inner class is RidorRule, which implements a single exception rule 
 * using REP.<br>
 * The last three inner classes are only used in RidorRule.  They are Antd, NumericAntd 
 * and NominalAntd, which all implement a single antecedent in the RidorRule. <br>
 * The Antd class is an abstract class, which has two subclasses, NumericAntd and 
 * NominalAntd, to implement the corresponding abstract functions.  These two subclasses
 * implement the functions related to an antecedent with a nominal attribute and a numeric 
 * attribute, respectively.<p>
 * 
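 * A minimal usage sketch (assuming standard Weka conventions; the dataset file
 * name below is illustrative only, and the calls that may throw Exception are
 * left unhandled): <pre>
 *   // load a nominal-class dataset and use the last attribute as the class
 *   Instances data = new Instances(
 *       new java.io.BufferedReader(new java.io.FileReader("weather.arff")));
 *   data.setClassIndex(data.numAttributes() - 1);
 *
 *   // build the ripple-down rule learner with its default settings
 *   Ridor ridor = new Ridor();
 *   ridor.buildClassifier(data);
 *
 *   // classify one instance and print the learned rule tree
 *   double pred = ridor.classifyInstance(data.instance(0));
 *   System.out.println(data.classAttribute().value((int) pred));
 *   System.out.println(ridor);
 * </pre>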
 *
 * @author Xin XU (xx5@cs.waikato.ac.nz)
 * @version $Revision$ 
 */

public class Ridor extends Classifier
  implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler {

  /** The number of folds to split data into Grow and Prune for IREP */
  private int m_Folds = 3;
    
  /** The number of shuffles performed on the data for randomization */
  private int m_Shuffle = 1;

  /** Random object for randomization */
  private Random m_Random = null;
    
  /** The seed to perform randomization */
  private int m_Seed = 1;

  /** Whether to use the error rate on all the data */
  private boolean m_IsAllErr = false;

  /** Whether to use the majority class as the default class */
  private boolean m_IsMajority = false;
    
  /** The root of Ridor */
  private Ridor_node m_Root = null;
    
  /** The class attribute of the data */
  private Attribute m_Class;

  /** Statistics of the data */
  private double m_Cover, m_Err;

  /** The minimal number of instance weights within a split */
  private double m_MinNo = 2.0;
    
  /**
   * Returns a string describing the classifier
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "The implementation of a RIpple-DOwn Rule learner. " 
      + "It generates a default rule first and then the exceptions for the default rule "
      + "with the least (weighted) error rate.  Then it generates the \"best\" exceptions for "
      + "each exception and iterates until pure.  Thus it performs a tree-like expansion of "
      + "exceptions."
      + "The exceptions are a set of rules that predict classes other than the default. "
      + "IREP is used to generate the exceptions.";
  }
    
  /** 
   * Private class implementing a single node of Ridor. 
   * It consists of a default class label, a set of exceptions to the default rule,
   * and the exceptions to each exception
   */
  private class Ridor_node implements Serializable {
	
    /** The default class label */
    private double defClass = Double.NaN;
	
    /** The set of exceptions of the default rule. 
	Each element also has its own exceptions and the consequent of each rule 
	is determined by its exceptions */
    private RidorRule[] rules = null;
	
    /** The exceptions of the exception rules */
    private Ridor_node[] excepts = null; 

    /** The level of this node */
    private int level;

    /** "Get" member functions */
    public double getDefClass() { return defClass; }
    public RidorRule[] getRules() { return rules; }
    public Ridor_node[] getExcepts() { return excepts; }

    /**
     * Builds the rules of this node in a ripple-down manner.
     *
     * @param dataByClass the data divided by their class labels; the real class
     * labels of the instances are all set to 0
     * @param lvl the level of the parent node
     * @exception Exception if the ruleset of this node cannot be built
     */
    public void findRules(Instances[] dataByClass, int lvl) throws Exception {
      Vector finalRules = null;
      int clas = -1;
      double[] isPure = new double[dataByClass.length];
      int numMajority = 0;
	    
      level = lvl + 1;
	    
      for(int h=0; h < dataByClass.length; h++){
	isPure[h] = dataByClass[h].sumOfWeights();
	if(Utils.grOrEq(isPure[h], m_Folds))
	  numMajority++;  // Count how many class labels have enough instances
      }
	    
      if(numMajority <= 1){	                   // The data is pure or not enough
	defClass = (double)Utils.maxIndex(isPure);
	return;
      }
      double total = Utils.sum(isPure);	 
	    
      if(m_IsMajority){
	defClass = (double)Utils.maxIndex(isPure);
	Instances data = new Instances(dataByClass[(int)defClass]);
	int index = data.classIndex();
		
	for(int j=0; j<data.numInstances(); j++)
	  data.instance(j).setClassValue(1);       // Set one class as default
		
	for(int k=0; k < dataByClass.length; k++)    // Merge into one dataset
	  if(k != (int)defClass){
	    if(data.numInstances() >= dataByClass[k].numInstances())
	      data = append(data, dataByClass[k]);
	    else data = append(dataByClass[k], data);
	  }
		
	data.setClassIndex(index);           // Position new class label
		
	double classCount = total - isPure[(int)defClass];
	finalRules = new Vector();
	buildRuleset(data, classCount, finalRules);
	if(finalRules.size() == 0)           // No good rules built
	  return;
      }
      else{
	double maxAcRt = isPure[Utils.maxIndex(isPure)] / total;
		
	// Find default class
	for(int i=0; i < dataByClass.length; i++){
	  if(isPure[i] >= m_Folds){
	    Instances data = new Instances(dataByClass[i]);
	    int index = data.classIndex();
			
	    for(int j=0; j<data.numInstances(); j++)
	      data.instance(j).setClassValue(1);       // Set one class as default
			
	    for(int k=0; k < dataByClass.length; k++)    // Merge into one dataset
	      if(k != i){
		if(data.numInstances() >= dataByClass[k].numInstances())
		  data = append(data, dataByClass[k]);
		else data = append(dataByClass[k], data);
	      }
			
	    data.setClassIndex(index);           // Position new class label 
			
	    /* Build a set of rules */
	    double classCount = data.sumOfWeights() - isPure[i];
	    Vector ruleset = new Vector();
	    double wAcRt = buildRuleset(data, classCount, ruleset); 
			
	    if(Utils.gr(wAcRt, maxAcRt)){
	      finalRules = ruleset;
	      maxAcRt = wAcRt;
	      clas = i;
	    }
	  }
	}
		
	if(finalRules == null){ // No good rules found, set majority class as default
	  defClass = (double)Utils.maxIndex(isPure);
	  return;
	}
		
	defClass = (double)clas;
      }
			
      /* Store the exception rules and default class in this node */
      int size = finalRules.size();
      rules = new RidorRule[size];
      excepts = new Ridor_node[size];
      for(int l=0; l < size; l++)
	rules[l] = (RidorRule)finalRules.elementAt(l);
	    
      /* Build exceptions for each exception rule */
      Instances[] uncovered = dataByClass; 
      if(level == 1)  // The error of default rule
	m_Err = total - uncovered[(int)defClass].sumOfWeights();			

      uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0);    
	    
      for(int m=0; m < size; m++){
	/* The data covered by this rule, they are also deducted from the original data */
	Instances[][] dvdData = divide(rules[m], uncovered);
	Instances[] covered = dvdData[0];    // Data covered by the rule
	//uncovered = dvdData[1];            // Data not covered by the rule
	excepts[m] = new Ridor_node();
	excepts[m].findRules(covered, level);// Find exceptions on the covered data
      }
    }

    /**	
     * Private function to build a rule set and return the weighted average
     * accuracy rate of the rules in the set.
     *
     * @param insts the data used to build the ruleset
     * @param classCount the count of the instances with the predicted class that are
     *                   not yet covered by the ruleset
     * @param ruleset the ruleset to be built
     * @return the weighted accuracy rate of the ruleset
     * @exception Exception if the rules cannot be built properly
     */
    private double buildRuleset(Instances insts, double classCount, Vector ruleset) 
      throws Exception {	    
      Instances data = new Instances(insts);
      double wAcRt = 0;  // The weighted accuracy rate of this ruleset
      double total = data.sumOfWeights();
	    
      while( classCount >= m_Folds ){      // Data is not pure
	RidorRule bestRule = null;
	double bestWorthRate= -1;        // The best worth achieved by
	double bestWorth = -1;           // randomization of the data
		
	RidorRule rule = new RidorRule();                                
	rule.setPredictedClass(0);       // Predict the classes other than default
		
	for(int j = 0; j < m_Shuffle; j++){
	  if(m_Shuffle > 1)
	    data.randomize(m_Random);
		    
	  rule.buildClassifier(data);
		    
	  double wr, w; // Worth rate and worth
	  if(m_IsAllErr){
	    wr = (rule.getWorth()+rule.getAccuG()) / 
	      (rule.getCoverP()+rule.getCoverG());
	    w = rule.getWorth() + rule.getAccuG();
	  }
	  else{
	    wr = rule.getWorthRate();
	    w = rule.getWorth(); 
	  }
		    
	  if(Utils.gr(wr, bestWorthRate) ||
	     (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){
	    bestRule = rule;
	    bestWorthRate = wr;
	    bestWorth = w;
	  }
	}
		
	if (bestRule == null)
	  throw new Exception("Something wrong here inside findRule()!");
		
	if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds()))
	  break;                       // No more good rules generated
		
	Instances newData = new Instances(data); 
	data = new Instances(newData, 0);// Empty the data
	classCount = 0;
	double cover = 0;                // Coverage of this rule on whole data
		
	for(int l=0; l<newData.numInstances(); l++){
	  Instance datum = newData.instance(l);
	  if(!bestRule.isCover(datum)){// Data not covered by the previous rule
	    data.add(datum);
	    if(Utils.eq(datum.classValue(), 0)) 
	      classCount += datum.weight(); // The predicted class in the data
	  }
	  else cover += datum.weight();
	}			
		
	wAcRt += computeWeightedAcRt(bestWorthRate, cover, total);
	ruleset.addElement(bestRule);			
      }  
	    
      /* The weighted def. accuracy */
      double wDefAcRt = (data.sumOfWeights()-classCount) / total;		    
      wAcRt += wDefAcRt;
	    
      return wAcRt;
    }
	
    /**
     * Private function to combine two datasets
     *
     * @param data1 the data to which data2 is appended 
     * @param data2 the data to be appended to data1
     * @return the merged data
     */
    private Instances append(Instances data1, Instances data2){
      Instances data = new Instances(data1);
      for(int i=0; i<data2.numInstances(); i++)
	data.add(data2.instance(i));
	    
      return data;
    }
	
    /**
     * Compute the weighted accuracy rate of a certain rule.
     * Each rule is weighted by its coverage proportion in the whole data,
     * so the weighted accuracy rate of one rule is actually 
     * 
     * (worth rate) * (coverage proportion)
     *
     *                               coverage of the rule on the whole data
     * where coverage proportion = -----------------------------------------
     *                              the whole data size fed into the ruleset
     *
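     * For example (an illustrative calculation, not part of the original source):
     * a rule with worth rate 0.8 covering 30 of the 100 (weighted) instances fed
     * into the ruleset contributes 0.8 * (30/100) = 0.24.
     *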
     * @param worthRt the worth rate
     * @param cover the coverage of the rule on the whole data
