
📄 Ridor.java

📁 wekaUT is a weka-based semi-supervised learning classifier package developed by the University of Texas at Austin
💻 JAVA
📖 Page 1 of 3
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Ridor.java
 *    Copyright (C) 2001 Xin Xu
 *
 */

package weka.classifiers.rules;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.classifiers.*;

/**
 * The implementation of a RIpple-DOwn Rule learner.
 *
 * It generates the default rule first and then the exceptions for the default rule
 * with the least (weighted) error rate.  Then it generates the "best" exceptions for
 * each exception and iterates until pure.  Thus it performs a tree-like expansion of
 * exceptions, and each leaf has only a default rule and no exceptions. <br>
 * The exceptions are a set of rules that predict classes other than the class of the
 * default rule.  IREP is used to find the exceptions. <p>
 * There are five inner classes defined in this class. <br>
 * The first is Ridor_node, which implements one node in the Ridor tree.  It is basically
 * composed of a default class and a set of exception rules to the default class.<br>
 * The second inner class is RidorRule, which implements a single exception rule
 * using REP.<br>
 * The last three inner classes are only used in RidorRule.  They are Antd, NumericAntd
 * and NominalAntd, which all implement a single antecedent in the RidorRule. <br>
 * The Antd class is an abstract class with two subclasses, NumericAntd and
 * NominalAntd, that implement the corresponding abstract functions.  These two subclasses
 * implement the functions related to an antecedent with a nominal attribute and a numeric
 * attribute respectively.<p>
 *
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.1.1.1 $
 */
public class Ridor extends Classifier
  implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler {

  /** The number of folds to split data into Grow and Prune for IREP */
  private int m_Folds = 3;

  /** The number of shuffles performed on the data for randomization */
  private int m_Shuffle = 1;

  /** Random object for randomization */
  private Random m_Random = null;

  /** The seed used to perform randomization */
  private int m_Seed = 1;

  /** Whether to use the error rate on all the data */
  private boolean m_IsAllErr = false;

  /** Whether to use the majority class as the default class */
  private boolean m_IsMajority = false;

  /** The root of Ridor */
  private Ridor_node m_Root = null;

  /** The class attribute of the data */
  private Attribute m_Class;

  /** Statistics of the data */
  private double m_Cover, m_Err;

  /** The minimal number of instance weights within a split */
  private double m_MinNo = 2.0;

  /**
   * Private class implementing the single node of Ridor.
   * It consists of a default class label, a set of exceptions to the default rule
   * and the exceptions to each exception.
   */
  private class Ridor_node implements Serializable {

    /** The default class label */
    private double defClass = Double.NaN;

    /** The set of exceptions of the default rule.
        Each element also has its own exceptions and the consequent of each rule
        is determined by its exceptions */
    private RidorRule[] rules = null;

    /** The exceptions of the exception rules */
    private Ridor_node[] excepts = null;

    /** The level of this node */
    private int level;

    /** "Get" member functions */
    public double getDefClass() { return defClass; }
    public RidorRule[] getRules() { return rules; }
    public Ridor_node[] getExcepts() { return excepts; }

    /**
     * Builds the rules of this node in a ripple-down manner.
     *
     * @param dataByClass the data divided by their class labels. The real class
     * labels of the instances are all set to 0
     * @param lvl the level of the parent node
     * @exception Exception if the ruleset of this node cannot be built
     */
    public void findRules(Instances[] dataByClass, int lvl) throws Exception {
      Vector finalRules = null;
      int clas = -1;
      double[] isPure = new double[dataByClass.length];
      int numMajority = 0;

      level = lvl + 1;

      for(int h=0; h < dataByClass.length; h++){
        isPure[h] = dataByClass[h].sumOfWeights();
        if(Utils.grOrEq(isPure[h], m_Folds))
          numMajority++;  // Count how many class labels have enough instances
      }

      if(numMajority <= 1){                        // The data is pure or not enough
        defClass = (double)Utils.maxIndex(isPure);
        return;
      }
      double total = Utils.sum(isPure);

      if(m_IsMajority){
        defClass = (double)Utils.maxIndex(isPure);
        Instances data = new Instances(dataByClass[(int)defClass]);
        int index = data.classIndex();

        for(int j=0; j<data.numInstances(); j++)
          data.instance(j).setClassValue(1);          // Set one class as default

        for(int k=0; k < dataByClass.length; k++)     // Merge into one dataset
          if(k != (int)defClass){
            if(data.numInstances() >= dataByClass[k].numInstances())
              data = append(data, dataByClass[k]);
            else data = append(dataByClass[k], data);
          }

        data.setClassIndex(index);                    // Position new class label

        double classCount = total - isPure[(int)defClass];
        finalRules = new Vector();
        buildRuleset(data, classCount, finalRules);
        if(finalRules.size() == 0)                    // No good rules built
          return;
      }
      else{
        double maxAcRt = isPure[Utils.maxIndex(isPure)] / total;

        // Find default class
        for(int i=0; i < dataByClass.length; i++){
          if(isPure[i] >= m_Folds){
            Instances data = new Instances(dataByClass[i]);
            int index = data.classIndex();

            for(int j=0; j<data.numInstances(); j++)
              data.instance(j).setClassValue(1);      // Set one class as default

            for(int k=0; k < dataByClass.length; k++) // Merge into one dataset
              if(k != i){
                if(data.numInstances() >= dataByClass[k].numInstances())
                  data = append(data, dataByClass[k]);
                else data = append(dataByClass[k], data);
              }

            data.setClassIndex(index);                // Position new class label

            /* Build a set of rules */
            double classCount = data.sumOfWeights() - isPure[i];
            Vector ruleset = new Vector();
            double wAcRt = buildRuleset(data, classCount, ruleset);

            if(Utils.gr(wAcRt, maxAcRt)){
              finalRules = ruleset;
              maxAcRt = wAcRt;
              clas = i;
            }
          }
        }

        if(finalRules == null){ // No good rules found, set majority class as default
          defClass = (double)Utils.maxIndex(isPure);
          return;
        }

        defClass = (double)clas;
      }

      /* Store the exception rules and default class in this node */
      int size = finalRules.size();
      rules = new RidorRule[size];
      excepts = new Ridor_node[size];
      for(int l=0; l < size; l++)
        rules[l] = (RidorRule)finalRules.elementAt(l);

      /* Build exceptions for each exception rule */
      Instances[] uncovered = dataByClass;
      if(level == 1)  // The error of default rule
        m_Err = total - uncovered[(int)defClass].sumOfWeights();

      uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0);

      for(int m=0; m < size; m++){
        /* The data covered by this rule, they are also deducted from the original data */
        Instances[][] dvdData = divide(rules[m], uncovered);
        Instances[] covered = dvdData[0];    // Data covered by the rule
        //uncovered = dvdData[1];            // Data not covered by the rule
        excepts[m] = new Ridor_node();
        excepts[m].findRules(covered, level);// Find exceptions on the covered data
      }
    }

    /**
     * Private function to build a rule set and return the weighted avg of accuracy
     * rate of rules in the set.
     *
     * @param insts the data used to build the ruleset
     * @param classCount the counts of the instances with the predicted class but not
     *                   yet covered by the ruleset
     * @param ruleset the ruleset to be built
     * @return the weighted accuracy rate of the ruleset
     * @exception Exception if the rules cannot be built properly
     */
    private double buildRuleset(Instances insts, double classCount, Vector ruleset)
      throws Exception {

      Instances data = new Instances(insts);
      double wAcRt = 0;  // The weighted accuracy rate of this ruleset
      double total = data.sumOfWeights();

      while( classCount >= m_Folds ){    // Data is not pure
        RidorRule bestRule = null;
        double bestWorthRate= -1;        // The best worth achieved by
        double bestWorth = -1;           // randomization of the data

        RidorRule rule = new RidorRule();
        rule.setPredictedClass(0);       // Predict the classes other than default
        if(m_Random == null)
          m_Random = new Random(m_Seed);

        for(int j = 0; j < m_Shuffle; j++){
          if(m_Shuffle > 1)
            data.randomize(m_Random);

          rule.buildClassifier(data);

          double wr, w; // Worth rate and worth
          if(m_IsAllErr){
            wr = (rule.getWorth()+rule.getAccuG()) /
              (rule.getCoverP()+rule.getCoverG());
            w = rule.getWorth() + rule.getAccuG();
          }
          else{
            wr = rule.getWorthRate();
            w = rule.getWorth();
          }

          if(Utils.gr(wr, bestWorthRate) ||
             (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){
            bestRule = rule;
            bestWorthRate = wr;
            bestWorth = w;
          }
        }

        if (bestRule == null)
          throw new Exception("Something wrong here inside findRule()!");

        if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds()))
          break;                         // No more good rules generated

        Instances newData = new Instances(data);
        data = new Instances(newData, 0);// Empty the data
        classCount = 0;
        double cover = 0;                // Coverage of this rule on whole data

        for(int l=0; l<newData.numInstances(); l++){
          Instance datum = newData.instance(l);
          if(!bestRule.isCover(datum)){  // Data not covered by the previous rule
            data.add(datum);
            if(Utils.eq(datum.classValue(), 0))
              classCount += datum.weight(); // The predicted class in the data
          }
          else cover += datum.weight();
        }

        wAcRt += computeWeightedAcRt(bestWorthRate, cover, total);
        ruleset.addElement(bestRule);
      }

      /* The weighted def. accuracy */
      double wDefAcRt = (data.sumOfWeights()-classCount) / total;
      wAcRt += wDefAcRt;

      return wAcRt;
    }

    /**
     * Private function to combine two datasets.
     *
     * @param data1 the data to which data2 is appended
     * @param data2 the data to be appended to data1
     * @return the merged data
     */
    private Instances append(Instances data1, Instances data2){
      Instances data = new Instances(data1);
      for(int i=0; i<data2.numInstances(); i++)
        data.add(data2.instance(i));

      return data;
    }

    /**
     * Compute the weighted average of the accuracy rate of a certain rule.
     * Each rule is weighted by its coverage proportion in the whole data.
     * So the accuracy rate of one ruleset is actually
     *
     * (worth rate) * (coverage proportion)
     *
     *                               coverage of the rule on the whole data
     * where coverage proportion = -----------------------------------------
     *                              the whole data size fed into the ruleset
     *
     * @param worthRt the worth rate
     * @param cover the coverage of the rule on the whole data
     * @param total the total data size fed into the ruleset
     * @return the weighted accuracy rate of this rule
     */
    private double computeWeightedAcRt(double worthRt, double cover, double total){
      return (worthRt * (cover/total));
    }

    /**
     * Builds an array of data according to their true class labels.
     * Each bag of data is filtered through the specified rule and
     * is totally covered by this rule.
     * Both the data covered and uncovered by the rule will be returned
     * by the procedure.
     *
     * @param rule the rule covering the data
     * @param dataByClass the array of data to be covered by the rule
     * @return the arrays of data both covered and not covered by the rule
     */
    private Instances[][] divide(RidorRule rule, Instances[] dataByClass){
      int len = dataByClass.length;
      Instances[][] dataBags = new Instances[2][len];

      for(int i=0; i < len; i++){
        Instances[] dvdData = rule.coveredByRule(dataByClass[i]);
        dataBags[0][i] = dvdData[0];     // Covered by the rule
        dataBags[1][i] = dvdData[1];     // Not covered by the rule
      }

      return dataBags;
    }

    /**
     * The size of a certain node of Ridor, i.e. the
     * number of rules generated within and below this node.
     *
     * @return the size of this node
     */
    public int size(){
      int size = 0;
      if(rules != null){
        for(int i=0; i < rules.length; i++)
          size += excepts[i].size(); // The children's size
        size += rules.length;          // This node's size
      }
      return size;
    }

    /**
     * Prints all the rules of one node of Ridor.
     *
     * @return a textual description of one node of Ridor
     */
    public String toString(){
      StringBuffer text =  new StringBuffer();

      if(level == 1)
        text.append(m_Class.name() + " = " + m_Class.value((int)getDefClass())+
                    "  ("+m_Cover+"/"+m_Err+")\n");
      if(rules != null){
        for(int i=0; i < rules.length; i++){
          for(int j=0; j < level; j++)
            text.append("         ");
          String cl = m_Class.value((int)(excepts[i].getDefClass()));
          text.append("  Except " +
                      rules[i].toString(m_Class.name(), cl)+
                      "\n" + excepts[i].toString());
        }
      }

      return text.toString();
    }
  }

  /**
   * This class implements a single rule that predicts the 2-class distribution.
   *
   * A rule consists of antecedents "AND"ed together and the consequent (class value)
   * for the classification.  In this case, the consequent is the distribution of
   * the available classes (always 2 classes) in the dataset.
   * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to select
   * an antecedent and Reduced Error Pruning (REP) is used to prune the rule.
   */
  private class RidorRule implements WeightedInstancesHandler{

    /** The internal representation of the class label to be predicted */
    private double m_Class = -1;

    /** The class attribute of the data */
    private Attribute m_ClassAttribute;

    /** The vector of antecedents of this rule */
    protected FastVector m_Antds = null;

    /** The worth rate of this rule, in this case, the accuracy rate in the pruning data */
    private double m_WorthRate = 0;

    /** The worth value of this rule, in this case, the accurate # in the pruning data */
    private double m_Worth = 0;

    /** The sum of weights of the data covered by this rule in the pruning data */
    private double m_CoverP = 0;

    /** The accurate and covered data of this rule in the growing data */
    private double m_CoverG = 0, m_AccuG = 0;

    /** The access functions for parameters */
    public void setPredictedClass(double cl){  m_Class = cl; }
    public double getPredictedClass(){ return m_Class; }

    /**
     * Builds a single rule learner with REP dealing with 2 classes.
     * This rule learner always tries to predict the class with label
     * m_Class.
     *
     * @param instances the training data
     * @exception Exception if classifier can't be built successfully
     */
    public void buildClassifier(Instances instances) throws Exception {
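The listing breaks off here, at the start of RidorRule.buildClassifier; the IREP grow/prune logic and the Antd classes follow on the remaining pages. For orientation, below is a minimal sketch of how a Ridor classifier would typically be trained and queried through the standard weka Classifier API. The ARFF file name, the example class name RidorExample, and the choice of the last attribute as the class are placeholders for illustration, not part of the original source.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;
import weka.classifiers.rules.Ridor;

public class RidorExample {

  public static void main(String[] args) throws Exception {
    // Load a dataset in ARFF format (the file name is a placeholder).
    Instances data = new Instances(
        new BufferedReader(new FileReader("iris.arff")));
    data.setClassIndex(data.numAttributes() - 1);  // assume the last attribute is the class

    // Grow the default rule plus its tree of exception rules.
    Ridor ridor = new Ridor();
    ridor.buildClassifier(data);

    // Print the learned ruleset.
    System.out.println(ridor);

    // Classify one instance and map the numeric prediction back to a label.
    double pred = ridor.classifyInstance(data.instance(0));
    System.out.println("Predicted class: " + data.classAttribute().value((int) pred));
  }
}

The printed model reads as a default prediction followed by indented "Except" rules, mirroring the tree of Ridor_node objects built by findRules above.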
