📄 jrip.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String debugTipText() {
    return "Whether debug information is output to the console.";
  }

  public void setDebug(boolean d){m_Debug = d;}
  public boolean getDebug(){ return m_Debug; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String checkErrorRateTipText() {
    return "Whether check for error rate >= 1/2 is included" +
      " in stopping criterion.";
  }

  public void setCheckErrorRate(boolean d){ m_CheckErr = d;}
  public boolean getCheckErrorRate(){ return m_CheckErr; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String usePruningTipText() {
    return "Whether pruning is performed.";
  }

  public void setUsePruning(boolean d){ m_UsePruning = d;}
  public boolean getUsePruning(){ return m_UsePruning; }
    
  /** 
   * Get the ruleset generated by Ripper 
   *
   * @return the ruleset
   */
  public FastVector getRuleset(){ return m_Ruleset; }

  /** 
   * Get the statistics of the ruleset in the given position
   *
   * @param pos the position of the stats, assuming correct
   */
  public RuleStats getRuleStats(int pos) {
    return (RuleStats)m_RulesetStats.elementAt(pos);
  }
    
  /** 
   * The single antecedent in the rule, which is composed of an attribute and 
   * the corresponding value.  There are two inherited classes, namely NumericAntd
   * and NominalAntd in which the attributes are numeric and nominal respectively.
   */    
  private abstract class Antd 
    implements WeightedInstancesHandler, Copyable, Serializable {
	
    /* The attribute of the antecedent */
    protected Attribute att;
	
    /* The attribute value of the antecedent.  
       For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */
    protected double value; 
	
    /* The maximum infoGain achieved by this antecedent test 
     * in the growing data */
    protected double maxInfoGain;
	
    /* The accurate rate of this antecedent test on the growing data */
    protected double accuRate;
	
    /* The coverage of this antecedent in the growing data */
    protected double cover;
	
    /* The accurate data for this antecedent in the growing data */
    protected double accu;
	
    /* Constructor*/
    public Antd(Attribute a){
      att=a;
      value=Double.NaN; 
      maxInfoGain = 0;
      accuRate = Double.NaN;
      cover = Double.NaN;
      accu = Double.NaN;
    }
	
    /* The abstract members for inheritance */
    public abstract Instances[] splitData(Instances data, double defAcRt, 
					  double cla);
    public abstract boolean covers(Instance inst);
    public abstract String toString();
	
    /** Implements Copyable */
    public abstract Object copy(); 
	   
    /* Get functions of this antecedent */
    public Attribute getAttr(){ return att; }
    public double getAttrValue(){ return value; }
    public double getMaxInfoGain(){ return maxInfoGain; }
    public double getAccuRate(){ return accuRate; } 
    public double getAccu(){ return accu; } 
    public double getCover(){ return cover; } 
  }
    
  /** 
   * The antecedent with numeric attribute
   */
  private class NumericAntd extends Antd{
	
    /* The split point for this numeric antecedent */
    private double splitPoint;
    
    /* Constructor*/
    public NumericAntd(Attribute a){ 
      super(a);
      splitPoint = Double.NaN;
    }    
	
    /* Get split point of this numeric antecedent */
    public double getSplitPoint(){ return splitPoint; }
	
    /** Implements Copyable */
    public Object copy(){ 
      NumericAntd na = new NumericAntd(getAttr());
      na.value = this.value;
      na.splitPoint = this.splitPoint;
      return na;
    }
	
    /**
     * Implements the splitData function.  
     * This procedure is to split the data into two bags according 
     * to the information gain of the numeric attribute value
     * The maximum infoGain is also calculated.  
     * 
     * @param insts the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cl the class label to be predicted
     * @return the array of data after split
     */
    public Instances[] splitData(Instances insts, double defAcRt, 
				 double cl){
	Instances data = insts;
      int total=data.numInstances();// Total number of instances without 
      // missing value for att
	    
      int split=1;                  // Current split position
      int prev=0;                   // Previous split position
      int finalSplit=split;         // Final split position
      maxInfoGain = 0;
      value = 0;	
	    
      double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
	    
      data.sort(att);
      // Find the las instance without missing value 
      for(int x=0; x<data.numInstances(); x++){
	Instance inst = data.instance(x);
	if(inst.isMissing(att)){
	  total = x;
	  break;
	}
		
	sndCover += inst.weight();
	if(Utils.eq(inst.classValue(), cl))
	  sndAccu += inst.weight();		
      }	    

      if(total == 0) return null; // Data all missing for the attribute
      splitPoint = data.instance(total-1).value(att);	
	    
      for(; split <= total; split++){
	if((split == total) ||
	   (data.instance(split).value(att) > // Can't split within
	    data.instance(prev).value(att))){ // same value	    
		    
	  for(int y=prev; y<split; y++){
	    Instance inst = data.instance(y);
	    fstCover += inst.weight(); 
	    if(Utils.eq(data.instance(y).classValue(), cl)){
	      fstAccu += inst.weight();  // First bag positive# ++
	    }	     		   
	  }
		    
	  double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),
	    sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);
		    
	  /* Which bag has higher information gain? */
	  boolean isFirst; 
	  double fstInfoGain, sndInfoGain;
	  double accRate, infoGain, coverage, accurate;
		    
	  fstInfoGain = 
	    //Utils.eq(defAcRt, 1.0) ? 
	    //fstAccu/(double)numConds : 
	    fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));
		    
	  sndInfoGain = 
	    //Utils.eq(defAcRt, 1.0) ? 
	    //sndAccu/(double)numConds : 
	    sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));
		    
	  if(fstInfoGain > sndInfoGain){
	    isFirst = true;
	    infoGain = fstInfoGain;
	    accRate = fstAccuRate;
	    accurate = fstAccu;
	    coverage = fstCover;
	  }
	  else{
	    isFirst = false;
	    infoGain = sndInfoGain;
	    accRate = sndAccuRate;
	    accurate = sndAccu;
	    coverage = sndCover;
	  }
		    
	  /* Check whether so far the max infoGain */
	  if(infoGain > maxInfoGain){
	    splitPoint = data.instance(prev).value(att);
	    value = (isFirst) ? 0 : 1;
	    accuRate = accRate;
	    accu = accurate;
	    cover = coverage;
	    maxInfoGain = infoGain;
	    finalSplit = (isFirst) ? split : prev;
	  }
		    
	  for(int y=prev; y<split; y++){
	    Instance inst = data.instance(y);
	    sndCover -= inst.weight(); 
	    if(Utils.eq(data.instance(y).classValue(), cl)){
	      sndAccu -= inst.weight();  // Second bag positive# --
	    }	     		   
	  }		    
	  prev=split;
	}
      }
	    
      /* Split the data */
      Instances[] splitData = new Instances[2];
      splitData[0] = new Instances(data, 0, finalSplit);
      splitData[1] = new Instances(data, finalSplit, total-finalSplit);
	    
      return splitData;
    }
	
    /**
     * Whether the instance is covered by this antecedent
     * 
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean covers(Instance inst){
      boolean isCover=true;
      if(!inst.isMissing(att)){
	if((int)value == 0){ // First bag
	  if(inst.value(att) > splitPoint)
	    isCover=false;
	}
	else if(inst.value(att) < splitPoint) // Second bag
	  isCover=false;
      }
      else
	isCover = false;
	    
      return isCover;
    }
	
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      String symbol = ((int)value == 0) ? " <= " : " >= ";
      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
    }   
  }
    
    
  /** 
   * The antecedent with nominal attribute
   */
  private class NominalAntd extends Antd{
	
    /* The parameters of infoGain calculated for each attribute value
     * in the growing data */
    private double[] accurate;
    private double[] coverage;
	
    /* Constructor*/
    public NominalAntd(Attribute a){ 
      super(a);    
      int bag = att.numValues();
      accurate = new double[bag];
      coverage = new double[bag];
    }   

    /** Implements Copyable */
    public Object copy(){
      Antd antec = new NominalAntd(getAttr());
      antec.value = this.value;
      return antec;	    
    }
	
    /**
     * Implements the splitData function.  
     * This procedure is to split the data into bags according 
     * to the nominal attribute value
     * The infoGain for each bag is also calculated.  
     * 
     * @param data the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cl the class label to be predicted
     * @return the array of data after split
     */
    public Instances[] splitData(Instances data, double defAcRt, 
				 double cl){
      int bag = att.numValues();
      Instances[] splitData = new Instances[bag];
	    
      for(int x=0; x<bag; x++){
	splitData[x] = new Instances(data, data.numInstances());
	accurate[x] = 0;
	coverage[x] = 0;
      }
	    
      for(int x=0; x<data.numInstances(); x++){
	Instance inst=data.instance(x);
	if(!inst.isMissing(att)){
	  int v = (int)inst.value(att);
	  splitData[v].add(inst);
	  coverage[v] += inst.weight();
	  if((int)inst.classValue() == (int)cl)
	    accurate[v] += inst.weight();
	}
      }
	    
      for(int x=0; x<bag; x++){
	double t = coverage[x]+1.0;
	double p = accurate[x] + 1.0;		
	double infoGain = 
	  //Utils.eq(defAcRt, 1.0) ? 
	  //accurate[x]/(double)numConds : 
	  accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt));
		
	if(infoGain > maxInfoGain){
	  maxInfoGain = infoGain;
	  cover = coverage[x];
	  accu = accurate[x];
	  accuRate = p/t;
	  value = (double)x;
	}
      }
	    
      return splitData;
    }
	
    /**
     * Whether the instance is covered by this antecedent
     * 
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is
     *         covered by this antecedent
     */
    public boolean covers(Instance inst){
      boolean isCover=false;
      if(!inst.isMissing(att)){
	if((int)inst.value(att) == (int)value)
	  isCover=true;	    
      }
      return isCover;
    }
	
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      return (att.name() + " = " +att.value((int)value));
    } 
  }

    
  /**
   * This class implements a single rule that predicts specified class.  
   *
   * A rule consists of antecedents "AND"ed together and the consequent 
   * (class value) for the classification.  
   * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to
   * select an antecedent and Reduced Error Prunning (REP) with the metric
   * of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule. 
   */
    
  protected class RipperRule extends Rule{
	
    /** The internal representation of the class label to be predicted*/
    private double m_Consequent = -1;	
		
    /** The vector of antecedents of this rule*/
    protected FastVector m_Antds = null;	
	
    public void setConsequent(double cl){  m_Consequent = cl; }
    public double getConsequent(){ return m_Consequent; }
	
    /** Constructor */
    public RipperRule(){    
      m_Antds = new FastVector();	
    }
	
    /**
     * Get a shallow copy of this rule
     *
     * @return the copy
     */
    public Object copy(){
      RipperRule copy = new RipperRule();
      copy.setConsequent(getConsequent());
      copy.m_Antds = (FastVector)this.m_Antds.copyElements();
      return copy;
    }
	
    /**
     * Whether the instance covered by this rule
     * 
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance 
     *         is covered by this rule
     */
    public boolean covers(Instance datum){
      boolean isCover=true;
	    
      for(int i=0; i<m_Antds.size(); i++){
	Antd antd = (Antd)m_Antds.elementAt(i);
	if(!antd.covers(datum)){
	  isCover = false;
	  break;
	}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -