📄 ridor.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	m_Worth = worthValue[antdsSize-1];       // WorthValues of the last antecedent
	m_WorthRate = worthRt[antdsSize-1];
	m_CoverP = coverage[antdsSize-1];
	Antd last = (Antd)m_Antds.lastElement();
	m_CoverG = last.getCover();
	m_AccuG = last.getAccu();
      }
      else{                                        // Default rule    
	m_Worth = defAccu;                       // Default WorthValues
	m_WorthRate = defAccuRate;
	m_CoverP = total;
      }
    }
	
    /**
     * Private function to compute the default number of accurate instances,
     * i.e. the summed weight of the instances in the specified data whose class is m_Class
     * 
     * @param data the data in question
     * @return the default accuracy number
     */
    private double computeDefAccu(Instances data){
      // Running total of the weights of the instances whose class value equals m_Class
      double matchedWeight = 0;
      int idx = 0;
      while(idx < data.numInstances()){
        Instance current = data.instance(idx);
        idx++;
        // Instances of any other class contribute nothing
        if(!Utils.eq(current.classValue(), m_Class))
          continue;
        matchedWeight += current.weight();
      }
      return matchedWeight;
    }
	
    /** The following are get functions, to be used after prune() has set the values of worthRate and worth */
    /**
     * @return the worth rate stored in m_WorthRate
     */
    public double getWorthRate(){
      return m_WorthRate;
    }

    /**
     * @return the worth value stored in m_Worth
     */
    public double getWorth(){
      return m_Worth;
    }

    /**
     * @return the coverage stored in m_CoverP
     */
    public double getCoverP(){
      return m_CoverP;
    }

    /**
     * @return the coverage stored in m_CoverG (copied from the last
     *         antecedent's getCover() when the rule has antecedents)
     */
    public double getCoverG(){
      return m_CoverG;
    }

    /**
     * @return the accurate amount stored in m_AccuG (copied from the last
     *         antecedent's getAccu() when the rule has antecedents)
     */
    public double getAccuG(){
      return m_AccuG;
    }

    /**
     * Prints this rule with the specified class label
     *
     * @param att the string standing for attribute in the consequent of this rule
     * @param cl the string standing for value in the consequent of this rule
     * @return a textual description of this rule with the specified class label
     */
    public String toString(String att, String cl) {
      StringBuffer out = new StringBuffer();

      // Antecedents are rendered as "(a1) and (a2) and ... and (an)";
      // nothing is emitted here when the rule has no antecedents
      int numAntds = m_Antds.size();
      for(int k = 0; k < numAntds; k++){
        if(k > 0)
          out.append(" and ");
        Antd antd = (Antd)(m_Antds.elementAt(k));
        out.append("(").append(antd.toString()).append(")");
      }

      // Consequent: attribute name and class label
      out.append(" => ").append(att).append(" = ").append(cl);

      // Statistics: (m_CoverG/(m_CoverG - m_AccuG)) followed by [m_CoverP/(m_CoverP - m_Worth)]
      out.append("  (").append(m_CoverG).append("/").append(m_CoverG - m_AccuG);
      out.append(") [").append(m_CoverP).append("/").append(m_CoverP - m_Worth).append("]");

      return out.toString();
    }
	
    /**
     * Prints this rule
     *
     * @return a textual description of this rule
     */
    public String toString() {
      // Name of the class attribute and the label at index m_Class form the consequent
      String attrName = m_ClassAttribute.name();
      String classLabel = m_ClassAttribute.value((int)m_Class);
      return toString(attrName, classLabel);
    }        
  }
    
    
  /** 
   * The single antecedent in the rule, which is composed of an attribute and 
   * the corresponding value.  There are two subclasses, namely NumericAntd
   * and NominalAntd, in which the attributes are numeric and nominal respectively.
   */
    
  private abstract class Antd implements Serializable {

    /** The attribute tested by this antecedent */
    protected Attribute att;

    /**
     * The attribute value of the antecedent.
     * For a numeric attribute, value is either 0 (1st bag) or 1 (2nd bag)
     */
    protected double value;

    /** The maximum infoGain achieved by this antecedent test */
    protected double maxInfoGain;

    /** The accuracy rate of this antecedent test on the growing data */
    protected double accuRate;

    /** The coverage of this antecedent */
    protected double cover;

    /** The accurate data for this antecedent */
    protected double accu;

    /**
     * Creates an antecedent on the given attribute.  The information gain
     * starts at 0; value, accuracy rate, coverage and accurate amount are
     * NaN until a subclass assigns them.
     *
     * @param a the attribute of the antecedent
     */
    public Antd(Attribute a){
      this.att = a;
      this.maxInfoGain = 0;
      this.value = this.accuRate = this.cover = this.accu = Double.NaN;
    }

    /**
     * Splits the data on this antecedent's attribute.
     *
     * @param data the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cla the class label to be predicted
     * @return the array of data after split
     */
    public abstract Instances[] splitData(Instances data, double defAcRt, double cla);

    /**
     * Whether the instance is covered by this antecedent
     *
     * @param inst the instance in question
     * @return true if the instance is covered
     */
    public abstract boolean isCover(Instance inst);

    /**
     * @return a textual description of this antecedent
     */
    public abstract String toString();

    /** @return the attribute of this antecedent */
    public Attribute getAttr(){
      return this.att;
    }

    /** @return the attribute value of this antecedent */
    public double getAttrValue(){
      return this.value;
    }

    /** @return the maximum infoGain achieved by this antecedent test */
    public double getMaxInfoGain(){
      return this.maxInfoGain;
    }

    /** @return the accuracy rate of this antecedent test */
    public double getAccuRate(){
      return this.accuRate;
    }

    /** @return the accurate data for this antecedent */
    public double getAccu(){
      return this.accu;
    }

    /** @return the coverage of this antecedent */
    public double getCover(){
      return this.cover;
    }
  }
    
  /** 
   * The antecedent with numeric attribute
   */
  private class NumericAntd extends Antd{

    /** The split point for this numeric antecedent; NaN until splitData() assigns one */
    private double splitPoint;

    /**
     * Creates a numeric antecedent on the given attribute with no split point yet.
     *
     * @param a the attribute of the antecedent
     */
    public NumericAntd(Attribute a){
      super(a);
      this.splitPoint = Double.NaN;
    }

    /**
     * Gets the split point of this numeric antecedent
     *
     * @return the split point
     */
    public double getSplitPoint(){
      return this.splitPoint;
    }

    /**
     * Implements the splitData function.
     * The data is sorted on the attribute and every boundary between two
     * different attribute values is tried as a split into a first bag
     * (values below the boundary) and a second bag (the rest).  For each
     * candidate the bag with the higher information gain is considered, and
     * the best candidate seen so far is recorded in splitPoint, value,
     * accuRate, accu, cover and maxInfoGain.
     *
     * @param insts the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cl the class label to be predicted
     * @return the two bags of data after the split, or null when the
     *         instances with a known value for the attribute carry less than
     *         twice the minimum split weight or there is no such instance
     */
    public Instances[] splitData(Instances insts, double defAcRt, double cl){
      // Sort a new Instances object built from the input on this attribute
      Instances sorted = new Instances(insts);
      sorted.sort(att);

      maxInfoGain = 0;
      value = 0;

      // Minimum weight required in each bag: 0.1 * total weight / 2,
      // replaced by m_MinNo when not above it, otherwise capped at 25
      double minSplit =  0.1 * (sorted.sumOfWeights()) / 2.0;
      if(Utils.smOrEq(minSplit, m_MinNo))
        minSplit = m_MinNo;
      else if(Utils.gr(minSplit, 25))
        minSplit = 25;

      // Coverage and accurate weight of the first ("low") and second ("high") bag
      double lowCover = 0, highCover = 0, lowAccu = 0, highAccu = 0;

      // Put everything up to the first instance with a missing value into the
      // second bag.  NOTE(review): relies on sort() placing instances with
      // missing values last -- confirm against Instances.sort
      int numTotal = sorted.numInstances();
      int known = 0;                // Number of instances before the first missing one
      while(known < numTotal){
        Instance cur = sorted.instance(known);
        if(cur.isMissing(att))
          break;

        highCover += cur.weight();
        if(Utils.eq(cur.classValue(), cl))
          highAccu += cur.weight();
        known++;
      }

      // Enough Instances with known values?
      if(Utils.sm(highCover, (2*minSplit)))
        return null;

      // Data all missing for the attribute
      if(known == 0)
        return null;

      // Until a better candidate is found the split point is the largest known value
      splitPoint = sorted.instance(known-1).value(att);

      int bestCut = 1;              // Final split position
      int prevCut = 0;              // Previous split position
      for(int cut = 1; cut < known; cut++){
        // Can't split within same value
        if(Utils.eq(sorted.instance(cut).value(att),
                    sorted.instance(prevCut).value(att)))
          continue;

        // Move the instances in [prevCut, cut) from the second bag to the first
        for(int k = prevCut; k < cut; k++){
          Instance moved = sorted.instance(k);
          double w = moved.weight();
          lowCover += w;
          highCover -= w;
          if(Utils.eq(moved.classValue(), cl)){
            lowAccu += w;           // First bag positive# ++
            highAccu -= w;          // Second bag positive# --
          }
        }

        // Only evaluate the candidate when both bags hold enough data
        boolean tooSmall = Utils.sm(lowCover, minSplit) || Utils.sm(highCover, minSplit);
        if(!tooSmall){
          double lowRate = Utils.eq(lowCover, 0) ? 0 : lowAccu/lowCover;
          double highRate = Utils.eq(highCover, 0) ? 0 : highAccu/highCover;

          double lowGain = 0, highGain = 0;
          if(!Utils.eq(lowRate, 0))
            lowGain = lowAccu*(Utils.log2(lowRate) - Utils.log2(defAcRt));
          if(!Utils.eq(highRate, 0))
            highGain = highAccu*(Utils.log2(highRate) - Utils.log2(defAcRt));

          // Which bag has higher information gain?  Ties go to the bag with
          // the higher accuracy rate, the first bag winning a full tie
          boolean pickLow = Utils.gr(lowGain, highGain) ||
            (Utils.eq(lowGain, highGain) && Utils.grOrEq(lowRate, highRate));
          double gain = pickLow ? lowGain : highGain;

          // Check whether so far the max infoGain
          if(Utils.gr(gain, maxInfoGain)){
            splitPoint = (sorted.instance(cut).value(att) +
                          sorted.instance(prevCut).value(att))/2;
            if(pickLow){
              value = 0;
              accuRate = lowRate;
              accu = lowAccu;
              cover = lowCover;
            }
            else{
              value = 1;
              accuRate = highRate;
              accu = highAccu;
              cover = highCover;
            }
            maxInfoGain = gain;
            bestCut = cut;
          }
        }
        prevCut = cut;
      }

      // Split the data: [0, bestCut) and [bestCut, known)
      Instances[] bags = new Instances[2];
      bags[0] = new Instances(sorted, 0, bestCut);
      bags[1] = new Instances(sorted, bestCut, known-bestCut);

      return bags;
    }

    /**
     * Whether the instance is covered by this antecedent.  An instance with
     * a missing value for the attribute is never covered; otherwise it is
     * covered when its value is <= splitPoint (value is 0) or > splitPoint
     * (any other value).
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
      if(inst.isMissing(att))
        return false;

      if(Utils.eq(value, 0))
        return Utils.smOrEq(inst.value(att), splitPoint);

      return Utils.gr(inst.value(att), splitPoint);
    }

    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      String symbol;
      if(Utils.eq(value, 0.0))
        symbol = " <= ";
      else
        symbol = " > ";
      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
    }
  }
    
    
  /** 
   * The antecedent with nominal attribute
   */
  private class NominalAntd extends Antd{
	
    /* The parameters of infoGain calculated for each attribute value */
    private double[] accurate;
    private double[] coverage;
    private double[] infoGain;
	
    /* Constructor*/
    public NominalAntd(Attribute a){
      super(a);
      // One slot per value of the attribute in each of the statistics arrays
      final int numBags = att.numValues();
      this.accurate = new double[numBags];
      this.coverage = new double[numBags];
      this.infoGain = new double[numBags];
    }   
	
    /**
     * Implements the splitData function.  
     * This procedure is to split the data into bags according 
     * to the nominal attribute value
     * The infoGain for each bag is also calculated.  
     * 
     * @param data the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cl the class label to be predicted
     * @return the array of data after split
     */
    public Instances[] splitData(Instances data, double defAcRt, double cl){
      int bag = att.numValues();
      Instances[] splitData = new Instances[bag];
	    
      for(int x=0; x<bag; x++){
	accurate[x] = coverage[x] = infoGain[x] = 0;
	splitData[x] = new Instances(data, data.numInstances());
      }
	    
      for(int x=0; x<data.numInstances(); x++){
	Instance inst=data.instance(x);
	if(!inst.isMissing(att)){
	  int v = (int)inst.value(att);
	  splitData[v].add(inst);
	  coverage[v] += inst.weight();
	  if(Utils.eq(inst.classValue(), cl))
	    accurate[v] += inst.weight();
	}
      }
	    
      // Check if >=2 splits have more than the minimal data
      int count=0; 
      for(int x=0; x<bag; x++){
	double t = coverage[x];
	if(Utils.grOrEq(t, m_MinNo)){
	  double p = accurate[x];		
		    
	  if(!Utils.eq(t, 0.0))
	    infoGain[x] = p *((Utils.log2(p/t)) - (Utils.log2(defAcRt)));
	  ++count;
	}
      }
	        
      if(count < 2) // Don't split
	return null;
	    
      value = (double)Utils.maxIndex(infoGain);
	    
      cover = coverage[(int)value];
      accu = accurate[(int)value];
	    
      if(!Utils.eq(cover,0))
	accuRate = accu / cover;
      else accuRate = 0;
	    
      maxInfoGain = infoGain [(int)value];
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -