📄 ridor.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	  if(!ins.isMissing(attr)){              // Attribute not missing	    if(antd.isCover(ins)){             // Covered by this antecedent	      coverage[x] += ins.weight();	      data.add(ins);                 // Add to data for further pruning	      if(Utils.eq(ins.classValue(), m_Class)) // Accurate prediction		worthValue[x] += ins.weight();	    }	  }	}			if(coverage[x] != 0)  	  worthRt[x] = worthValue[x]/coverage[x];      }	          /* Prune the antecedents according to the accuracy parameters */      for(int z=(size-1); z > 0; z--)	if(Utils.sm(worthRt[z], worthRt[z-1]))	  m_Antds.removeElementAt(z);	else  break;	          /* Check whether this rule is a default rule */      if(m_Antds.size() == 1){	defAccu = computeDefAccu(pruneData);	defAccuRate = defAccu/total;                // Compute def. accuracy	if(Utils.sm(worthRt[0], defAccuRate)){      // Becomes a default rule	  m_Antds.removeAllElements();	}      }   	          /* Update the worth parameters of this rule*/      int antdsSize = m_Antds.size();      if(antdsSize != 0){                          // Not a default rule	m_Worth = worthValue[antdsSize-1];       // WorthValues of the last antecedent	m_WorthRate = worthRt[antdsSize-1];	m_CoverP = coverage[antdsSize-1];	Antd last = (Antd)m_Antds.lastElement();	m_CoverG = last.getCover();	m_AccuG = last.getAccu();      }      else{                                        // Default rule    	m_Worth = defAccu;                       // Default WorthValues	m_WorthRate = defAccuRate;	m_CoverP = total;      }    }	    /**     * Private function to compute default number of accurate instances     * in the specified data for m_Class     *      * @param data the data in question     * @return the default accuracy number     */    private double computeDefAccu(Instances data){       double defAccu=0;      for(int i=0; i<data.numInstances(); i++){	Instance inst = data.instance(i);	if(Utils.eq(inst.classValue(), m_Class))	  defAccu += inst.weight();      }      return defAccu;    }	    /** The following are get functions after prune() has set the value of worthRate and worth*/    public double getWorthRate(){ return m_WorthRate; }    public double getWorth(){ return m_Worth; }    public double getCoverP(){ return m_CoverP; }    public double getCoverG(){ return m_CoverG; }    public double getAccuG(){ return m_AccuG; }    /**     * Prints this rule with the specified class label     *     * @param att the string standing for attribute in the consequent of this rule     * @param cl the string standing for value in the consequent of this rule     * @return a textual description of this rule with the specified class label     */    public String toString(String att, String cl) {      StringBuffer text =  new StringBuffer();      if(m_Antds.size() > 0){	for(int j=0; j< (m_Antds.size()-1); j++)	  text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");	text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");      }      text.append(" => " + att + " = " + cl);      text.append("  ("+m_CoverG+"/"+(m_CoverG - m_AccuG)+") ["+		  m_CoverP+"/"+(m_CoverP - m_Worth)+"]");      return text.toString();    }	    /**     * Prints this rule     *     * @return a textual description of this rule     */    public String toString() {      return toString(m_ClassAttribute.name(), m_ClassAttribute.value((int)m_Class));    }          }          /**    * The single antecedent in the rule, which is composed of an attribute and    * the corresponding value.  There are two inherited classes, namely NumericAntd   * and NominalAntd in which the attributes are numeric and nominal respectively.   */  private abstract class Antd     implements Serializable {        /** The attribute of the antecedent */    protected Attribute att;	    /** The attribute value of the antecedent.         For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */    protected double value; 	    /** The maximum infoGain achieved by this antecedent test */    protected double maxInfoGain;	    /** The accurate rate of this antecedent test on the growing data */    protected double accuRate;	    /** The coverage of this antecedent */    protected double cover;	    /** The accurate data for this antecedent */    protected double accu;	    /** Constructor*/    public Antd(Attribute a){      att=a;      value=Double.NaN;       maxInfoGain = 0;      accuRate = Double.NaN;      cover = Double.NaN;      accu = Double.NaN;    }	    /* The abstract members for inheritance */    public abstract Instances[] splitData(Instances data, double defAcRt, double cla);    public abstract boolean isCover(Instance inst);    public abstract String toString();	    /* Get functions of this antecedent */    public Attribute getAttr(){ return att; }    public double getAttrValue(){ return value; }    public double getMaxInfoGain(){ return maxInfoGain; }    public double getAccuRate(){ return accuRate; }     public double getAccu(){ return accu; }     public double getCover(){ return cover; }   }      /**    * The antecedent with numeric attribute   */  private class NumericAntd     extends Antd {        /** for serialization */    static final long serialVersionUID = 1968761518014492214L;	    /** The split point for this numeric antecedent */    private double splitPoint;	    /** Constructor*/    public NumericAntd(Attribute a){       super(a);      splitPoint = Double.NaN;    }    	    /** Get split point of this numeric antecedent */    public double getSplitPoint(){ return splitPoint; }	    /**     * Implements the splitData function.       * This procedure is to split the data into two bags according      * to the information gain of the numeric attribute value     * The maximum infoGain is also calculated.       *      * @param insts the data to be split     * @param defAcRt the default accuracy rate for data     * @param cl the class label to be predicted     * @return the array of data after split     */    public Instances[] splitData(Instances insts, double defAcRt, double cl){      Instances data = new Instances(insts);      data.sort(att);      int total=data.numInstances();// Total number of instances without       // missing value for att	          int split=1;                  // Current split position      int prev=0;                   // Previous split position      int finalSplit=split;         // Final split position      maxInfoGain = 0;      value = 0;	      // Compute minimum number of Instances required in each split      double minSplit =  0.1 * (data.sumOfWeights()) / 2.0;      if (Utils.smOrEq(minSplit,m_MinNo)) 	minSplit = m_MinNo;      else if (Utils.gr(minSplit,25)) 	minSplit = 25;	    	          double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;	          for(int x=0; x<data.numInstances(); x++){	Instance inst = data.instance(x);	if(inst.isMissing(att)){	  total = x;	  break;	}			sndCover += inst.weight();	if(Utils.eq(inst.classValue(), cl))	  sndAccu += inst.weight();      }	          // Enough Instances with known values?      if (Utils.sm(sndCover,(2*minSplit)))	return null;	          if(total == 0) return null; // Data all missing for the attribute 	      splitPoint = data.instance(total-1).value(att);		          for(; split < total; split++){	if(!Utils.eq(data.instance(split).value(att), 		     data.instance(prev).value(att))){ // Can't split within same value		    	  for(int y=prev; y<split; y++){	    Instance inst = data.instance(y);	    fstCover += inst.weight(); sndCover -= inst.weight(); 	    if(Utils.eq(data.instance(y).classValue(), cl)){	      fstAccu += inst.weight();  // First bag positive# ++	      sndAccu -= inst.weight();  // Second bag positive# --	    }	     		   	  }		    	  if(Utils.sm(fstCover, minSplit) || Utils.sm(sndCover, minSplit)){	    prev=split;  // Cannot split because either	    continue;    // split has not enough data	  }		    	  double fstAccuRate = 0, sndAccuRate = 0;	  if(!Utils.eq(fstCover,0))	    fstAccuRate = fstAccu/fstCover;			  if(!Utils.eq(sndCover,0))	    sndAccuRate = sndAccu/sndCover;		    	  /* Which bag has higher information gain? */	  boolean isFirst; 	  double fstInfoGain, sndInfoGain;	  double accRate, infoGain, coverage, accurate;		    	  fstInfoGain = Utils.eq(fstAccuRate, 0) ? 	    0 : (fstAccu*(Utils.log2(fstAccuRate) - Utils.log2(defAcRt)));	  sndInfoGain = Utils.eq(sndAccuRate, 0) ? 	    0 : (sndAccu*(Utils.log2(sndAccuRate) - Utils.log2(defAcRt)));	  if(Utils.gr(fstInfoGain,sndInfoGain) || 	     (Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.grOrEq(fstAccuRate,sndAccuRate)))){	    isFirst = true;	    infoGain = fstInfoGain;	    accRate = fstAccuRate;	    accurate = fstAccu;	    coverage = fstCover;	  }	  else{	    isFirst = false;	    infoGain = sndInfoGain;	    accRate = sndAccuRate;	    accurate = sndAccu;	    coverage = sndCover;	  }		    	  boolean isUpdate = Utils.gr(infoGain, maxInfoGain);		    	  /* Check whether so far the max infoGain */	  if(isUpdate){	    splitPoint = (data.instance(split).value(att) + 			  data.instance(prev).value(att))/2;	    value = ((isFirst) ? 0 : 1);	    accuRate = accRate;	    accu = accurate;	    cover = coverage;	    maxInfoGain = infoGain;	    finalSplit = split;	  }	  prev=split;	}      }	          /* Split the data */      Instances[] splitData = new Instances[2];      splitData[0] = new Instances(data, 0, finalSplit);      splitData[1] = new Instances(data, finalSplit, total-finalSplit);	          return splitData;    }	    /**     * Whether the instance is covered by this antecedent     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance is covered      *         by this antecedent     */    public boolean isCover(Instance inst){      boolean isCover=false;      if(!inst.isMissing(att)){	if(Utils.eq(value, 0)){	  if(Utils.smOrEq(inst.value(att), splitPoint))	    isCover=true;	}	else if(Utils.gr(inst.value(att), splitPoint))	  isCover=true;      }      return isCover;    }	    /**     * Prints this antecedent     *     * @return a textual description of this antecedent     */    public String toString() {      String symbol = Utils.eq(value, 0.0) ? " <= " : " > ";      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));    }     }          /**    * The antecedent with nominal attribute   */  private class NominalAntd     extends Antd {        /** for serialization */    static final long serialVersionUID = -256386137196078004L;	    /* The parameters of infoGain calculated for each attribute value */    private double[] accurate;    private double[] coverage;    private double[] infoGain;	    /** Constructor*/    public NominalAntd(Attribute a){       super(a);      int bag = att.numValues();      accurate = new double[bag];      coverage = new double[bag];      infoGain = new double[bag];    }   	    /**     * Implements the splitData function.       * This procedure is to split the data into bags according      * to the nominal attribute value     * The infoGain for each bag is also calculated.       *      * @param data the data to be split     * @param defAcRt the default accuracy rate for data     * @param cl the class label to be predicted     * @return the array of data after split     */    public Instances[] splitData(Instances data, double defAcRt, double cl){      int bag = att.numValues();      Instances[] splitData = new Instances[bag];	          for(int x=0; x<bag; x++){	accurate[x] = coverage[x] = infoGain[x] = 0;	splitData[x] = new Instances(data, data.numInstances());      }	          for(int x=0; x<data.numInstances(); x++){	Instance inst=data.instance(x);	if(!inst.isMissing(att)){	  int v = (int)inst.value(att);	  splitData[v].add(inst);	  coverage[v] += inst.weight();	  if(Utils.eq(inst.classValue(), cl))	    accurate[v] += inst.weight();	}      }	          // Check if >=2 splits have more than the minimal data      int count=0;       for(int x=0; x<bag; x++){	double t = coverage[x];	if(Utils.grOrEq(t, m_MinNo)){	  double p = accurate[x];				    	  if(!Utils.eq(t, 0.0))	    infoGain[x] = p *((Utils.log2(p/t)) - (Utils.log2(defAcRt)));	  ++count;	}      }	              if(count < 2) // Don't split	return null;	          value = (double)Utils.maxIndex(infoGain);	          cover = coverage[(int)value];      accu = accurate[(int)value];	          if(!Utils.eq(cover,0))	accuRate = accu / cover;      else accuRate = 0;	          maxInfoGain = infoGain [(int)value];
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -