jrip.java

来自「wekaUT是 university texas austin 开发的基于wek」· Java 代码 · 共 1,656 行 · 第 1/4 页
JAVA
1,656 行
    /* Get functions of this antecedent */    public Attribute getAttr(){ return att; }    public double getAttrValue(){ return value; }    public double getMaxInfoGain(){ return maxInfoGain; }    public double getAccuRate(){ return accuRate; }     public double getAccu(){ return accu; }     public double getCover(){ return cover; }   }      /**    * The antecedent with numeric attribute   */  private class NumericAntd extends Antd{	    /* The split point for this numeric antecedent */    private double splitPoint;        /* Constructor*/    public NumericAntd(Attribute a){       super(a);      splitPoint = Double.NaN;    }    	    /* Get split point of this numeric antecedent */    public double getSplitPoint(){ return splitPoint; }	    /** Implements Copyable */    public Object copy(){       NumericAntd na = new NumericAntd(getAttr());      na.value = this.value;      na.splitPoint = this.splitPoint;      return na;    }	    /**     * Implements the splitData function.       * This procedure is to split the data into two bags according      * to the information gain of the numeric attribute value     * The maximum infoGain is also calculated.       *      * @param insts the data to be split     * @param defAcRt the default accuracy rate for data     * @param cl the class label to be predicted     * @return the array of data after split     */    public Instances[] splitData(Instances insts, double defAcRt, 				 double cl){	Instances data = insts;      int total=data.numInstances();// Total number of instances without       // missing value for att	          int split=1;                  // Current split position      int prev=0;                   // Previous split position      int finalSplit=split;         // Final split position      maxInfoGain = 0;      value = 0;		          double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;	          data.sort(att);      // Find the las instance without missing value       for(int x=0; x<data.numInstances(); x++){	Instance inst = data.instance(x);	if(inst.isMissing(att)){	  total = x;	  break;	}			sndCover += inst.weight();	if(Utils.eq(inst.classValue(), cl))	  sndAccu += inst.weight();		      }	          if(total == 0) return null; // Data all missing for the attribute      splitPoint = data.instance(total-1).value(att);		          for(; split <= total; split++){	if((split == total) ||	   (data.instance(split).value(att) > // Can't split within	    data.instance(prev).value(att))){ // same value	    		    	  for(int y=prev; y<split; y++){	    Instance inst = data.instance(y);	    fstCover += inst.weight(); 	    if(Utils.eq(data.instance(y).classValue(), cl)){	      fstAccu += inst.weight();  // First bag positive# ++	    }	     		   	  }		    	  double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),	    sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);		    	  /* Which bag has higher information gain? */	  boolean isFirst; 	  double fstInfoGain, sndInfoGain;	  double accRate, infoGain, coverage, accurate;		    	  fstInfoGain = 	    //Utils.eq(defAcRt, 1.0) ? 	    //fstAccu/(double)numConds : 	    fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));		    	  sndInfoGain = 	    //Utils.eq(defAcRt, 1.0) ? 	    //sndAccu/(double)numConds : 	    sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));		    	  if(fstInfoGain > sndInfoGain){	    isFirst = true;	    infoGain = fstInfoGain;	    accRate = fstAccuRate;	    accurate = fstAccu;	    coverage = fstCover;	  }	  else{	    isFirst = false;	    infoGain = sndInfoGain;	    accRate = sndAccuRate;	    accurate = sndAccu;	    coverage = sndCover;	  }		    	  /* Check whether so far the max infoGain */	  if(infoGain > maxInfoGain){	    splitPoint = data.instance(prev).value(att);	    value = (isFirst) ? 0 : 1;	    accuRate = accRate;	    accu = accurate;	    cover = coverage;	    maxInfoGain = infoGain;	    finalSplit = (isFirst) ? split : prev;	  }		    	  for(int y=prev; y<split; y++){	    Instance inst = data.instance(y);	    sndCover -= inst.weight(); 	    if(Utils.eq(data.instance(y).classValue(), cl)){	      sndAccu -= inst.weight();  // Second bag positive# --	    }	     		   	  }		    	  prev=split;	}      }	          /* Split the data */      Instances[] splitData = new Instances[2];      splitData[0] = new Instances(data, 0, finalSplit);      splitData[1] = new Instances(data, finalSplit, total-finalSplit);	          return splitData;    }	    /**     * Whether the instance is covered by this antecedent     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance is covered     *         by this antecedent     */    public boolean covers(Instance inst){      boolean isCover=true;      if(!inst.isMissing(att)){	if((int)value == 0){ // First bag	  if(inst.value(att) > splitPoint)	    isCover=false;	}	else if(inst.value(att) < splitPoint) // Second bag	  isCover=false;      }      else	isCover = false;	          return isCover;    }	    /**     * Prints this antecedent     *     * @return a textual description of this antecedent     */    public String toString() {      String symbol = ((int)value == 0) ? " <= " : " >= ";      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));    }     }          /**    * The antecedent with nominal attribute   */  private class NominalAntd extends Antd{	    /* The parameters of infoGain calculated for each attribute value     * in the growing data */    private double[] accurate;    private double[] coverage;	    /* Constructor*/    public NominalAntd(Attribute a){       super(a);          int bag = att.numValues();      accurate = new double[bag];      coverage = new double[bag];    }       /** Implements Copyable */    public Object copy(){      Antd antec = new NominalAntd(getAttr());      antec.value = this.value;      return antec;	        }	    /**     * Implements the splitData function.       * This procedure is to split the data into bags according      * to the nominal attribute value     * The infoGain for each bag is also calculated.       *      * @param data the data to be split     * @param defAcRt the default accuracy rate for data     * @param cl the class label to be predicted     * @return the array of data after split     */    public Instances[] splitData(Instances data, double defAcRt, 				 double cl){      int bag = att.numValues();      Instances[] splitData = new Instances[bag];	          for(int x=0; x<bag; x++){	splitData[x] = new Instances(data, data.numInstances());	accurate[x] = 0;	coverage[x] = 0;      }	          for(int x=0; x<data.numInstances(); x++){	Instance inst=data.instance(x);	if(!inst.isMissing(att)){	  int v = (int)inst.value(att);	  splitData[v].add(inst);	  coverage[v] += inst.weight();	  if((int)inst.classValue() == (int)cl)	    accurate[v] += inst.weight();	}      }	          for(int x=0; x<bag; x++){	double t = coverage[x]+1.0;	double p = accurate[x] + 1.0;			double infoGain = 	  //Utils.eq(defAcRt, 1.0) ? 	  //accurate[x]/(double)numConds : 	  accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt));			if(infoGain > maxInfoGain){	  maxInfoGain = infoGain;	  cover = coverage[x];	  accu = accurate[x];	  accuRate = p/t;	  value = (double)x;	}      }	          return splitData;    }	    /**     * Whether the instance is covered by this antecedent     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance is     *         covered by this antecedent     */    public boolean covers(Instance inst){      boolean isCover=false;      if(!inst.isMissing(att)){	if((int)inst.value(att) == (int)value)	  isCover=true;	          }      return isCover;    }	    /**     * Prints this antecedent     *     * @return a textual description of this antecedent     */    public String toString() {      return (att.name() + " = " +att.value((int)value));    }   }      /**   * This class implements a single rule that predicts specified class.     *   * A rule consists of antecedents "AND"ed together and the consequent    * (class value) for the classification.     * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to   * select an antecedent and Reduced Error Prunning (REP) with the metric   * of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule.    */      protected class RipperRule extends Rule{	    /** The internal representation of the class label to be predicted*/    private double m_Consequent = -1;			    /** The vector of antecedents of this rule*/    protected FastVector m_Antds = null;		    public void setConsequent(double cl){  m_Consequent = cl; }    public double getConsequent(){ return m_Consequent; }	    /** Constructor */    public RipperRule(){          m_Antds = new FastVector();	    }	    /**     * Get a shallow copy of this rule     *     * @return the copy     */    public Object copy(){      RipperRule copy = new RipperRule();      copy.setConsequent(getConsequent());      copy.m_Antds = (FastVector)this.m_Antds.copyElements();      return copy;    }	    /**     * Whether the instance covered by this rule     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance      *         is covered by this rule     */    public boolean covers(Instance datum){      boolean isCover=true;	          for(int i=0; i<m_Antds.size(); i++){	Antd antd = (Antd)m_Antds.elementAt(i);	if(!antd.covers(datum)){	  isCover = false;	  break;	}      }	          return isCover;    }        	    /**     * Whether this rule has antecedents, i.e. whether it is a default rule     *      * @return the boolean value indicating whether the rule has antecedents     */    public boolean hasAntds(){      if (m_Antds == null)	return false;      else	return (m_Antds.size() > 0);    }      	    /**      * the number of antecedents of the rule     *     * @return the size of this rule     */    public double size(){ return (double)m_Antds.size(); }			    /**     * Private function to compute default number of accurate instances     * in the specified data for the consequent of the rule     *      * @param data the data in question     * @return the default accuracy number     */    private double computeDefAccu(Instances data){       double defAccu=0;      for(int i=0; i<data.numInstances(); i++){	Instance inst = data.instance(i);	if((int)inst.classValue() == (int)m_Consequent)	  defAccu += inst.weight();      }      return defAccu;    }		    /**     * Build one rule using the growing data     *     * @param data the growing data used to build the rule     * @exception if the consequent is not set yet     */          public void grow(Instances data) throws Exception {      if(m_Consequent == -1)	throw new Exception(" Consequent not set yet.");	          Instances growData = data;	               double sumOfWeights = growData.sumOfWeights();      if(!Utils.gr(sumOfWeights, 0.0))	return;	          /* Compute the default accurate rate of the growing data */      double defAccu = computeDefAccu(growData);      double defAcRt = (defAccu+1.0)/(sumOfWeights+1.0); 	          /* Keep the record of which attributes have already been used*/          boolean[] used=new boolean [growData.numAttributes()];      for (int k=0; k<used.length; k++)	used[k]=false;      int numUnused=used.length;
jrip.java - 源码说明

本页面展示了「wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器」中的 jrip.java 源码文件，采用 Java 编程语言编写，共 1,656 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与university相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?