⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 conjunctiverule.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    ConjunctiveRule.java *    Copyright (C) 2001 Xin Xu * */package weka.classifiers.rules;import weka.classifiers.Classifier;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.ContingencyTables;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import java.io.Serializable;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * This class implements a single conjunctive rule learner that can predict for numeric and nominal class labels.<br/> * <br/> * A rule consists of antecedents "AND"ed together and the consequent (class value) for the classification/regression.  In this case, the consequent is the distribution of the available classes (or mean for a numeric value) in the dataset. 
If the test instance is not covered by this rule, then it's predicted using the default class distributions/value of the data not covered by the rule in the training data. This learner selects an antecedent by computing the Information Gain of each antecedent and prunes the generated rule using Reduced Error Pruning (REP) or simple pre-pruning based on the number of antecedents.<br/>
 * <br/>
 * For classification, the Information of one antecedent is the weighted average of the entropies of both the data covered and not covered by the rule.<br/>
 * For regression, the Information is the weighted average of the mean-squared errors of both the data covered and not covered by the rule.<br/>
 * <br/>
 * In pruning, weighted average of the accuracy rates on the pruning data is used for classification while the weighted average of the mean-squared errors on the pruning data is used for regression.<br/>
 * <br/>
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -N &lt;number of folds&gt;
 *  Set number of folds for REP
 *  One fold is used as pruning set.
 *  (default 3)</pre>
 * 
 * <pre> -R
 *  Set if NOT uses randomization
 *  (default:use randomization)</pre>
 * 
 * <pre> -E
 *  Set whether consider the exclusive
 *  expressions for nominal attributes
 *  (default false)</pre>
 * 
 * <pre> -M &lt;min. weights&gt;
 *  Set the minimal weights of instances
 *  within a split.
*  (default 2.0)</pre>
 *  
 * <pre> -P &lt;number of antecedents&gt;
 *  Set number of antecedents for pre-pruning
 *  if -1, then REP is used
 *  (default -1)</pre>
 *  
 * <pre> -S &lt;seed&gt;
 *  Set the seed of randomization
 *  (default 1)</pre>
 *  
 <!-- options-end -->
 *
 * @author Xin XU (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.13 $ 
 */
public class ConjunctiveRule 
  extends Classifier 
  implements OptionHandler, WeightedInstancesHandler{

  /** for serialization */
  static final long serialVersionUID = -5938309903225087198L;

  /** The number of folds to split data into Grow and Prune for REP */
  private int m_Folds = 3;

  /** The class attribute of the data */
  private Attribute m_ClassAttribute;

  /** The vector of antecedents of this rule */
  protected FastVector m_Antds = null;

  /** The default rule distribution of the data not covered */
  protected double[] m_DefDstr = null;

  /** The consequent of this rule */
  protected double[] m_Cnsqt = null;

  /** Number of classes in the training data */
  private int m_NumClasses = 0;

  /** The seed to perform randomization */
  private long m_Seed = 1;

  /** The Random object used for randomization */
  private Random m_Random = null;

  /** The predicted classes recorded for each antecedent in the growing data */
  private FastVector m_Targets;

  /** Whether to use exclusive expressions for nominal attributes */
  private boolean m_IsExclude = false;

  /** The minimal number of instance weights within a split */
  private double m_MinNo = 2.0;

  /** The number of antecedents in pre-pruning; -1 means REP is used instead */
  private int m_NumAntds = -1;

  /**
   * Returns a string describing classifier
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // NOTE(review): the "antecendent"/"Prunning" typos below are kept as-is
    // because this string is returned at runtime and displayed in the GUI.
    return  "This class implements a single conjunctive rule learner that can predict "
      + "for numeric and nominal class labels.\n\n"
      + "A rule consists of antecedents \"AND\"ed together and the consequent (class value) "
      + "for the classification/regression.  In this case, the consequent is the "
      + "distribution of the available classes (or mean for a numeric value) in the dataset. " 
      + "If the test instance is not covered by this rule, then it's predicted "
      + "using the default class distributions/value of the data not covered by the "
      + "rule in the training data."
      + "This learner selects an antecedent by computing the Information Gain of each "
      + "antecendent and prunes the generated rule using Reduced Error Prunning (REP) "
      + "or simple pre-pruning based on the number of antecedents.\n\n"
      + "For classification, the Information of one antecedent is the weighted average of "
      + "the entropies of both the data covered and not covered by the rule.\n"
      + "For regression, the Information is the weighted average of the mean-squared errors "
      + "of both the data covered and not covered by the rule.\n\n"
      + "In pruning, weighted average of the accuracy rates on the pruning data is used "
      + "for classification while the weighted average of the mean-squared errors "
      + "on the pruning data is used for regression.\n\n";
  }

  /** 
   * The single antecedent in the rule, which is composed of an attribute and 
   * the corresponding value.  There are two inherited classes, namely NumericAntd
   * and NominalAntd in which the attributes are numeric and nominal respectively.
   */
  private abstract class Antd implements Serializable {

    /** The attribute of the antecedent */
    protected Attribute att;

    /** The attribute value of the antecedent.
For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */    protected double value; 	    /** The maximum infoGain achieved by this antecedent test */    protected double maxInfoGain;	    /** The information of this antecedent test on the growing data */    protected double inform;	    /** The parameter related to the meanSquaredError of the data not covered 	by the previous antecedents when the class is numeric */    protected double uncoverWtSq, uncoverWtVl, uncoverSum;	    /** The parameters related to the data not covered by the previous	antecedents when the class is nominal */    protected double[] uncover;	    /** Constructor for nominal class */    public Antd(Attribute a, double[] unc){      att=a;      value=Double.NaN;       maxInfoGain = 0;      inform = Double.NaN;      uncover = unc;	    }	    /**      * Constructor for numeric class     */    public Antd(Attribute a, double uncoveredWtSq, 		double uncoveredWtVl, double uncoveredWts){      att=a;      value=Double.NaN;       maxInfoGain = 0;      inform = Double.NaN;      uncoverWtSq = uncoveredWtSq;      uncoverWtVl = uncoveredWtVl;      uncoverSum = uncoveredWts;    }	    /* The abstract members for inheritance */    public abstract Instances[] splitData(Instances data, double defInfo);    public abstract boolean isCover(Instance inst);    public abstract String toString();	    /* Get functions of this antecedent */    public Attribute getAttr(){ return att; }    public double getAttrValue(){ return value; }    public double getMaxInfoGain(){ return maxInfoGain; }    public double getInfo(){ return inform;}	    /**      * Function used to calculate the weighted mean squared error,     * i.e., sum[x-avg(x)]^2 based on the given elements of the formula:     * meanSquaredError = sum(Wi*Xi^2) - (sum(WiXi))^2/sum(Wi)     *      * @param weightedSq sum(Wi*Xi^2)     * @param weightedValue sum(WiXi)     * @param sum sum of weights     * @return the weighted mean-squared error     */    protected 
double wtMeanSqErr(double weightedSq, double weightedValue, double sum){      if(Utils.smOrEq(sum, 1.0E-6))	return 0;	          return (weightedSq - (weightedValue * weightedValue) / sum);    }	    /**     * Function used to calculate the entropy of given vector of values     * entropy = (1/sum)*{-sigma[i=1..P](Xi*log2(Xi)) + sum*log2(sum)}     * where P is the length of the vector     *     * @param value the given vector of values     * @param sum the sum of the given values.  It's provided just for efficiency.     * @return the entropy     */    protected double entropy(double[] value, double sum){	         if(Utils.smOrEq(sum, 1.0E-6))	return 0;      double entropy = 0;	          for(int i=0; i < value.length; i++){	if(!Utils.eq(value[i],0))	  entropy -= value[i] * Utils.log2(value[i]);      }      entropy += sum * Utils.log2(sum);      entropy /= sum;      return entropy;    }  }      /**    * The antecedent with numeric attribute   */  private class NumericAntd     extends Antd {        /** for serialization */    static final long serialVersionUID = -7957266498918210436L;	    /** The split point for this numeric antecedent */    private double splitPoint;	    /**      * Constructor for nominal class     */    public NumericAntd(Attribute a, double[] unc){       super(a, unc);      splitPoint = Double.NaN;    }    	    /**      * Constructor for numeric class     */    public NumericAntd(Attribute a, double sq, double vl, double wts){       super(a, sq, vl, wts);      splitPoint = Double.NaN;    }	    /**      * Get split point of this numeric antecedent     *      * @return the split point     */    public double getSplitPoint(){       return splitPoint;     }	    /**     * Implements the splitData function.       * This procedure is to split the data into two bags according      * to the information gain of the numeric attribute value     * the data with missing values are stored in the last split.     * The maximum infoGain is also calculated.       
*
     * @param insts the data to be split
     * @param defInfo the default information for data
     * @return the array of data after split
     */
    public Instances[] splitData(Instances insts, double defInfo){

      // Work on a copy sorted by this antecedent's attribute; instances with
      // a missing value for att sort to the end.
      Instances data = new Instances(insts);
      data.sort(att);
      int total=data.numInstances();// Total number of instances without
                                    // missing value for att (adjusted below
                                    // once the first missing value is found)
      maxInfoGain = 0;
      value = 0;

      // Compute minimum number of Instances required in each split
      double minSplit;
      if(m_ClassAttribute.isNominal()){
        minSplit =  0.1 * (data.sumOfWeights()) /
          ((double)m_ClassAttribute.numValues());
        // Clamp the minimum split weight to the range [m_MinNo, 25]
        if (Utils.smOrEq(minSplit,m_MinNo)) 
          minSplit = m_MinNo;
        else if (Utils.gr(minSplit,25)) 
          minSplit = 25;
      }
      else
        minSplit = m_MinNo;

      // Per-class weight totals of the first bag, second bag and the
      // missing-value bag (used only when the class is nominal)
      double[] fst=null, snd=null, missing=null;
      if(m_ClassAttribute.isNominal()){
        fst = new double[m_NumClasses];
        snd = new double[m_NumClasses];
        missing = new double[m_NumClasses];

        for(int v=0; v < m_NumClasses; v++)
          fst[v]=snd[v]=missing[v]=0.0;
      }
      // Covered weight per bag, and (numeric class) sum(w*y^2) / sum(w*y)
      double fstCover=0, sndCover=0, fstWtSq=0, sndWtSq=0, fstWtVl=0, sndWtVl=0;

      int split=1;                  // Current split position
      int prev=0;                   // Previous split position
      int finalSplit=split;         // Final split position

      // Initially every instance with a known att value is in the second bag
      for(int x=0; x<data.numInstances(); x++){
        Instance inst = data.instance(x);
        if(inst.isMissing(att)){
          total = x;
          break;
        }

        sndCover += inst.weight();
        if(m_ClassAttribute.isNominal()) // Nominal class
          snd[(int)inst.classValue()] += inst.weight();
        else{                            // Numeric class
          sndWtSq += inst.weight() * inst.classValue() * inst.classValue();
          sndWtVl += inst.weight() * inst.classValue();
        }
      }

      // Enough Instances with known values?
      if (Utils.sm(sndCover,(2*minSplit)))
        return null;

      // Gather the instances with a missing att value into the last split
      double msingWtSq=0, msingWtVl=0;
      Instances missingData = new Instances(data, 0);
      for(int y=total; y < data.numInstances(); y++){
        Instance inst = data.instance(y);
        missingData.add(inst);
        if(m_ClassAttribute.isNominal())
          missing[(int)inst.classValue()] += inst.weight();
        else{ 
          msingWtSq += inst.weight() * inst.classValue() * inst.classValue();
          msingWtVl += inst.weight() * inst.classValue();
        }
      }

      if(total == 0) return null; // Data all missing for the attribute

      splitPoint = data.instance(total-1).value(att);

      // Scan each candidate split position, keeping the one that yields the
      // highest information gain
      for(; split < total; split++){
        if(!Utils.eq(data.instance(split).value(att), // Can't split
                     data.instance(prev).value(att))){// within same value

          // Move the split point: shift instances [prev, split) from the
          // second bag into the first
          for(int y=prev; y<split; y++){
            Instance inst = data.instance(y);
            fstCover += inst.weight(); sndCover -= inst.weight();
            if(m_ClassAttribute.isNominal()){ // Nominal class
              fst[(int)inst.classValue()] += inst.weight();
              snd[(int)inst.classValue()] -= inst.weight();
            }   
            else{                             // Numeric class
              fstWtSq += inst.weight() * inst.classValue() * inst.classValue();
              fstWtVl += inst.weight() * inst.classValue();
              sndWtSq -= inst.weight() * inst.classValue() * inst.classValue();
              sndWtVl -= inst.weight() * inst.classValue();
            }
          }

          if(Utils.sm(fstCover, minSplit) || Utils.sm(sndCover, minSplit)){
            prev=split;  // Cannot split because either
            continue;    // split has not enough data
          }

          double fstEntp = 0, sndEntp = 0;
          // NOTE(review): SOURCE is truncated here (page 1 of 4 of the scraped
          // listing); the remainder of splitData is not visible in this chunk.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -