📄 conjunctiverule.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	  if(m_ClassAttribute.isNominal()){	    fstEntp = entropy(fst, fstCover);	    sndEntp = entropy(snd, sndCover);	  }	  else{	    fstEntp = wtMeanSqErr(fstWtSq, fstWtVl, fstCover)/fstCover;	    sndEntp = wtMeanSqErr(sndWtSq, sndWtVl, sndCover)/sndCover;	  }		    	  /* Which bag has higher information gain? */	  boolean isFirst; 	  double fstInfoGain, sndInfoGain;	  double info, infoGain, fstInfo, sndInfo;	  if(m_ClassAttribute.isNominal()){	    double sum = data.sumOfWeights();	    double otherCover, whole = sum + Utils.sum(uncover), otherEntropy; 	    double[] other = null;				    // InfoGain of first bag				    other = new double[m_NumClasses];	    for(int z=0; z < m_NumClasses; z++)	      other[z] = uncover[z] + snd[z] + missing[z];   	    otherCover = whole - fstCover;				    otherEntropy = entropy(other, otherCover);	    // Weighted average	    fstInfo = (fstEntp*fstCover + otherEntropy*otherCover)/whole;	    fstInfoGain = defInfo - fstInfo;				    // InfoGain of second bag 				    other = new double[m_NumClasses];	    for(int z=0; z < m_NumClasses; z++)	      other[z] = uncover[z] + fst[z] + missing[z]; 	    otherCover = whole - sndCover;				    otherEntropy = entropy(other, otherCover);	    // Weighted average	    sndInfo = (sndEntp*sndCover + otherEntropy*otherCover)/whole;			    	    sndInfoGain = defInfo - sndInfo;				  }	  else{	    double sum = data.sumOfWeights();	    double otherWtSq = (sndWtSq + msingWtSq + uncoverWtSq), 	      otherWtVl = (sndWtVl + msingWtVl + uncoverWtVl),	      otherCover = (sum - fstCover + uncoverSum);				    fstInfo = Utils.eq(fstCover, 0) ? 0 : (fstEntp * fstCover);	    fstInfo += wtMeanSqErr(otherWtSq, otherWtVl, otherCover);	    fstInfoGain = defInfo - fstInfo;				    otherWtSq = (fstWtSq + msingWtSq + uncoverWtSq); 	    otherWtVl = (fstWtVl + msingWtVl + uncoverWtVl);	    otherCover = sum - sndCover + uncoverSum;	    sndInfo = Utils.eq(sndCover, 0) ? 0 : (sndEntp * sndCover);	    sndInfo += wtMeanSqErr(otherWtSq, otherWtVl, otherCover);	    sndInfoGain = defInfo - sndInfo;	  }		    	  if(Utils.gr(fstInfoGain,sndInfoGain) || 	     (Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.sm(fstEntp,sndEntp)))){ 	    isFirst = true;	    infoGain = fstInfoGain;	    info = fstInfo;	  }	  else{	    isFirst = false;	    infoGain = sndInfoGain;	    info = sndInfo;	  }		    	  boolean isUpdate = Utils.gr(infoGain, maxInfoGain);		    	  /* Check whether so far the max infoGain */	  if(isUpdate){	    splitPoint = ((data.instance(split).value(att)) + (data.instance(prev).value(att)))/2.0;	    value = ((isFirst) ? 0 : 1);	    inform = info;	    maxInfoGain = infoGain;	    finalSplit = split;				  }	  prev=split;	}      }	          /* Split the data */      Instances[] splitData = new Instances[3];      splitData[0] = new Instances(data, 0, finalSplit);      splitData[1] = new Instances(data, finalSplit, total-finalSplit);      splitData[2] = new Instances(missingData);	          return splitData;    }	    /**     * Whether the instance is covered by this antecedent     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance is covered      *         by this antecedent     */    public boolean isCover(Instance inst){      boolean isCover=false;      if(!inst.isMissing(att)){	if(Utils.eq(value, 0)){	  if(Utils.smOrEq(inst.value(att), splitPoint))	    isCover=true;	}	else if(Utils.gr(inst.value(att), splitPoint))	  isCover=true;      }      return isCover;    }	    /**     * Prints this antecedent     *     * @return a textual description of this antecedent     */    public String toString() {      String symbol = Utils.eq(value, 0.0) ? " <= " : " > ";      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));    }     }          /**    * The antecedent with nominal attribute   */  class NominalAntd     extends Antd {	    /** for serialization */    static final long serialVersionUID = -5949864163376447424L;        /* The parameters of infoGain calculated for each attribute value */    private double[][] stats;    private double[] coverage;    private boolean isIn;	    /**      * Constructor for nominal class     */    public NominalAntd(Attribute a, double[] unc){       super(a, unc);      int bag = att.numValues();      stats = new double[bag][m_NumClasses];      coverage = new double[bag];      isIn = true;    }   	    /**      * Constructor for numeric class     */    public NominalAntd(Attribute a, double sq, double vl, double wts){       super(a, sq, vl, wts);      int bag = att.numValues();	          stats = null;      coverage = new double[bag];      isIn = true;    }	    /**     * Implements the splitData function.       * This procedure is to split the data into bags according      * to the nominal attribute value     * the data with missing values are stored in the last bag.     * The infoGain for each bag is also calculated.       *      * @param data the data to be split     * @param defInfo the default information for data     * @return the array of data after split     */    public Instances[] splitData(Instances data, double defInfo){      int bag = att.numValues();      Instances[] splitData = new Instances[bag+1];      double[] wSq = new double[bag];      double[] wVl = new double[bag];      double totalWS=0, totalWV=0, msingWS=0, msingWV=0, sum=data.sumOfWeights();      double[] all = new double[m_NumClasses];      double[] missing = new double[m_NumClasses];	   	          for(int w=0; w < m_NumClasses; w++)	all[w] = missing[w] = 0;      for(int x=0; x<bag; x++){	coverage[x] = wSq[x] = wVl[x] = 0;	if(stats != null)	  for(int y=0; y < m_NumClasses; y++)	    stats[x][y] = 0;			splitData[x] = new Instances(data, data.numInstances());      }      splitData[bag] = new Instances(data, data.numInstances());	          // Record the statistics of data      for(int x=0; x<data.numInstances(); x++){	Instance inst=data.instance(x);	if(!inst.isMissing(att)){	  int v = (int)inst.value(att);	  splitData[v].add(inst);	  coverage[v] += inst.weight();	  if(m_ClassAttribute.isNominal()){ // Nominal class				    stats[v][(int)inst.classValue()] += inst.weight();	    all[(int)inst.classValue()] += inst.weight();	    	  }	  else{                             // Numeric class	    wSq[v] += inst.weight() * inst.classValue() * inst.classValue();	    wVl[v] += inst.weight() * inst.classValue();	    totalWS += inst.weight() * inst.classValue() * inst.classValue();	    totalWV += inst.weight() * inst.classValue();	  }	}	else{	  splitData[bag].add(inst);	  if(m_ClassAttribute.isNominal()){ // Nominal class	    all[(int)inst.classValue()] += inst.weight();	    missing[(int)inst.classValue()] += inst.weight();	  }	  else{                            // Numeric class	    totalWS += inst.weight() * inst.classValue() * inst.classValue();	    totalWV += inst.weight() * inst.classValue();	    msingWS += inst.weight() * inst.classValue() * inst.classValue();	    msingWV += inst.weight() * inst.classValue();		 	  }	}      }	          // The total weights of the whole grow data      double whole;      if(m_ClassAttribute.isNominal())	whole = sum + Utils.sum(uncover);      else	whole = sum + uncoverSum;	        // Find the split        double minEntrp=Double.MAX_VALUE;      maxInfoGain = 0;	          // Check if >=2 splits have more than the minimal data      int count=0;      for(int x=0; x<bag; x++)	if(Utils.grOrEq(coverage[x], m_MinNo))		    	  ++count;	          if(count < 2){ // Don't split	maxInfoGain = 0;	inform = defInfo;	value = Double.NaN;	return null;      }	          for(int x=0; x<bag; x++){			double t = coverage[x], entrp, infoGain;	if(Utils.sm(t, m_MinNo))	  continue;			if(m_ClassAttribute.isNominal()){ // Nominal class	   	  double[] other = new double[m_NumClasses];	  for(int y=0; y < m_NumClasses; y++)	    other[y] = all[y] - stats[x][y] + uncover[y]; 	  double otherCover = whole - t;			    	  // Entropies of data covered and uncovered 		  entrp = entropy(stats[x], t);	  double uncEntp = entropy(other, otherCover);		    	  // Weighted average	  infoGain = defInfo - (entrp*t + uncEntp*otherCover)/whole;		   	}		else{                             // Numeric class	  double weight = (whole - t);	  entrp = wtMeanSqErr(wSq[x], wVl[x], t)/t;	  infoGain = defInfo - (entrp * t) - 	    wtMeanSqErr((totalWS-wSq[x]+uncoverWtSq),			(totalWV-wVl[x]+uncoverWtVl), 			weight);		  	}   					// Test the exclusive expression	boolean isWithin =true;			if(m_IsExclude){	  double infoGain2, entrp2;	  if(m_ClassAttribute.isNominal()){ // Nominal class		    double[] other2 = new double[m_NumClasses];	    double[] notIn = new double[m_NumClasses];	    for(int y=0; y < m_NumClasses; y++){	      other2[y] = stats[x][y] + missing[y] + uncover[y];	      notIn[y] = all[y] - stats[x][y] - missing[y];	    } 				    double msSum = Utils.sum(missing);	    double otherCover2 = t + msSum + Utils.sum(uncover);				    entrp2 = entropy(notIn, (sum-t-msSum));	    double uncEntp2 = entropy(other2, otherCover2);	    infoGain2 = defInfo - 	      (entrp2*(sum-t-msSum) + uncEntp2*otherCover2)/whole;	  }	  else{                             // Numeric class	    double msWts = splitData[bag].sumOfWeights();	    double weight2 = t + uncoverSum + msWts;				    entrp2 = wtMeanSqErr((totalWS-wSq[x]-msingWS),				 (totalWV-wVl[x]-msingWV),(sum-t-msWts))	      /(sum-t-msWts);	    infoGain2 = defInfo - entrp2 * (sum-t-msWts) -	      wtMeanSqErr((wSq[x]+uncoverWtSq+msingWS),			  (wVl[x]+uncoverWtVl+msingWV), 			  weight2);	  }		    	  // Use the exclusive expression?	  if (Utils.gr(infoGain2, infoGain) ||	      (Utils.eq(infoGain2, infoGain) && Utils.sm(entrp2, entrp))){	    infoGain = infoGain2;	    entrp = entrp2;	    isWithin =false;	  }	}			// Test this split	if (Utils.gr(infoGain, maxInfoGain) ||	    (Utils.eq(infoGain, maxInfoGain) && Utils.sm(entrp, minEntrp))){	  value = (double)x;	  maxInfoGain = infoGain;	  inform = maxInfoGain - defInfo;	  minEntrp = entrp;	  isIn = isWithin;	}		      }	          return splitData;    }	    /**     * Whether the instance is covered by this antecedent     *      * @param inst the instance in question     * @return the boolean value indicating whether the instance is covered      *         by this antecedent     */    public boolean isCover(Instance inst){	        boolean isCover=false;      if(!inst.isMissing(att)){	if(isIn){	  if(Utils.eq(inst.value(att), value))	    isCover=true;	}	else if(!Utils.eq(inst.value(att), value))	  isCover=true;      }      return isCover;    }	    /**     * Whether the expression is "att = value" or att != value"     * for this nominal attribute.  True if in the former expression,      * otherwise the latter     *      * @return the boolean value     */    public boolean isIn(){	       return isIn;    }	    /**     * Prints this antecedent     *     * @return a textual description of this antecedent     */    public String toString() {      String symbol = isIn ? " = " : " != ";	          return (att.name() + symbol + att.value((int)value));    }   }      /**   * Returns an enumeration describing the available options   * Valid options are: <p>   *   * -N number <br>   * Set number of folds for REP. One fold is   * used as the pruning set. (Default: 3) <p>   *   * -R <br>   * Set if NOT randomize the data before split to growing and    * pruning data. If NOT set, the seed of randomization is    * specified by the -S option. (Default: randomize) <p>   *    * -S <br>   * Seed of randomization. (Default: 1)<p>   *   * -E <br>   * Set whether consider the exclusive expressions for nominal   * attribute split. (Default: false) <p>   *   * -M number <br>   * Set the minimal weights of instances within a split.   * (Default: 2) <p>   *   * -P number <br>   * Set the number of antecedents allowed in the rule if pre-pruning   * is used.  If this value is other than -1, then pre-pruning will be   * used, otherwise the rule uses REP. (Default: -1) <p>   *   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector newVector = new Vector(6);	    newVector.addElement(new Option("\tSet number of folds for REP\n" +				    "\tOne fold is used as pruning set.\n" +				    "\t(default 3)","N", 1, "-N <number of folds>"));	    newVector.addElement(new Option("\tSet if NOT uses randomization\n" +
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -