⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 conjunctiverule.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
		    
	  if(m_ClassAttribute.isNominal()){
	    fstEntp = entropy(fst, fstCover);
	    sndEntp = entropy(snd, sndCover);
	  }
	  else{
	    fstEntp = wtMeanSqErr(fstWtSq, fstWtVl, fstCover)/fstCover;
	    sndEntp = wtMeanSqErr(sndWtSq, sndWtVl, sndCover)/sndCover;
	  }
		    
	  /* Which bag has higher information gain? */
	  boolean isFirst; 
	  double fstInfoGain, sndInfoGain;
	  double info, infoGain, fstInfo, sndInfo;
	  if(m_ClassAttribute.isNominal()){
	    double sum = data.sumOfWeights();
	    double otherCover, whole = sum + Utils.sum(uncover), otherEntropy; 
	    double[] other = null;
			
	    // InfoGain of first bag			
	    other = new double[m_NumClasses];
	    for(int z=0; z < m_NumClasses; z++)
	      other[z] = uncover[z] + snd[z] + missing[z];   
	    otherCover = whole - fstCover;			
	    otherEntropy = entropy(other, otherCover);
	    // Weighted average
	    fstInfo = (fstEntp*fstCover + otherEntropy*otherCover)/whole;
	    fstInfoGain = defInfo - fstInfo;
			
	    // InfoGain of second bag 			
	    other = new double[m_NumClasses];
	    for(int z=0; z < m_NumClasses; z++)
	      other[z] = uncover[z] + fst[z] + missing[z]; 
	    otherCover = whole - sndCover;			
	    otherEntropy = entropy(other, otherCover);
	    // Weighted average
	    sndInfo = (sndEntp*sndCover + otherEntropy*otherCover)/whole;			    
	    sndInfoGain = defInfo - sndInfo;			
	  }
	  else{
	    double sum = data.sumOfWeights();
	    double otherWtSq = (sndWtSq + msingWtSq + uncoverWtSq), 
	      otherWtVl = (sndWtVl + msingWtVl + uncoverWtVl),
	      otherCover = (sum - fstCover + uncoverSum);
			
	    fstInfo = Utils.eq(fstCover, 0) ? 0 : (fstEntp * fstCover);
	    fstInfo += wtMeanSqErr(otherWtSq, otherWtVl, otherCover);
	    fstInfoGain = defInfo - fstInfo;
			
	    otherWtSq = (fstWtSq + msingWtSq + uncoverWtSq); 
	    otherWtVl = (fstWtVl + msingWtVl + uncoverWtVl);
	    otherCover = sum - sndCover + uncoverSum;
	    sndInfo = Utils.eq(sndCover, 0) ? 0 : (sndEntp * sndCover);
	    sndInfo += wtMeanSqErr(otherWtSq, otherWtVl, otherCover);
	    sndInfoGain = defInfo - sndInfo;
	  }
		    
	  if(Utils.gr(fstInfoGain,sndInfoGain) || 
	     (Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.sm(fstEntp,sndEntp)))){ 
	    isFirst = true;
	    infoGain = fstInfoGain;
	    info = fstInfo;
	  }
	  else{
	    isFirst = false;
	    infoGain = sndInfoGain;
	    info = sndInfo;
	  }
		    
	  boolean isUpdate = Utils.gr(infoGain, maxInfoGain);
		    
	  /* Check whether this is the largest infoGain found so far */
	  if(isUpdate){
	    splitPoint = ((data.instance(split).value(att)) + (data.instance(prev).value(att)))/2.0;
	    value = ((isFirst) ? 0 : 1);
	    inform = info;
	    maxInfoGain = infoGain;
	    finalSplit = split;			
	  }
	  prev=split;
	}
      }
	    
      /* Split the data */
      Instances[] splitData = new Instances[3];
      splitData[0] = new Instances(data, 0, finalSplit);
      splitData[1] = new Instances(data, finalSplit, total-finalSplit);
      splitData[2] = new Instances(missingData);
	    
      return splitData;
    }
	
    /**
     * Whether the instance is covered by this antecedent
     * 
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered 
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
      // An instance with a missing value for this attribute is never covered
      if(inst.isMissing(att))
        return false;

      // value == 0 selects the "<= splitPoint" side of the split
      if(Utils.eq(value, 0))
        return Utils.smOrEq(inst.value(att), splitPoint);

      // any other value selects the "> splitPoint" side
      return Utils.gr(inst.value(att), splitPoint);
    }
	
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      // value == 0 denotes the "<=" side of the split, anything else the ">" side
      String relation;
      if(Utils.eq(value, 0.0))
        relation = " <= ";
      else
        relation = " > ";

      // The split point is formatted with 6 as the second argument of doubleToString
      String threshold = Utils.doubleToString(splitPoint, 6);
      return att.name() + relation + threshold;
    }
  }
    
    
  /** 
   * The antecedent with nominal attribute
   */
  class NominalAntd extends Antd{

    /* stats[v][c]: summed weight of the instances with attribute value v and
       class c.  Left null by the constructor for a numeric class. */
    private double[][] stats;

    /* coverage[v]: summed weight of the instances with attribute value v */
    private double[] coverage;

    /* True for the expression "att = value", false for "att != value" */
    private boolean isIn;

    /* Constructor for nominal class */
    public NominalAntd(Attribute a, double[] unc){
      super(a, unc);
      int bag = att.numValues();
      stats = new double[bag][m_NumClasses];
      coverage = new double[bag];
      isIn = true;
    }

    /* Constructor for numeric class */
    public NominalAntd(Attribute a, double sq, double vl, double wts){
      super(a, sq, vl, wts);
      int bag = att.numValues();
      stats = null;
      coverage = new double[bag];
      isIn = true;
    }

    /**
     * Implements the splitData function.
     * This procedure splits the data into bags according to the nominal
     * attribute value; the data with missing values are stored in the
     * last bag.  The infoGain for each bag is also calculated.
     *
     * As a side effect the best test found is recorded in this antecedent:
     * <code>value</code> (index of the chosen attribute value),
     * <code>maxInfoGain</code>, <code>inform</code> (the information left
     * after the split, i.e. <code>defInfo - maxInfoGain</code>) and
     * <code>isIn</code>.
     *
     * @param data the data to be split
     * @param defInfo the default information for data
     * @return the array of data after split (one bag per attribute value
     *         plus a last bag for missing values), or null if fewer than
     *         two attribute values cover at least the minimal weight
     */
    public Instances[] splitData(Instances data, double defInfo){
      int bag = att.numValues();
      Instances[] splitData = new Instances[bag+1];
      // Freshly allocated arrays are already zero-filled, no explicit reset needed
      double[] wSq = new double[bag];
      double[] wVl = new double[bag];
      double totalWS=0, totalWV=0, msingWS=0, msingWV=0, sum=data.sumOfWeights();
      double[] all = new double[m_NumClasses];
      double[] missing = new double[m_NumClasses];

      // The per-value statistics are fields, so clear what a previous call left
      for(int x=0; x<bag; x++){
        coverage[x] = 0;
        if(stats != null)
          for(int y=0; y < m_NumClasses; y++)
            stats[x][y] = 0;
        splitData[x] = new Instances(data, data.numInstances());
      }
      splitData[bag] = new Instances(data, data.numInstances());

      // Record the statistics of data
      for(int x=0; x<data.numInstances(); x++){
        Instance inst=data.instance(x);
        double wt = inst.weight();
        double cls = inst.classValue();

        if(!inst.isMissing(att)){
          int v = (int)inst.value(att);
          splitData[v].add(inst);
          coverage[v] += wt;
          if(m_ClassAttribute.isNominal()){ // Nominal class
            stats[v][(int)cls] += wt;
            all[(int)cls] += wt;
          }
          else{                             // Numeric class
            wSq[v] += wt * cls * cls;
            wVl[v] += wt * cls;
            totalWS += wt * cls * cls;
            totalWV += wt * cls;
          }
        }
        else{
          splitData[bag].add(inst);
          if(m_ClassAttribute.isNominal()){ // Nominal class
            all[(int)cls] += wt;
            missing[(int)cls] += wt;
          }
          else{                            // Numeric class
            totalWS += wt * cls * cls;
            totalWV += wt * cls;
            msingWS += wt * cls * cls;
            msingWV += wt * cls;
          }
        }
      }

      // The total weights of the whole grow data
      double whole;
      if(m_ClassAttribute.isNominal())
        whole = sum + Utils.sum(uncover);
      else
        whole = sum + uncoverSum;

      // Find the split
      double minEntrp=Double.MAX_VALUE;
      maxInfoGain = 0;

      // Check if >=2 splits have more than the minimal data
      int count=0;
      for(int x=0; x<bag; x++)
        if(Utils.grOrEq(coverage[x], m_MinNo))
          ++count;

      if(count < 2){ // Don't split
        maxInfoGain = 0;
        inform = defInfo;   // no gain, so all of the default information is left
        value = Double.NaN;
        return null;
      }

      for(int x=0; x<bag; x++){
        double t = coverage[x], entrp, infoGain;

        // Skip attribute values that cover less than the minimal weight
        if(Utils.sm(t, m_MinNo))
          continue;

        if(m_ClassAttribute.isNominal()){ // Nominal class
          double[] other = new double[m_NumClasses];
          for(int y=0; y < m_NumClasses; y++)
            other[y] = all[y] - stats[x][y] + uncover[y];
          double otherCover = whole - t;

          // Entropies of data covered and uncovered
          entrp = entropy(stats[x], t);
          double uncEntp = entropy(other, otherCover);

          // Weighted average
          infoGain = defInfo - (entrp*t + uncEntp*otherCover)/whole;
        }
        else{                             // Numeric class
          double weight = (whole - t);
          entrp = wtMeanSqErr(wSq[x], wVl[x], t)/t;
          infoGain = defInfo - (entrp * t) -
            wtMeanSqErr((totalWS-wSq[x]+uncoverWtSq),
                        (totalWV-wVl[x]+uncoverWtVl),
                        weight);
        }

        // Test the exclusive expression ("att != value")
        boolean isWithin = true;
        if(m_IsExclude){
          double infoGain2, entrp2;
          if(m_ClassAttribute.isNominal()){ // Nominal class
            double[] other2 = new double[m_NumClasses];
            double[] notIn = new double[m_NumClasses];
            for(int y=0; y < m_NumClasses; y++){
              other2[y] = stats[x][y] + missing[y] + uncover[y];
              notIn[y] = all[y] - stats[x][y] - missing[y];
            }

            double msSum = Utils.sum(missing);
            double otherCover2 = t + msSum + Utils.sum(uncover);
            // Weight of the instances whose (non-missing) value differs from x
            double notInCover = sum - t - msSum;

            entrp2 = entropy(notIn, notInCover);
            double uncEntp2 = entropy(other2, otherCover2);
            infoGain2 = defInfo -
              (entrp2*notInCover + uncEntp2*otherCover2)/whole;
          }
          else{                             // Numeric class
            double msWts = splitData[bag].sumOfWeights();
            double weight2 = t + uncoverSum + msWts;
            // Weight of the instances whose (non-missing) value differs from x
            double notInCover = sum - t - msWts;

            entrp2 = wtMeanSqErr((totalWS-wSq[x]-msingWS),
                                 (totalWV-wVl[x]-msingWV), notInCover)
              /notInCover;
            infoGain2 = defInfo - entrp2 * notInCover -
              wtMeanSqErr((wSq[x]+uncoverWtSq+msingWS),
                          (wVl[x]+uncoverWtVl+msingWV),
                          weight2);
          }

          // Use the exclusive expression?
          if (Utils.gr(infoGain2, infoGain) ||
              (Utils.eq(infoGain2, infoGain) && Utils.sm(entrp2, entrp))){
            infoGain = infoGain2;
            entrp = entrp2;
            isWithin = false;
          }
        }

        // Test this split; ties on infoGain are broken by the lower entropy
        if (Utils.gr(infoGain, maxInfoGain) ||
            (Utils.eq(infoGain, maxInfoGain) && Utils.sm(entrp, minEntrp))){
          value = (double)x;
          maxInfoGain = infoGain;
          // Information left after the split.  infoGain is defined as
          // defInfo minus that information, so it is recovered as
          // defInfo - infoGain (the no-split case above stores defInfo for
          // a gain of 0 under the same convention).
          inform = defInfo - maxInfoGain;
          minEntrp = entrp;
          isIn = isWithin;
        }
      }

      return splitData;
    }

    /**
     * Whether the instance is covered by this antecedent
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent; always false if the attribute value of
     *         the instance is missing
     */
    public boolean isCover(Instance inst){
      boolean isCover=false;
      if(!inst.isMissing(att)){
        if(isIn){
          if(Utils.eq(inst.value(att), value))
            isCover=true;
        }
        else if(!Utils.eq(inst.value(att), value))
          isCover=true;
      }
      return isCover;
    }

    /**
     * Whether the expression is "att = value" or "att != value"
     * for this nominal attribute.  True if in the former expression,
     * otherwise the latter
     *
     * @return the boolean value
     */
    public boolean isIn(){
      return isIn;
    }

    /**
     * Returns a textual description of this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      String symbol = isIn ? " = " : " != ";
      return (att.name() + symbol + att.value((int)value));
    }
  }
    
  /**
   * Returns an enumeration describing the available options

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -