📄 jrip.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
    FastVector ruleset = new FastVector();		
	
    double dl = defDL, minDL = defDL;
    RuleStats rstats = null;
    double[] rst;
	
    // Check whether data have positive examples
    boolean defHasPositive = true; // No longer used
    boolean hasPositive = defHasPositive;
	
    /********************** Building stage ***********************/	
    if(m_Debug)
      System.err.println("\n*** Building stage ***");
    
    while((!stop) && hasPositive){ // Generate new rules until
      // stopping criteria met
      RipperRule oneRule;
      if(m_UsePruning){		
	/* Split data into Grow and Prune*/

	// We should have stratified the data, but ripper seems
	// to have a bug that makes it not to do so.  In order
	// to simulate it more precisely, we do the same thing.
	//newData.randomize(m_Random);	
	newData = RuleStats.stratify(newData, m_Folds, m_Random);		
	Instances[] part = RuleStats.partition(newData, m_Folds);
	growData=part[0];
	pruneData=part[1];
	//growData=newData.trainCV(m_Folds, m_Folds-1);
	//pruneData=newData.testCV(m_Folds, m_Folds-1);	
		
	oneRule = new RipperRule();
	oneRule.setConsequent(classIndex);  // Must set first
		
	if(m_Debug)
	  System.err.println("\nGrowing a rule ...");  
	oneRule.grow(growData);             // Build the rule
	if(m_Debug)
	  System.err.println("One rule found before pruning:"+
			     oneRule.toString(m_Class));
		
	if(m_Debug)
	  System.err.println("\nPruning the rule ...");  
	oneRule.prune(pruneData, false);    // Prune the rule
	if(m_Debug)
	  System.err.println("One rule found after pruning:"+
			     oneRule.toString(m_Class));
      }
      else{
	oneRule = new RipperRule();
	oneRule.setConsequent(classIndex);  // Must set first
	if(m_Debug)
	  System.err.println("\nNo pruning: growing a rule ...");
	oneRule.grow(newData);             // Build the rule
	if(m_Debug)
	  System.err.println("No pruning: one rule found:\n"+
			     oneRule.toString(m_Class));
      }
	    
      // Compute the DL of this ruleset
      if(rstats == null){ // First rule
	rstats = new RuleStats();
	rstats.setNumAllConds(m_Total);
	rstats.setData(newData);
      }
	    
      rstats.addAndUpdate(oneRule);		    
      int last = rstats.getRuleset().size()-1; // Index of last rule
      dl += rstats.relativeDL(last, expFPRate, m_CheckErr);
	    
      if(Double.isNaN(dl) || Double.isInfinite(dl))
	throw new Exception("Should never happen: dl in "+
			    "building stage NaN or infinite!");
      if(m_Debug)
	System.err.println("Before optimization("+last+
			   "): the dl = "+dl+" | best: "+minDL);
	    
      if(dl < minDL)
	minDL = dl;  // The best dl so far	
	    
      rst = rstats.getSimpleStats(last);	    
      if(m_Debug)
	System.err.println("The rule covers: "+rst[0]+
			   " | pos = " + rst[2] + 
			   " | neg = " + rst[4]+
			   "\nThe rule doesn't cover: "+rst[1]+
			   " | pos = " + rst[5]);
	    
      stop = checkStop(rst, minDL, dl);
	    
      if(!stop){	  		
	ruleset.addElement(oneRule);          // Accepted 
	newData = rstats.getFiltered(last)[1];// Data not covered
	hasPositive = Utils.gr(rst[5], 0.0);  // Positives remaining?
	if(m_Debug)
	  System.err.println("One rule added: has positive? "
			     +hasPositive);
      }
      else{
	if(m_Debug)
	  System.err.println("Quit rule");
	rstats.removeLast(); // Remove last to be re-used
      }
    }// while !stop	
	
    /******************** Optimization stage *******************/
    RuleStats finalRulesetStat = null;
    if(m_UsePruning){	 
      for(int z=0; z < m_Optimizations; z++){
	if(m_Debug)
	  System.err.println("\n*** Optimization: run #"
			     +z+" ***");
		
	newData = data;		    
	finalRulesetStat = new RuleStats();
	finalRulesetStat.setData(newData);
	finalRulesetStat.setNumAllConds(m_Total);
	int position=0;
	stop = false;
	boolean isResidual = false;	    
	hasPositive = defHasPositive;		    
	dl = minDL = defDL;
		
      oneRule:    
	while(!stop && hasPositive){			
		    
	  isResidual = (position>=ruleset.size()); // Cover residual positive examples  
	  // Re-do shuffling and stratification    
	  //newData.randomize(m_Random);	
	  newData = RuleStats.stratify(newData, m_Folds, m_Random);
	  Instances[] part = RuleStats.partition(newData, m_Folds);
	  growData=part[0];
	  pruneData=part[1];
	  //growData=newData.trainCV(m_Folds, m_Folds-1);
	  //pruneData=newData.testCV(m_Folds, m_Folds-1);	   
	  RipperRule finalRule;
		    
	  if(m_Debug)
	    System.err.println("\nRule #"+position +
			       "| isResidual?" + isResidual+
			       "| data size: "+newData.sumOfWeights());
		    
	  if(isResidual){
	    RipperRule newRule = new RipperRule();   
	    newRule.setConsequent(classIndex);
	    if(m_Debug)
	      System.err.println("\nGrowing and pruning"+
				 " a new rule ..."); 
	    newRule.grow(growData);
	    newRule.prune(pruneData, false);
	    finalRule = newRule;
	    if(m_Debug)
	      System.err.println("\nNew rule found: "+
				 newRule.toString(m_Class));
	  }
	  else{
	    RipperRule oldRule = (RipperRule)ruleset.elementAt(position);
	    boolean covers = false;
	    // Test coverage of the next old rule
	    for(int i=0; i<newData.numInstances(); i++)
	      if(oldRule.covers(newData.instance(i))){
		covers = true;
		break;
	      }
			
	    if(!covers){// Null coverage, no variants can be generated
	      finalRulesetStat.addAndUpdate(oldRule);
	      position++;
	      continue oneRule;
	    }  
			
	    // 2 variants 
	    if(m_Debug)
	      System.err.println("\nGrowing and pruning"+
				 " Replace ..."); 
	    RipperRule replace = new RipperRule();   
	    replace.setConsequent(classIndex);
	    replace.grow(growData);
			
	    // Remove the pruning data covered by the following
	    // rules, then simply compute the error rate of the
	    // current rule to prune it.  According to Ripper,
	    // it's equivalent to computing the error of the 
	    // whole ruleset -- is it true?
	    pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position);      	
	    replace.prune(pruneData, true);
			
	    if(m_Debug)
	      System.err.println("\nGrowing and pruning"+
				 " Revision ..."); 
	    RipperRule revision = (RipperRule)oldRule.copy(); 
			
	    // For revision, first rm the data covered by the old rule
	    Instances newGrowData = new Instances(growData, 0);
	    for(int b=0; b<growData.numInstances(); b++){
	      Instance inst = growData.instance(b);
	      if(revision.covers(inst))
		newGrowData.add(inst);
	    }
	    revision.grow(newGrowData);	      
	    revision.prune(pruneData, true);
			
	    double[][] prevRuleStats = new double[position][6];
	    for(int c=0; c < position; c++)
		prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);

	    // Now compare the relative DL of variants
	    FastVector tempRules = (FastVector)ruleset.copyElements();
	    tempRules.setElementAt(replace, position);
			
	    RuleStats repStat = new RuleStats(data, tempRules);
	    repStat.setNumAllConds(m_Total);
	    repStat.countData(position, newData, prevRuleStats);
	    //repStat.countData();
	    rst = repStat.getSimpleStats(position);	    
	    if(m_Debug)
	      System.err.println("Replace rule covers: "+rst[0]+
				 " | pos = " + rst[2] + 
				 " | neg = " + rst[4]+
				 "\nThe rule doesn't cover: "+rst[1]+
				 " | pos = " + rst[5]);
			
	    double repDL = repStat.relativeDL(position, expFPRate,
					      m_CheckErr);
	    if(m_Debug)
	      System.err.println("\nReplace: "+
				 replace.toString(m_Class)
				 +" |dl = "+repDL); 
			
	    if(Double.isNaN(repDL) || Double.isInfinite(repDL))
	      throw new Exception("Should never happen: repDL"+
				  "in optmz. stage NaN or "+
				  "infinite!");
			
	    tempRules.setElementAt(revision, position);
	    RuleStats revStat = new RuleStats(data, tempRules);
	    revStat.setNumAllConds(m_Total);
	    revStat.countData(position, newData, prevRuleStats);
	    //revStat.countData();
	    double revDL = revStat.relativeDL(position, expFPRate,
					      m_CheckErr);
			
	    if(m_Debug)
	      System.err.println("Revision: "
				 + revision.toString(m_Class)
				 +" |dl = "+revDL);
			
	    if(Double.isNaN(revDL) || Double.isInfinite(revDL))
	      throw new Exception("Should never happen: revDL"+
				  "in optmz. stage NaN or "+
				  "infinite!");
			
	    rstats = new RuleStats(data, ruleset);
	    rstats.setNumAllConds(m_Total);
	    rstats.countData(position, newData, prevRuleStats);
	    //rstats.countData();
	    double oldDL = rstats.relativeDL(position, expFPRate,
					     m_CheckErr);
			
	    if(Double.isNaN(oldDL) || Double.isInfinite(oldDL))
	      throw new Exception("Should never happen: oldDL"+
				  "in optmz. stage NaN or "+
				  "infinite!");
	    if(m_Debug)
	      System.err.println("Old rule: "+
				 oldRule.toString(m_Class)
				 +" |dl = "+oldDL); 
			
	    if(m_Debug)
	      System.err.println("\nrepDL: "+repDL+ 
				 "\nrevDL: "+revDL+
				 "\noldDL: "+oldDL);
			
	    if((oldDL <= revDL) && (oldDL <= repDL))
	      finalRule = oldRule; // Old the best
	    else if(revDL <= repDL)
	      finalRule = revision; // Revision the best
	    else
	      finalRule = replace; // Replace the best  
	  }		
		    
	  finalRulesetStat.addAndUpdate(finalRule);  	 
	  rst = finalRulesetStat.getSimpleStats(position);
		    
	  if(isResidual){
			
	    dl += finalRulesetStat.relativeDL(position, 
					      expFPRate,
					      m_CheckErr);
	    if(m_Debug)
	      System.err.println("After optimization: the dl"
				 +"="+dl+" | best: "+minDL);
			
	    if(dl < minDL)
	      minDL = dl;  // The best dl so far
			
	    stop = checkStop(rst, minDL, dl);
	    if(!stop)
	      ruleset.addElement(finalRule); // Accepted 
	    else{
	      finalRulesetStat.removeLast(); // Remove last to be re-used
	      position--;
	    }
	  }
	  else
	    ruleset.setElementAt(finalRule, position); // Accepted 

	  if(m_Debug){
	    System.err.println("The rule covers: "+rst[0]+
			       " | pos = " + rst[2] + 
			       " | neg = " + rst[4]+
			       "\nThe rule doesn't cover: "+rst[1]+
			       " | pos = " + rst[5]);		
	    System.err.println("\nRuleset so far: ");
	    for(int x=0; x<ruleset.size(); x++)
	      System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
	    System.err.println();
	  }
		    
	  //Data not covered	
	  if(finalRulesetStat.getRulesetSize() > 0)// If any rules	
	    newData = finalRulesetStat.getFiltered(position)[1]; 
	  hasPositive = Utils.gr(rst[5], 0.0); //Positives remaining? 
	  position++;
	} // while !stop && hasPositive
		
	if(ruleset.size() > (position+1)){ // Hasn't gone through yet
	  for(int k=position+1; k<ruleset.size(); k++)
	    finalRulesetStat.addAndUpdate((Rule)ruleset.elementAt(k));
	}
	if(m_Debug)
	  System.err.println("\nDeleting rules to decrease"+
			     " DL of the whole ruleset ..."); 
	finalRulesetStat.reduceDL(expFPRate, m_CheckErr);
	if(m_Debug){
	  int del = ruleset.size() -
	    finalRulesetStat.getRulesetSize(); 
	  System.err.println(del+" rules are deleted"+
			     " after DL reduction procedure");
	}
	ruleset = finalRulesetStat.getRuleset();
	rstats = finalRulesetStat;	      	    
		
      } // For each run of optimization
    } // if pruning is used
	
    // Concatenate the ruleset for this class to the whole ruleset
    if(m_Debug){
      System.err.println("\nFinal ruleset: ");
      for(int x=0; x<ruleset.size(); x++)
	System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
      System.err.println();
    }
	
    m_Ruleset.appendElements(ruleset);
    m_RulesetStats.addElement(rstats);
	
    if(ruleset.size() > 0)// If any rules for this class
      return rstats.getFiltered(ruleset.size()-1)[1]; // Data not 
    else                                                // covered
      return data; 
  }   
    
  /**
   * Check whether the stopping criterion meets
   *
   * @param rst the statistic of the ruleset
   * @param minDL the min description length so far
   * @param dl the current description length of the ruleset
   * @return true if stop criterion meets, false otherwise
   */
  private boolean checkStop(double[] rst, double minDL, double dl){
	
    if(dl > minDL+MAX_DL_SURPLUS){
      if(m_Debug)
	System.err.println("DL too large: "+dl+" | "+minDL);
      return true;
    }
    else if(!Utils.gr(rst[2], 0.0)){// Covered positives
      if(m_Debug)
	System.err.println("Too few positives.");
      return true;
    }	
    else if((rst[4]/rst[0]) >= 0.5){// Err rate
      if(m_CheckErr){
	if(m_Debug)
	  System.err.println("Error too large: "+
			     rst[4] + "/" + rst[0]);
	return  true;
      }
      else
	return false;
    }		
    else{// Not stops
      if(m_Debug)
	System.err.println("Continue.");
      return  false;
    }				
  }
 
  /**
   * Prints the all the rules of the rule learner.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if (m_Ruleset == null) 
      return "JRIP: No model built yet.";
	
    StringBuffer sb = new StringBuffer("JRIP rules:\n"+
				       "===========\n\n"); 
    for(int j=0; j<m_RulesetStats.size(); j++){
      RuleStats rs = (RuleStats)m_RulesetStats.elementAt(j);
      FastVector rules = rs.getRuleset();
      for(int k=0; k<rules.size(); k++){
	double[] simStats = rs.getSimpleStats(k);
	sb.append(((RipperRule)rules.elementAt(k)).toString(m_Class)
		  + " ("+simStats[0]+"/"+simStats[4]+")\n");
      }			    
    }
    if(m_Debug){
      System.err.println("Inside m_Ruleset");
      for(int i=0; i<m_Ruleset.size(); i++)
	System.err.println(((RipperRule)m_Ruleset.elementAt(i)).toString(m_Class));
    }
    sb.append("\nNumber of Rules : " 
	      + m_Ruleset.size() + "\n");
    return sb.toString();
  }
    
  /**
   * Main method.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {	
    try {
      System.out.println(Evaluation.evaluateModel(new JRip(), args));
    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  } 
}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -