📄 jrip.java

📁 wekaUT 是 University of Texas at Austin 开发的、基于 Weka 的半监督学习（semi-supervised learning）分类器
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 3 4
	oneRule = new RipperRule();	oneRule.setConsequent(classIndex);  // Must set first			if(m_Debug)	  System.err.println("\nGrowing a rule ...");  	oneRule.grow(growData);             // Build the rule	if(m_Debug)	  System.err.println("One rule found before pruning:"+			     oneRule.toString(m_Class));			if(m_Debug)	  System.err.println("\nPruning the rule ...");  	oneRule.prune(pruneData, false);    // Prune the rule	if(m_Debug)	  System.err.println("One rule found after pruning:"+			     oneRule.toString(m_Class));      }      else{	oneRule = new RipperRule();	oneRule.setConsequent(classIndex);  // Must set first	if(m_Debug)	  System.err.println("\nNo pruning: growing a rule ...");	oneRule.grow(newData);             // Build the rule	if(m_Debug)	  System.err.println("No pruning: one rule found:\n"+			     oneRule.toString(m_Class));      }	          // Compute the DL of this ruleset      if(rstats == null){ // First rule	rstats = new RuleStats();	rstats.setNumAllConds(m_Total);	rstats.setData(newData);      }	          rstats.addAndUpdate(oneRule);		          int last = rstats.getRuleset().size()-1; // Index of last rule      dl += rstats.relativeDL(last, expFPRate, m_CheckErr);	          if(Double.isNaN(dl) || Double.isInfinite(dl))	throw new Exception("Should never happen: dl in "+			    "building stage NaN or infinite!");      if(m_Debug)	System.err.println("Before optimization("+last+			   "): the dl = "+dl+" | best: "+minDL);	          if(dl < minDL)	minDL = dl;  // The best dl so far		          rst = rstats.getSimpleStats(last);	          if(m_Debug)	System.err.println("The rule covers: "+rst[0]+			   " | pos = " + rst[2] + 			   " | neg = " + rst[4]+			   "\nThe rule doesn't cover: "+rst[1]+			   " | pos = " + rst[5]);	          stop = checkStop(rst, minDL, dl);	          if(!stop){	  			ruleset.addElement(oneRule);          // Accepted 	newData = rstats.getFiltered(last)[1];// Data not covered	hasPositive = Utils.gr(rst[5], 0.0);  // Positives remaining?	
if(m_Debug)	  System.err.println("One rule added: has positive? "			     +hasPositive);      }      else{	if(m_Debug)	  System.err.println("Quit rule");	rstats.removeLast(); // Remove last to be re-used      }    }// while !stop		    /******************** Optimization stage *******************/    RuleStats finalRulesetStat = null;    if(m_UsePruning){	       for(int z=0; z < m_Optimizations; z++){	if(m_Debug)	  System.err.println("\n*** Optimization: run #"			     +z+" ***");			newData = data;		    	finalRulesetStat = new RuleStats();	finalRulesetStat.setData(newData);	finalRulesetStat.setNumAllConds(m_Total);	int position=0;	stop = false;	boolean isResidual = false;	    	hasPositive = defHasPositive;		    	dl = minDL = defDL;		      oneRule:    	while(!stop && hasPositive){					    	  isResidual = (position>=ruleset.size()); // Cover residual positive examples  	  // Re-do shuffling and stratification    	  //newData.randomize(m_Random);		  newData = RuleStats.stratify(newData, m_Folds, m_Random);	  Instances[] part = RuleStats.partition(newData, m_Folds);	  growData=part[0];	  pruneData=part[1];	  //growData=newData.trainCV(m_Folds, m_Folds-1);	  //pruneData=newData.testCV(m_Folds, m_Folds-1);	   	  RipperRule finalRule;		    	  if(m_Debug)	    System.err.println("\nRule #"+position +			       "| isResidual?" 
+ isResidual+			       "| data size: "+newData.sumOfWeights());		    	  if(isResidual){	    RipperRule newRule = new RipperRule();   	    newRule.setConsequent(classIndex);	    if(m_Debug)	      System.err.println("\nGrowing and pruning"+				 " a new rule ..."); 	    newRule.grow(growData);	    newRule.prune(pruneData, false);	    finalRule = newRule;	    if(m_Debug)	      System.err.println("\nNew rule found: "+				 newRule.toString(m_Class));	  }	  else{	    RipperRule oldRule = (RipperRule)ruleset.elementAt(position);	    boolean covers = false;	    // Test coverage of the next old rule	    for(int i=0; i<newData.numInstances(); i++)	      if(oldRule.covers(newData.instance(i))){		covers = true;		break;	      }				    if(!covers){// Null coverage, no variants can be generated	      finalRulesetStat.addAndUpdate(oldRule);	      position++;	      continue oneRule;	    }  				    // 2 variants 	    if(m_Debug)	      System.err.println("\nGrowing and pruning"+				 " Replace ..."); 	    RipperRule replace = new RipperRule();   	    replace.setConsequent(classIndex);	    replace.grow(growData);				    // Remove the pruning data covered by the following	    // rules, then simply compute the error rate of the	    // current rule to prune it.  According to Ripper,	    // it's equivalent to computing the error of the 	    // whole ruleset -- is it true?	    
pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position);      		    replace.prune(pruneData, true);				    if(m_Debug)	      System.err.println("\nGrowing and pruning"+				 " Revision ..."); 	    RipperRule revision = (RipperRule)oldRule.copy(); 				    // For revision, first rm the data covered by the old rule	    Instances newGrowData = new Instances(growData, 0);	    for(int b=0; b<growData.numInstances(); b++){	      Instance inst = growData.instance(b);	      if(revision.covers(inst))		newGrowData.add(inst);	    }	    revision.grow(newGrowData);	      	    revision.prune(pruneData, true);				    double[][] prevRuleStats = new double[position][6];	    for(int c=0; c < position; c++)		prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);	    // Now compare the relative DL of variants	    FastVector tempRules = (FastVector)ruleset.copyElements();	    tempRules.setElementAt(replace, position);				    RuleStats repStat = new RuleStats(data, tempRules);	    repStat.setNumAllConds(m_Total);	    repStat.countData(position, newData, prevRuleStats);	    //repStat.countData();	    rst = repStat.getSimpleStats(position);	    	    if(m_Debug)	      System.err.println("Replace rule covers: "+rst[0]+				 " | pos = " + rst[2] + 				 " | neg = " + rst[4]+				 "\nThe rule doesn't cover: "+rst[1]+				 " | pos = " + rst[5]);				    double repDL = repStat.relativeDL(position, expFPRate,					      m_CheckErr);	    if(m_Debug)	      System.err.println("\nReplace: "+				 replace.toString(m_Class)				 +" |dl = "+repDL); 				    if(Double.isNaN(repDL) || Double.isInfinite(repDL))	      throw new Exception("Should never happen: repDL"+				  "in optmz. 
stage NaN or "+				  "infinite!");				    tempRules.setElementAt(revision, position);	    RuleStats revStat = new RuleStats(data, tempRules);	    revStat.setNumAllConds(m_Total);	    revStat.countData(position, newData, prevRuleStats);	    //revStat.countData();	    double revDL = revStat.relativeDL(position, expFPRate,					      m_CheckErr);				    if(m_Debug)	      System.err.println("Revision: "				 + revision.toString(m_Class)				 +" |dl = "+revDL);				    if(Double.isNaN(revDL) || Double.isInfinite(revDL))	      throw new Exception("Should never happen: revDL"+				  "in optmz. stage NaN or "+				  "infinite!");				    rstats = new RuleStats(data, ruleset);	    rstats.setNumAllConds(m_Total);	    rstats.countData(position, newData, prevRuleStats);	    //rstats.countData();	    double oldDL = rstats.relativeDL(position, expFPRate,					     m_CheckErr);				    if(Double.isNaN(oldDL) || Double.isInfinite(oldDL))	      throw new Exception("Should never happen: oldDL"+				  "in optmz. stage NaN or "+				  "infinite!");	    if(m_Debug)	      System.err.println("Old rule: "+				 oldRule.toString(m_Class)				 +" |dl = "+oldDL); 				    if(m_Debug)	      System.err.println("\nrepDL: "+repDL+ 				 "\nrevDL: "+revDL+				 "\noldDL: "+oldDL);				    if((oldDL <= revDL) && (oldDL <= repDL))	      finalRule = oldRule; // Old the best	    else if(revDL <= repDL)	      finalRule = revision; // Revision the best	    else	      finalRule = replace; // Replace the best  	  }				    	  finalRulesetStat.addAndUpdate(finalRule);  	 	  rst = finalRulesetStat.getSimpleStats(position);		    	  if(isResidual){				    dl += finalRulesetStat.relativeDL(position, 					      expFPRate,					      m_CheckErr);	    if(m_Debug)	      System.err.println("After optimization: the dl"				 +"="+dl+" | best: "+minDL);				    if(dl < minDL)	      minDL = dl;  // The best dl so far				    stop = checkStop(rst, minDL, dl);	    if(!stop)	      ruleset.addElement(finalRule); // Accepted 	    else{	      
finalRulesetStat.removeLast(); // Remove last to be re-used	      position--;	    }	  }	  else	    ruleset.setElementAt(finalRule, position); // Accepted 	  if(m_Debug){	    System.err.println("The rule covers: "+rst[0]+			       " | pos = " + rst[2] + 			       " | neg = " + rst[4]+			       "\nThe rule doesn't cover: "+rst[1]+			       " | pos = " + rst[5]);			    System.err.println("\nRuleset so far: ");	    for(int x=0; x<ruleset.size(); x++)	      System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));	    System.err.println();	  }		    	  //Data not covered		  if(finalRulesetStat.getRulesetSize() > 0)// If any rules		    newData = finalRulesetStat.getFiltered(position)[1]; 	  hasPositive = Utils.gr(rst[5], 0.0); //Positives remaining? 	  position++;	} // while !stop && hasPositive			if(ruleset.size() > (position+1)){ // Hasn't gone through yet	  for(int k=position+1; k<ruleset.size(); k++)	    finalRulesetStat.addAndUpdate((Rule)ruleset.elementAt(k));	}	if(m_Debug)	  System.err.println("\nDeleting rules to decrease"+			     " DL of the whole ruleset ..."); 	finalRulesetStat.reduceDL(expFPRate, m_CheckErr);	if(m_Debug){	  int del = ruleset.size() -	    finalRulesetStat.getRulesetSize(); 	  System.err.println(del+" rules are deleted"+			     " after DL reduction procedure");	}	ruleset = finalRulesetStat.getRuleset();	rstats = finalRulesetStat;	      	    		      } // For each run of optimization    } // if pruning is used	    // Concatenate the ruleset for this class to the whole ruleset    if(m_Debug){      System.err.println("\nFinal ruleset: ");      for(int x=0; x<ruleset.size(); x++)	System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));      System.err.println();    }	    m_Ruleset.appendElements(ruleset);    m_RulesetStats.addElement(rstats);	    if(ruleset.size() > 0)// If any rules for this class      return rstats.getFiltered(ruleset.size()-1)[1]; // Data not     else                                   
             // covered
      return data;
  }

  /**
   * Decides whether rule generation should stop, based on the statistics
   * of the most recent rule and the description length (DL) so far.
   *
   * Stops when any of the following holds:
   * - dl exceeds minDL by more than MAX_DL_SURPLUS;
   * - rst[2] (positives covered by the rule) is not greater than 0;
   * - rst[4]/rst[0] (covered negatives over total covered) is at least
   *   0.5 and error checking (m_CheckErr) is switched on.
   *
   * @param rst the statistic of the ruleset
   * @param minDL the min description length so far
   * @param dl the current description length of the ruleset
   * @return true if a stopping criterion is met, false otherwise
   */
  private boolean checkStop(double[] rst, double minDL, double dl) {
    // Criterion 1: the description length has grown too far past the best one
    if (dl > minDL + MAX_DL_SURPLUS) {
      if (m_Debug) {
        System.err.println("DL too large: " + dl + " | " + minDL);
      }
      return true;
    }

    // Criterion 2: the rule covers no positive instances
    boolean coversPositives = Utils.gr(rst[2], 0.0);
    if (!coversPositives) {
      if (m_Debug) {
        System.err.println("Too few positives.");
      }
      return true;
    }

    // Criterion 3: error rate on the covered data, only enforced on request
    boolean errorRateHigh = (rst[4] / rst[0]) >= 0.5;
    if (errorRateHigh) {
      if (!m_CheckErr) {
        return false;
      }
      if (m_Debug) {
        System.err.println("Error too large: " +
                           rst[4] + "/" + rst[0]);
      }
      return true;
    }

    // None of the criteria fired
    if (m_Debug) {
      System.err.println("Continue.");
    }
    return false;
  }

  /**
   * Builds a textual listing of all learned rules. Each rule is followed
   * by "(simStats[0]/simStats[4])" taken from its ruleset statistics, and
   * the listing ends with the total number of rules in m_Ruleset.
   * When m_Debug is set, the contents of m_Ruleset are also written to
   * System.err.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if (m_Ruleset == null) {
      return "JRIP: No model built yet.";
    }

    StringBuffer text = new StringBuffer("JRIP rules:\n" +
                                         "===========\n\n");

    // One RuleStats object per entry of m_RulesetStats, each with its own rules
    int statIdx = 0;
    while (statIdx < m_RulesetStats.size()) {
      RuleStats stats = (RuleStats) m_RulesetStats.elementAt(statIdx);
      FastVector rulesOfStats = stats.getRuleset();

      int ruleIdx = 0;
      while (ruleIdx < rulesOfStats.size()) {
        double[] counts = stats.getSimpleStats(ruleIdx);
        String ruleText =
          ((RipperRule) rulesOfStats.elementAt(ruleIdx)).toString(m_Class);
        text.append(ruleText)
            .append(" (")
            .append(counts[0])
            .append("/")
            .append(counts[4])
            .append(")\n");
        ruleIdx++;
      }
      statIdx++;
    }

    if (m_Debug) {
      System.err.println("Inside m_Ruleset");
      int n = 0;
      while (n < m_Ruleset.size()) {
        RipperRule rule = (RipperRule) m_Ruleset.elementAt(n);
        System.err.println(rule.toString(m_Class));
        n++;
      }
    }

    text.append("\nNumber of Rules : ")
        .append(m_Ruleset.size())
        .append("\n");

    return text.toString();
  }

  /**
   * Main method. Evaluates a new JRip instance with the given options and
   * prints the result to standard output; on failure the stack trace and
   * the exception message are written to standard error.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {
    try {
      String report = Evaluation.evaluateModel(new JRip(), args);
      System.out.println(report);
    } catch (Exception ex) {
      ex.printStackTrace();
      System.err.println(ex.getMessage());
    }
  }
}
上一页 1 2 3 4
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -