📄 jrip.java
字号:
oneRule = new RipperRule(); oneRule.setConsequent(classIndex); // Must set first if(m_Debug) System.err.println("\nGrowing a rule ..."); oneRule.grow(growData); // Build the rule if(m_Debug) System.err.println("One rule found before pruning:"+ oneRule.toString(m_Class)); if(m_Debug) System.err.println("\nPruning the rule ..."); oneRule.prune(pruneData, false); // Prune the rule if(m_Debug) System.err.println("One rule found after pruning:"+ oneRule.toString(m_Class)); } else{ oneRule = new RipperRule(); oneRule.setConsequent(classIndex); // Must set first if(m_Debug) System.err.println("\nNo pruning: growing a rule ..."); oneRule.grow(newData); // Build the rule if(m_Debug) System.err.println("No pruning: one rule found:\n"+ oneRule.toString(m_Class)); } // Compute the DL of this ruleset if(rstats == null){ // First rule rstats = new RuleStats(); rstats.setNumAllConds(m_Total); rstats.setData(newData); } rstats.addAndUpdate(oneRule); int last = rstats.getRuleset().size()-1; // Index of last rule dl += rstats.relativeDL(last, expFPRate, m_CheckErr); if(Double.isNaN(dl) || Double.isInfinite(dl)) throw new Exception("Should never happen: dl in "+ "building stage NaN or infinite!"); if(m_Debug) System.err.println("Before optimization("+last+ "): the dl = "+dl+" | best: "+minDL); if(dl < minDL) minDL = dl; // The best dl so far rst = rstats.getSimpleStats(last); if(m_Debug) System.err.println("The rule covers: "+rst[0]+ " | pos = " + rst[2] + " | neg = " + rst[4]+ "\nThe rule doesn't cover: "+rst[1]+ " | pos = " + rst[5]); stop = checkStop(rst, minDL, dl); if(!stop){ ruleset.addElement(oneRule); // Accepted newData = rstats.getFiltered(last)[1];// Data not covered hasPositive = Utils.gr(rst[5], 0.0); // Positives remaining? if(m_Debug) System.err.println("One rule added: has positive? " +hasPositive); } else{ if(m_Debug) System.err.println("Quit rule"); rstats.removeLast(); // Remove last to be re-used } }// while !stop /******************** Optimization stage *******************/ RuleStats finalRulesetStat = null; if(m_UsePruning){ for(int z=0; z < m_Optimizations; z++){ if(m_Debug) System.err.println("\n*** Optimization: run #" +z+" ***"); newData = data; finalRulesetStat = new RuleStats(); finalRulesetStat.setData(newData); finalRulesetStat.setNumAllConds(m_Total); int position=0; stop = false; boolean isResidual = false; hasPositive = defHasPositive; dl = minDL = defDL; oneRule: while(!stop && hasPositive){ isResidual = (position>=ruleset.size()); // Cover residual positive examples // Re-do shuffling and stratification //newData.randomize(m_Random); newData = RuleStats.stratify(newData, m_Folds, m_Random); Instances[] part = RuleStats.partition(newData, m_Folds); growData=part[0]; pruneData=part[1]; //growData=newData.trainCV(m_Folds, m_Folds-1); //pruneData=newData.testCV(m_Folds, m_Folds-1); RipperRule finalRule; if(m_Debug) System.err.println("\nRule #"+position + "| isResidual?" + isResidual+ "| data size: "+newData.sumOfWeights()); if(isResidual){ RipperRule newRule = new RipperRule(); newRule.setConsequent(classIndex); if(m_Debug) System.err.println("\nGrowing and pruning"+ " a new rule ..."); newRule.grow(growData); newRule.prune(pruneData, false); finalRule = newRule; if(m_Debug) System.err.println("\nNew rule found: "+ newRule.toString(m_Class)); } else{ RipperRule oldRule = (RipperRule)ruleset.elementAt(position); boolean covers = false; // Test coverage of the next old rule for(int i=0; i<newData.numInstances(); i++) if(oldRule.covers(newData.instance(i))){ covers = true; break; } if(!covers){// Null coverage, no variants can be generated finalRulesetStat.addAndUpdate(oldRule); position++; continue oneRule; } // 2 variants if(m_Debug) System.err.println("\nGrowing and pruning"+ " Replace ..."); RipperRule replace = new RipperRule(); replace.setConsequent(classIndex); replace.grow(growData); // Remove the pruning data covered by the following // rules, then simply compute the error rate of the // current rule to prune it. According to Ripper, // it's equivalent to computing the error of the // whole ruleset -- is it true? pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position); replace.prune(pruneData, true); if(m_Debug) System.err.println("\nGrowing and pruning"+ " Revision ..."); RipperRule revision = (RipperRule)oldRule.copy(); // For revision, first rm the data covered by the old rule Instances newGrowData = new Instances(growData, 0); for(int b=0; b<growData.numInstances(); b++){ Instance inst = growData.instance(b); if(revision.covers(inst)) newGrowData.add(inst); } revision.grow(newGrowData); revision.prune(pruneData, true); double[][] prevRuleStats = new double[position][6]; for(int c=0; c < position; c++) prevRuleStats[c] = finalRulesetStat.getSimpleStats(c); // Now compare the relative DL of variants FastVector tempRules = (FastVector)ruleset.copyElements(); tempRules.setElementAt(replace, position); RuleStats repStat = new RuleStats(data, tempRules); repStat.setNumAllConds(m_Total); repStat.countData(position, newData, prevRuleStats); //repStat.countData(); rst = repStat.getSimpleStats(position); if(m_Debug) System.err.println("Replace rule covers: "+rst[0]+ " | pos = " + rst[2] + " | neg = " + rst[4]+ "\nThe rule doesn't cover: "+rst[1]+ " | pos = " + rst[5]); double repDL = repStat.relativeDL(position, expFPRate, m_CheckErr); if(m_Debug) System.err.println("\nReplace: "+ replace.toString(m_Class) +" |dl = "+repDL); if(Double.isNaN(repDL) || Double.isInfinite(repDL)) throw new Exception("Should never happen: repDL"+ "in optmz. stage NaN or "+ "infinite!"); tempRules.setElementAt(revision, position); RuleStats revStat = new RuleStats(data, tempRules); revStat.setNumAllConds(m_Total); revStat.countData(position, newData, prevRuleStats); //revStat.countData(); double revDL = revStat.relativeDL(position, expFPRate, m_CheckErr); if(m_Debug) System.err.println("Revision: " + revision.toString(m_Class) +" |dl = "+revDL); if(Double.isNaN(revDL) || Double.isInfinite(revDL)) throw new Exception("Should never happen: revDL"+ "in optmz. stage NaN or "+ "infinite!"); rstats = new RuleStats(data, ruleset); rstats.setNumAllConds(m_Total); rstats.countData(position, newData, prevRuleStats); //rstats.countData(); double oldDL = rstats.relativeDL(position, expFPRate, m_CheckErr); if(Double.isNaN(oldDL) || Double.isInfinite(oldDL)) throw new Exception("Should never happen: oldDL"+ "in optmz. stage NaN or "+ "infinite!"); if(m_Debug) System.err.println("Old rule: "+ oldRule.toString(m_Class) +" |dl = "+oldDL); if(m_Debug) System.err.println("\nrepDL: "+repDL+ "\nrevDL: "+revDL+ "\noldDL: "+oldDL); if((oldDL <= revDL) && (oldDL <= repDL)) finalRule = oldRule; // Old the best else if(revDL <= repDL) finalRule = revision; // Revision the best else finalRule = replace; // Replace the best } finalRulesetStat.addAndUpdate(finalRule); rst = finalRulesetStat.getSimpleStats(position); if(isResidual){ dl += finalRulesetStat.relativeDL(position, expFPRate, m_CheckErr); if(m_Debug) System.err.println("After optimization: the dl" +"="+dl+" | best: "+minDL); if(dl < minDL) minDL = dl; // The best dl so far stop = checkStop(rst, minDL, dl); if(!stop) ruleset.addElement(finalRule); // Accepted else{ finalRulesetStat.removeLast(); // Remove last to be re-used position--; } } else ruleset.setElementAt(finalRule, position); // Accepted if(m_Debug){ System.err.println("The rule covers: "+rst[0]+ " | pos = " + rst[2] + " | neg = " + rst[4]+ "\nThe rule doesn't cover: "+rst[1]+ " | pos = " + rst[5]); System.err.println("\nRuleset so far: "); for(int x=0; x<ruleset.size(); x++) System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class)); System.err.println(); } //Data not covered if(finalRulesetStat.getRulesetSize() > 0)// If any rules newData = finalRulesetStat.getFiltered(position)[1]; hasPositive = Utils.gr(rst[5], 0.0); //Positives remaining? position++; } // while !stop && hasPositive if(ruleset.size() > (position+1)){ // Hasn't gone through yet for(int k=position+1; k<ruleset.size(); k++) finalRulesetStat.addAndUpdate((Rule)ruleset.elementAt(k)); } if(m_Debug) System.err.println("\nDeleting rules to decrease"+ " DL of the whole ruleset ..."); finalRulesetStat.reduceDL(expFPRate, m_CheckErr); if(m_Debug){ int del = ruleset.size() - finalRulesetStat.getRulesetSize(); System.err.println(del+" rules are deleted"+ " after DL reduction procedure"); } ruleset = finalRulesetStat.getRuleset(); rstats = finalRulesetStat; } // For each run of optimization } // if pruning is used // Concatenate the ruleset for this class to the whole ruleset if(m_Debug){ System.err.println("\nFinal ruleset: "); for(int x=0; x<ruleset.size(); x++) System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class)); System.err.println(); } m_Ruleset.appendElements(ruleset); m_RulesetStats.addElement(rstats); if(ruleset.size() > 0)// If any rules for this class return rstats.getFiltered(ruleset.size()-1)[1]; // Data not else // covered return data; } /** * Check whether the stopping criterion meets * * @param rst the statistic of the ruleset * @param minDL the min description length so far * @param dl the current description length of the ruleset * @return true if stop criterion meets, false otherwise */ private boolean checkStop(double[] rst, double minDL, double dl){ if(dl > minDL+MAX_DL_SURPLUS){ if(m_Debug) System.err.println("DL too large: "+dl+" | "+minDL); return true; } else if(!Utils.gr(rst[2], 0.0)){// Covered positives if(m_Debug) System.err.println("Too few positives."); return true; } else if((rst[4]/rst[0]) >= 0.5){// Err rate if(m_CheckErr){ if(m_Debug) System.err.println("Error too large: "+ rst[4] + "/" + rst[0]); return true; } else return false; } else{// Not stops if(m_Debug) System.err.println("Continue."); return false; } } /** * Prints the all the rules of the rule learner. * * @return a textual description of the classifier */ public String toString() { if (m_Ruleset == null) return "JRIP: No model built yet."; StringBuffer sb = new StringBuffer("JRIP rules:\n"+ "===========\n\n"); for(int j=0; j<m_RulesetStats.size(); j++){ RuleStats rs = (RuleStats)m_RulesetStats.elementAt(j); FastVector rules = rs.getRuleset(); for(int k=0; k<rules.size(); k++){ double[] simStats = rs.getSimpleStats(k); sb.append(((RipperRule)rules.elementAt(k)).toString(m_Class) + " ("+simStats[0]+"/"+simStats[4]+")\n"); } } if(m_Debug){ System.err.println("Inside m_Ruleset"); for(int i=0; i<m_Ruleset.size(); i++) System.err.println(((RipperRule)m_Ruleset.elementAt(i)).toString(m_Class)); } sb.append("\nNumber of Rules : " + m_Ruleset.size() + "\n"); return sb.toString(); } /** * Main method. * * @param args the options for the classifier */ public static void main(String[] args) { try { System.out.println(Evaluation.evaluateModel(new JRip(), args)); } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -