📄 racesearch.java
字号:
} for (int j=0;j<2;j++) { boolean matched = true; for (int k =0;k<m_numAttribs;k++) { if (parallelRaces[i][j][k] != '*') { if (parallelRaces[i][j][k] != randomBC[k]) { matched = false; break; } } } if (matched) { // update the stats for this racer // System.err.println("Matched "+i+" "+j); raceStats[i][j].add(error); // does this race have a clear winner, meaning we can // terminate the whole set of parallel races? if (raceStats[i][0].count > m_samples && raceStats[i][1].count > m_samples) { raceStats[i][0].calculateDerived(); raceStats[i][1].calculateDerived(); // System.err.println(j+" : "+(new String(parallelRaces[i][j]))); // System.err.println(raceStats[i][0]); // System.err.println(raceStats[i][1]); // check the ttest double prob = ttest(raceStats[i][0], raceStats[i][1]); // System.err.println("Prob :"+prob); if (prob < m_sigLevel) { // stop the races we have a winner! if (raceStats[i][0].mean < raceStats[i][1].mean) { base = (char [])parallelRaces[i][0].clone(); m_bestMerit = raceStats[i][0].mean; if (m_debug) { System.err.println("contender 0 won "); } } else { base = (char [])parallelRaces[i][1].clone(); m_bestMerit = raceStats[i][1].mean; if (m_debug) { System.err.println("contender 1 won"); } } if (m_debug) { System.err.println((new String(parallelRaces[i][0])) +" "+(new String(parallelRaces[i][1]))); System.err.println("Means : "+raceStats[i][0].mean +" vs"+raceStats[i][1].mean); System.err.println("Evaluations so far : " +evaluationCount); } won = true; break checkRaces; } } } } } } numRaces--; // set up the next set of races if necessary if (numRaces > 0 && won) { parallelRaces = new char [numRaces][2][m_numAttribs-1]; raceStats = new Stats[numRaces][2]; // update the attribute constraints for (int i=0;i<m_numAttribs;i++) { if (i != m_classIndex && !attributeConstraints[i] && base[i] != '*') { attributeConstraints[i] = true; break; } } count=0; for (int i=0;i<numRaces;i++) { parallelRaces[i][0] = (char [])base.clone(); parallelRaces[i][1] = (char [])base.clone(); for (int j=count;j<m_numAttribs;j++) { if (j != m_classIndex && parallelRaces[i][0][j] == '*') { parallelRaces[i][0][j] = '1'; parallelRaces[i][1][j] = '0'; count = j+1; break; } } } if (m_debug) { System.err.println("Next sets:\n"); for (int i=0;i<numRaces;i++) { System.err.print(printSets(parallelRaces[i])+"--------------\n"); } } } } if (m_debug) { System.err.println("Total evaluations : " +evaluationCount); } return attributeList(base); } /** * t-test for unequal sample sizes and same variance. Returns probability * that observed difference in means is due to chance. */ private double ttest(Stats c1, Stats c2) throws Exception { double n1 = c1.count; double n2 = c2.count; double v1 = c1.stdDev * c1.stdDev; double v2 = c2.stdDev * c2.stdDev; double av1 = c1.mean; double av2 = c2.mean; double df = n1 + n2 - 2; double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) /df; double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2))); return Statistics.incompleteBeta(df / 2.0, 0.5, df / (df + (t * t))); } /** * Performs a rank race---race consisting of no attributes, the top * ranked attribute, the top two attributes etc. The initial ranking * is determined by an attribute evaluator. * @param data the instances to estimate accuracy over * @param random a random number generator * @return an array of selected attribute indices. */ private int [] rankRace(Instances data, Random random) throws Exception { char [] baseSet = new char [m_numAttribs]; char [] bestSet; double bestSetError; for (int i=0;i<m_numAttribs;i++) { if (i == m_classIndex) { baseSet[i] = '-'; } else { baseSet[i] = '0'; } } int numCompetitors = m_numAttribs-1; char [][] raceSets = new char [numCompetitors+1][m_numAttribs]; if (m_ASEval instanceof AttributeEvaluator) { // generate the attribute ranking first Ranker ranker = new Ranker(); ((AttributeEvaluator)m_ASEval).buildEvaluator(data); m_Ranking = ranker.search((AttributeEvaluator)m_ASEval,data); } else { GreedyStepwise fs = new GreedyStepwise(); double [][]rankres; fs.setGenerateRanking(true); ((SubsetEvaluator)m_ASEval).buildEvaluator(data); fs.search(m_ASEval, data); rankres = fs.rankedAttributes(); m_Ranking = new int[rankres.length]; for (int i=0;i<rankres.length;i++) { m_Ranking[i] = (int)rankres[i][0]; } } // set up the race raceSets[0] = (char [])baseSet.clone(); for (int i=0;i<m_Ranking.length;i++) { raceSets[i+1] = (char [])raceSets[i].clone(); raceSets[i+1][m_Ranking[i]] = '1'; } if (m_debug) { System.err.println("Initial sets:\n"+printSets(raceSets)); } // run the race double [] winnerInfo = raceSubsets(raceSets, data, true, random); bestSetError = winnerInfo[1]; bestSet = (char [])raceSets[(int)winnerInfo[0]].clone(); m_bestMerit = bestSetError; return attributeList(bestSet); } /** * Performs a hill climbing race---all single attribute changes to a * base subset are raced in parallel. The winner is chosen and becomes * the new base subset and the process is repeated until there is no * improvement in error over the base subset. * @param data the instances to estimate accuracy over * @param random a random number generator * @return an array of selected attribute indices. * @throws Exception if something goes wrong */ private int [] hillclimbRace(Instances data, Random random) throws Exception { double baseSetError; char [] baseSet = new char [m_numAttribs]; for (int i=0;i<m_numAttribs;i++) { if (i != m_classIndex) { if (m_raceType == FORWARD_RACE) { baseSet[i] = '0'; } else { baseSet[i] = '1'; } } else { baseSet[i] = '-'; } } int numCompetitors = m_numAttribs-1; char [][] raceSets = new char [numCompetitors+1][m_numAttribs]; raceSets[0] = (char [])baseSet.clone(); int count = 1; // initialize each race set to 1 attribute for (int i=0;i<m_numAttribs;i++) { if (i != m_classIndex) { raceSets[count] = (char [])baseSet.clone(); if (m_raceType == BACKWARD_RACE) { raceSets[count++][i] = '0'; } else { raceSets[count++][i] = '1'; } } } if (m_debug) { System.err.println("Initial sets:\n"+printSets(raceSets)); } // race the initial sets (base set either no or all features) double [] winnerInfo = raceSubsets(raceSets, data, true, random); baseSetError = winnerInfo[1]; m_bestMerit = baseSetError; baseSet = (char [])raceSets[(int)winnerInfo[0]].clone(); if (m_rankingRequested) { m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0]-1); m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1]; m_rankedSoFar++; } boolean improved = true; int j; // now race until there is no improvement over the base set or only // one competitor remains while (improved) { // generate the next set of competitors numCompetitors--; if (numCompetitors == 0) { //race finished! break; } j=0; // +1. we'll race against the base set---might be able to bail out // of the race if none from the new set are statistically better // than the base set. Base set is stored in loc 0. raceSets = new char [numCompetitors+1][m_numAttribs]; for (int i=0;i<numCompetitors+1;i++) { raceSets[i] = (char [])baseSet.clone(); if (i > 0) { for (int k=j;k<m_numAttribs;k++) { if (m_raceType == 1) { if (k != m_classIndex && raceSets[i][k] != '0') { raceSets[i][k] = '0'; j = k+1; break; } } else { if (k != m_classIndex && raceSets[i][k] != '1') { raceSets[i][k] = '1'; j = k+1; break; } } } } } if (m_debug) { System.err.println("Next set : \n"+printSets(raceSets)); } improved = false; winnerInfo = raceSubsets(raceSets, data, true, random); String bs = new String(baseSet); String win = new String(raceSets[(int)winnerInfo[0]]); if (bs.compareTo(win) == 0) { // race finished } else { if (winnerInfo[1] < baseSetError || m_rankingRequested) { improved = true; baseSetError = winnerInfo[1]; m_bestMerit = baseSetError; // find which att is different if (m_rankingRequested) { for (int i = 0; i < baseSet.length; i++) { if (win.charAt(i) != bs.charAt(i)) { m_rankedAtts[m_rankedSoFar][0] = i; m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1]; m_rankedSoFar++; } } } baseSet = (char [])raceSets[(int)winnerInfo[0]].clone(); } else { // Will get here for a subset whose error is outside the delta // threshold but is not *significantly* worse than the base // subset //throw new Exception("RaceSearch: problem in hillClimbRace"); } } } return attributeList(baseSet); } /** * Convert an attribute set to an array of indices */ private int [] attributeList(char [] list) { int count = 0; for (int i=0;i<m_numAttribs;i++) { if (list[i] == '1') { count++; } } int [] rlist = new int[count]; count = 0; for (int i=0;i<m_numAttribs;i++) { if (list[i] == '1') { rlist[count++] = i; } } return rlist; } /** * Races the leave-one-out cross validation errors of a set of * attribute subsets on a set of instances. * @param raceSets a set of attribute subset specifications * @param data the instances to use when cross validating * @param baseSetIncluded true if the first attribute set is a * base set generated from the previous race * @param random a random number generator * @return the index of the winning subset * @throws Exception if an error occurs during cross validation */ private double [] raceSubsets(char [][]raceSets, Instances data, boolean baseSetIncluded, Random random) throws Exception { // the evaluators --- one for each subset ASEvaluation [] evaluators = ASEvaluation.makeCopies(m_theEvaluator, raceSets.length); // array of subsets eliminated from the race boolean [] eliminated = new boolean [raceSets.length];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -