📄 RaceSearch.java
    }
    return bestSubset;
  }

  public double [][] rankedAttributes() throws Exception {
    if (!m_rankingRequested) {
      throw new Exception("Need to request a ranked list of attributes "
                          + "before attributes can be ranked (RaceSearch).");
    }
    if (m_rankedAtts == null) {
      throw new Exception("Search must be performed before attributes "
                          + "can be ranked (RaceSearch).");
    }

    double [][] final_rank = new double [m_rankedSoFar][2];
    for (int i = 0; i < m_rankedSoFar; i++) {
      final_rank[i][0] = m_rankedAtts[i][0];
      final_rank[i][1] = m_rankedAtts[i][1];
    }

    if (m_numToSelect <= 0) {
      if (m_threshold == -Double.MAX_VALUE) {
        m_calculatedNumToSelect = final_rank.length;
      } else {
        determineNumToSelectFromThreshold(final_rank);
      }
    }

    return final_rank;
  }

  private void determineNumToSelectFromThreshold(double [][] ranking) {
    int count = 0;
    for (int i = 0; i < ranking.length; i++) {
      if (ranking[i][1] > m_threshold) {
        count++;
      }
    }
    m_calculatedNumToSelect = count;
  }

  /**
   * Print an attribute set.
   */
  private String printSets(char [][] raceSets) {
    StringBuffer temp = new StringBuffer();
    for (int i = 0; i < raceSets.length; i++) {
      for (int j = 0; j < m_numAttribs; j++) {
        temp.append(raceSets[i][j]);
      }
      temp.append('\n');
    }
    return temp.toString();
  }

  /**
   * Performs a schemata race---a series of races in parallel.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices
   */
  private int [] schemataRace(Instances data, Random random) throws Exception {
    // # races, 2 (competitors in each race), # attributes
    char [][][] parallelRaces;
    int numRaces = m_numAttribs - 1;
    Random r = new Random(42);
    int numInstances = data.numInstances();
    Instances trainCV;
    Instances testCV;
    Instance testInstance;

    // statistics on the racers
    Stats [][] raceStats = new Stats[numRaces][2];
    parallelRaces = new char [numRaces][2][m_numAttribs-1];

    char [] base = new char [m_numAttribs];
    for (int i = 0; i < m_numAttribs; i++) {
      base[i] = '*';
    }

    int count = 0;
    // set up initial races
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        parallelRaces[count][0] = (char [])base.clone();
        parallelRaces[count][1] = (char [])base.clone();
        parallelRaces[count][0][i] = '1';
        parallelRaces[count++][1][i] = '0';
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n");
      for (int i = 0; i < numRaces; i++) {
        System.err.print(printSets(parallelRaces[i]) + "--------------\n");
      }
    }

    BitSet randomB = new BitSet(m_numAttribs);
    char [] randomBC = new char [m_numAttribs];
    // notes which bit positions have been decided
    boolean [] attributeConstraints = new boolean[m_numAttribs];
    double error;
    int evaluationCount = 0;

    raceSet:
    while (numRaces > 0) {
      boolean won = false;
      for (int i = 0; i < numRaces; i++) {
        raceStats[i][0] = new Stats();
        raceStats[i][1] = new Stats();
      }

      // keep an eye on how many test instances have been randomly sampled
      int sampleCount = 0;

      // run the current set of races
      while (!won) {
        // generate a random binary string
        for (int i = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex) {
            if (!attributeConstraints[i]) {
              if (r.nextDouble() < 0.5) {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            } else {
              // this position has been decided from previous races
              if (base[i] == '1') {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            }
          }
        }

        // randomly select an instance to test on
        int testIndex = Math.abs(r.nextInt() % numInstances);

        // We want to randomize the data the same way for every
        // learning scheme.
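        // Added note: with numInstances folds this is a leave-one-out
        // split---the test fold holds only the instance at testIndex,
        // and the evaluator is rebuilt on the remaining instances below.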
        trainCV = data.trainCV(numInstances, testIndex, new Random(1));
        testCV = data.testCV(numInstances, testIndex);
        testInstance = testCV.instance(0);
        sampleCount++;

        /* if (sampleCount > numInstances) {
          throw new Exception("raceSchemata: No clear winner after sampling "
                              + sampleCount + " instances.");
        } */

        m_theEvaluator.buildEvaluator(trainCV);
        // the evaluator must retrain for every test point
        error = -((HoldOutSubsetEvaluator)m_theEvaluator).
          evaluateSubset(randomB, testInstance, true);
        evaluationCount++;

        // see which racers match this random subset
        for (int i = 0; i < m_numAttribs; i++) {
          if (randomB.get(i)) {
            randomBC[i] = '1';
          } else {
            randomBC[i] = '0';
          }
        }
        // System.err.println("Random subset: " + (new String(randomBC)));

        checkRaces:
        for (int i = 0; i < numRaces; i++) {
          // if a pair of racers has been evaluated on more than numInstances
          // points then bail out---it is unlikely that adding any more
          // attributes is better than the current base set.
          if (((raceStats[i][0].count + raceStats[i][1].count) / 2) > numInstances) {
            break raceSet;
          }

          for (int j = 0; j < 2; j++) {
            boolean matched = true;
            for (int k = 0; k < m_numAttribs; k++) {
              if (parallelRaces[i][j][k] != '*') {
                if (parallelRaces[i][j][k] != randomBC[k]) {
                  matched = false;
                  break;
                }
              }
            }

            if (matched) {
              // update the stats for this racer
              // System.err.println("Matched " + i + " " + j);
              raceStats[i][j].add(error);

              // does this race have a clear winner, meaning we can
              // terminate the whole set of parallel races?
              if (raceStats[i][0].count > m_samples &&
                  raceStats[i][1].count > m_samples) {
                raceStats[i][0].calculateDerived();
                raceStats[i][1].calculateDerived();
                // System.err.println(j + " : " + (new String(parallelRaces[i][j])));
                // System.err.println(raceStats[i][0]);
                // System.err.println(raceStats[i][1]);

                // check the t-test
                double prob = ttest(raceStats[i][0], raceStats[i][1]);
                // System.err.println("Prob :" + prob);

                if (prob < m_sigLevel) {
                  // stop the races, we have a winner!
                  if (raceStats[i][0].mean < raceStats[i][1].mean) {
                    base = (char [])parallelRaces[i][0].clone();
                    m_bestMerit = raceStats[i][0].mean;
                    if (m_debug) {
                      System.err.println("contender 0 won");
                    }
                  } else {
                    base = (char [])parallelRaces[i][1].clone();
                    m_bestMerit = raceStats[i][1].mean;
                    if (m_debug) {
                      System.err.println("contender 1 won");
                    }
                  }

                  if (m_debug) {
                    System.err.println((new String(parallelRaces[i][0]))
                                       + " " + (new String(parallelRaces[i][1])));
                    System.err.println("Means : " + raceStats[i][0].mean
                                       + " vs " + raceStats[i][1].mean);
                    System.err.println("Evaluations so far : " + evaluationCount);
                  }
                  won = true;
                  break checkRaces;
                }
              }
            }
          }
        }
      }

      numRaces--;

      // set up the next set of races if necessary
      if (numRaces > 0 && won) {
        parallelRaces = new char [numRaces][2][m_numAttribs-1];
        raceStats = new Stats[numRaces][2];

        // update the attribute constraints
        for (int i = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex && !attributeConstraints[i] && base[i] != '*') {
            attributeConstraints[i] = true;
            break;
          }
        }

        count = 0;
        for (int i = 0; i < numRaces; i++) {
          parallelRaces[i][0] = (char [])base.clone();
          parallelRaces[i][1] = (char [])base.clone();
          for (int j = count; j < m_numAttribs; j++) {
            if (j != m_classIndex && parallelRaces[i][0][j] == '*') {
              parallelRaces[i][0][j] = '1';
              parallelRaces[i][1][j] = '0';
              count = j + 1;
              break;
            }
          }
        }

        if (m_debug) {
          System.err.println("Next sets:\n");
          for (int i = 0; i < numRaces; i++) {
            System.err.print(printSets(parallelRaces[i]) + "--------------\n");
          }
        }
      }
    }

    if (m_debug) {
      System.err.println("Total evaluations : " + evaluationCount);
    }

    return attributeList(base);
  }
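  /*
   * Added implementation note for ttest() below: this is the pooled-variance
   * two-sample t statistic,
   *
   *   s_p^2 = ((n1 - 1) * v1 + (n2 - 1) * v2) / (n1 + n2 - 2)
   *   t     = (mean1 - mean2) / sqrt(s_p^2 * (1/n1 + 1/n2))
   *
   * and the two-tailed p-value is recovered via the regularized incomplete
   * beta function as incompleteBeta(df/2, 0.5, df/(df + t^2)),
   * with df = n1 + n2 - 2.
   */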
  /**
   * t-test for unequal sample sizes and the same variance. Returns the
   * probability that the observed difference in means is due to chance.
   */
  private double ttest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count;
    double n2 = c2.count;
    double v1 = c1.stdDev * c1.stdDev;
    double v2 = c2.stdDev * c2.stdDev;
    double av1 = c1.mean;
    double av2 = c2.mean;
    double df = n1 + n2 - 2;

    double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) / df;
    double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));

    return Statistics.incompleteBeta(df / 2.0, 0.5, df / (df + (t * t)));
  }

  /**
   * Performs a rank race---a race consisting of no attributes, the top
   * ranked attribute, the top two attributes, and so on. The initial
   * ranking is determined by an attribute evaluator.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices
   */
  private int [] rankRace(Instances data, Random random) throws Exception {
    char [] baseSet = new char [m_numAttribs];
    char [] bestSet;
    double bestSetError;
    for (int i = 0; i < m_numAttribs; i++) {
      if (i == m_classIndex) {
        baseSet[i] = '-';
      } else {
        baseSet[i] = '0';
      }
    }

    int numCompetitors = m_numAttribs - 1;
    char [][] raceSets = new char [numCompetitors + 1][m_numAttribs];

    if (m_ASEval instanceof AttributeEvaluator) {
      // generate the attribute ranking first
      Ranker ranker = new Ranker();
      ((AttributeEvaluator)m_ASEval).buildEvaluator(data);
      m_Ranking = ranker.search((AttributeEvaluator)m_ASEval, data);
    } else {
      GreedyStepwise fs = new GreedyStepwise();
      double [][] rankres;
      fs.setGenerateRanking(true);
      ((SubsetEvaluator)m_ASEval).buildEvaluator(data);
      fs.search(m_ASEval, data);
      rankres = fs.rankedAttributes();
      m_Ranking = new int[rankres.length];
      for (int i = 0; i < rankres.length; i++) {
        m_Ranking[i] = (int)rankres[i][0];
      }
    }

    // set up the race
    raceSets[0] = (char [])baseSet.clone();
    for (int i = 0; i < m_Ranking.length; i++) {
      raceSets[i + 1] = (char [])raceSets[i].clone();
      raceSets[i + 1][m_Ranking[i]] = '1';
    }

    if (m_debug) {
      System.err.println("Initial sets:\n" + printSets(raceSets));
    }

    // run the race
    double [] winnerInfo = raceSubsets(raceSets, data, true, random);
    bestSetError = winnerInfo[1];
    bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    m_bestMerit = bestSetError;
    return attributeList(bestSet);
  }

  /**
   * Performs a hill climbing race---all single attribute changes to a
   * base subset are raced in parallel. The winner is chosen and becomes
   * the new base subset, and the process is repeated until there is no
   * improvement in error over the base subset.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices
   * @throws Exception if something goes wrong
   */
  private int [] hillclimbRace(Instances data, Random random) throws Exception {
    double baseSetError;
    char [] baseSet = new char [m_numAttribs];

    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        if (m_raceType == FORWARD_RACE) {
          baseSet[i] = '0';
        } else {
          baseSet[i] = '1';
        }
      } else {
        baseSet[i] = '-';
      }
    }

    int numCompetitors = m_numAttribs - 1;
    char [][] raceSets = new char [numCompetitors + 1][m_numAttribs];
    raceSets[0] = (char [])baseSet.clone();

    int count = 1;
    // initialize each race set to a single-attribute change from the base set
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        raceSets[count] = (char [])baseSet.clone();
        if (m_raceType == BACKWARD_RACE) {
          raceSets[count++][i] = '0';
        } else {
          raceSets[count++][i] = '1';
        }
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n" + printSets(raceSets));
    }

    // race the initial sets (the base set has either no or all features)
    double [] winnerInfo = raceSubsets(raceSets, data, true, random);
    baseSetError = winnerInfo[1];
    m_bestMerit = baseSetError;
    baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    if (m_rankingRequested) {
      m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0] - 1);
      m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
      m_rankedSoFar++;
    }

    boolean improved = true;
    int j;

    // now race until there is no improvement over the base set or only
    // one competitor remains
    while (improved) {
      // generate the next set of competitors
      numCompetitors--;
      if (numCompetitors == 0) {
        // race finished!
        break;
      }
      j = 0;

      // +1: we also race against the base set---we might be able to bail
      // out of the race if none of the new sets are statistically better
      // than the base set. The base set is stored in location 0.
      raceSets = new char [numCompetitors + 1][m_numAttribs];
      for (int i = 0; i < numCompetitors + 1; i++) {
        raceSets[i] = (char [])baseSet.clone();
        if (i > 0) {
          for (int k = j; k < m_numAttribs; k++) {
            if (m_raceType == 1) {
              // drop attribute k from the base set (backward direction)
              if (k != m_classIndex && raceSets[i][k] != '0') {
                raceSets[i][k] = '0';
                j = k + 1;
                break;
              }
            } else {
              // add attribute k to the base set (forward direction)
              if (k != m_classIndex && raceSets[i][k] != '1') {
                raceSets[i][k] = '1';
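For orientation, a minimal usage sketch follows. It assumes the listing above is WEKA's weka.attributeSelection.RaceSearch as shipped in older WEKA distributions that include RaceSearch in the core attribute selection package; the dataset name, the ClassifierSubsetEval/J48 wiring, and the setRaceType/TAGS_SELECTION plumbing are assumptions taken from that API rather than anything shown in the listing itself.

// RaceSearchDemo.java -- hypothetical driver, not part of RaceSearch.java
import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.ClassifierSubsetEval;
import weka.attributeSelection.RaceSearch;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.SelectedTag;
import weka.core.converters.ConverterUtils.DataSource;

public class RaceSearchDemo {
  public static void main(String[] args) throws Exception {
    // load a dataset (file name is a placeholder) and set the class attribute
    Instances data = DataSource.read("iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // hold-out subset evaluator wrapped around a classifier (assumed setup);
    // any HoldOutSubsetEvaluator, the interface the schemata race casts to
    // above, should work here
    ClassifierSubsetEval eval = new ClassifierSubsetEval();
    eval.setClassifier(new J48());

    // forward hill-climbing race; FORWARD_RACE appears in the listing above,
    // while setRaceType/TAGS_SELECTION are assumed from the standard API
    RaceSearch search = new RaceSearch();
    search.setRaceType(new SelectedTag(RaceSearch.FORWARD_RACE,
                                       RaceSearch.TAGS_SELECTION));

    // run the attribute selection and print the winning subset
    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(eval);
    selector.setSearch(search);
    selector.SelectAttributes(data);
    System.out.println(selector.toResultsString());
  }
}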