📄 racesearch.java
      // keep an eye on how many test instances have been randomly sampled
      int sampleCount = 0;

      // run the current set of races
      while (!won) {
        // generate a random binary string
        for (int i = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex) {
            if (!attributeConstraints[i]) {
              if (r.nextDouble() < 0.5) {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            } else {
              // this position has been decided from previous races
              if (base[i] == '1') {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            }
          }
        }

        // randomly select an instance to test on
        int testIndex = Math.abs(r.nextInt() % numInstances);
        trainCV = data.trainCV(numInstances, testIndex);
        testCV = data.testCV(numInstances, testIndex);
        testInstance = testCV.instance(0);
        sampleCount++;

        /* if (sampleCount > numInstances) {
             throw new Exception("raceSchemata: No clear winner after sampling "
                                 + sampleCount + " instances.");
           } */

        m_theEvaluator.buildEvaluator(trainCV);
        // the evaluator must retrain for every test point
        error = -((HoldOutSubsetEvaluator)m_theEvaluator).
          evaluateSubset(randomB, testInstance, true);
        evaluationCount++;

        // see which racers match this random subset
        for (int i = 0; i < m_numAttribs; i++) {
          if (randomB.get(i)) {
            randomBC[i] = '1';
          } else {
            randomBC[i] = '0';
          }
        }
        // System.err.println("Random subset: " + (new String(randomBC)));

        checkRaces: for (int i = 0; i < numRaces; i++) {
          // if a pair of racers has evaluated more than num instances
          // then bail out---unlikely that having any more atts is any
          // better than the current base set.
          if (((raceStats[i][0].count + raceStats[i][1].count) / 2) > (numInstances)) {
            break raceSet;
          }

          for (int j = 0; j < 2; j++) {
            boolean matched = true;
            for (int k = 0; k < m_numAttribs; k++) {
              if (parallelRaces[i][j][k] != '*') {
                if (parallelRaces[i][j][k] != randomBC[k]) {
                  matched = false;
                  break;
                }
              }
            }

            if (matched) {
              // update the stats for this racer
              // System.err.println("Matched " + i + " " + j);
              raceStats[i][j].add(error);

              // does this race have a clear winner, meaning we can
              // terminate the whole set of parallel races?
              if (raceStats[i][0].count > m_samples
                  && raceStats[i][1].count > m_samples) {
                raceStats[i][0].calculateDerived();
                raceStats[i][1].calculateDerived();
                // System.err.println(j + " : " + (new String(parallelRaces[i][j])));
                // System.err.println(raceStats[i][0]);
                // System.err.println(raceStats[i][1]);

                // check the ttest
                double prob;
                // first check for difference in variance
                /* if ((prob = ftest(raceStats[i][0], raceStats[i][1])) < m_sigLevel) {
                     System.err.println("Ftest prob (diff var) : " + prob);
                     prob = tutest(raceStats[i][0], raceStats[i][1]);
                   } else { */
                // use ttest for same variance
                // System.err.println("Ftest prob : " + prob);
                prob = ttest(raceStats[i][0], raceStats[i][1]);
                // }
                // System.err.println("Prob :" + prob);

                if (prob < m_sigLevel) {
                  // stop the races we have a winner!
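                  // the pooled t-test says the two mean hold-out errors differ
                  // at the requested significance level, so pick the racer with
                  // the lower mean error; it becomes the base set for the next
                  // round of races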
                  if (raceStats[i][0].mean < raceStats[i][1].mean) {
                    base = (char [])parallelRaces[i][0].clone();
                    m_bestMerit = raceStats[i][0].mean;
                    if (m_debug) {
                      System.err.println("contender 0 won ");
                    }
                  } else {
                    base = (char [])parallelRaces[i][1].clone();
                    m_bestMerit = raceStats[i][1].mean;
                    if (m_debug) {
                      System.err.println("contender 1 won");
                    }
                  }

                  if (m_debug) {
                    System.err.println((new String(parallelRaces[i][0]))
                                       + " " + (new String(parallelRaces[i][1])));
                    System.err.println("Means : " + raceStats[i][0].mean
                                       + " vs " + raceStats[i][1].mean);
                    System.err.println("Evaluations so far : " + evaluationCount);
                  }
                  won = true;
                  break checkRaces;
                }
              }
            }
          }
        }
      }

      numRaces--;

      // set up the next set of races if necessary
      if (numRaces > 0 && won) {
        parallelRaces = new char [numRaces][2][m_numAttribs-1];
        raceStats = new Stats[numRaces][2];

        // update the attribute constraints
        for (int i = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex && !attributeConstraints[i] && base[i] != '*') {
            attributeConstraints[i] = true;
            break;
          }
        }

        count = 0;
        for (int i = 0; i < numRaces; i++) {
          parallelRaces[i][0] = (char [])base.clone();
          parallelRaces[i][1] = (char [])base.clone();
          for (int j = count; j < m_numAttribs; j++) {
            if (j != m_classIndex && parallelRaces[i][0][j] == '*') {
              parallelRaces[i][0][j] = '1';
              parallelRaces[i][1][j] = '0';
              count = j + 1;
              break;
            }
          }
        }

        if (m_debug) {
          System.err.println("Next sets:\n");
          for (int i = 0; i < numRaces; i++) {
            System.err.print(printSets(parallelRaces[i]) + "--------------\n");
          }
        }
      }
    }

    if (m_debug) {
      System.err.println("Total evaluations : " + evaluationCount);
    }
    return attributeList(base);
  }

  // Adapted from numerical recipes
  private double gammln(double xx) {
    double x, y, tmp, ser;
    final double [] cof = {76.18009172947146, -86.50532032941677,
                           24.01409824083091, -1.231739572450155,
                           0.1208650973866179e-2, -0.5395239384953e-5};
    int j;

    y = x = xx;
    tmp = x + 5.5;
    tmp -= (x + 0.5) * Math.log(tmp);
    ser = 1.000000000190015;
    for (j = 0; j <= 5; j++) ser += cof[j] / ++y;
    return -tmp + Math.log(2.5066282746310005 * ser / x);
  }

  // Adapted from numerical recipes
  private double betacf(double a, double b, double x) throws Exception {
    final int maxit = 100;
    final double eps = 3.0e-7;
    final double fpmin = 1.0e-30;
    int m, m2;
    double aa, c, d, del, h, qab, qam, qap;

    // System.err.println("a " + a + " b " + b + " x " + x);
    qab = a + b;
    qap = a + 1.0;
    qam = a - 1.0;
    c = 1.0;
    d = 1.0 - qab * x / qap;
    if (Math.abs(d) < fpmin) d = fpmin;
    d = 1.0 / d;
    h = d;
    for (m = 1; m <= maxit; m++) {
      m2 = 2 * m;
      aa = m * (b - m) * x / ((qam + m2) * (a + m2));
      d = 1.0 + aa * d;
      if (Math.abs(d) < fpmin) d = fpmin;
      c = 1.0 + aa / c;
      if (Math.abs(c) < fpmin) c = fpmin;
      d = 1.0 / d;
      h *= d * c;
      aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
      d = 1.0 + aa * d;
      if (Math.abs(d) < fpmin) d = fpmin;
      c = 1.0 + aa / c;
      if (Math.abs(c) < fpmin) c = fpmin;
      d = 1.0 / d;
      del = d * c;
      h *= del;
      if (Math.abs(del - 1.0) < eps) break;
    }
    if (m > maxit) {
      throw new Exception("a or b too big, or maxit too small in betacf");
    }
    return h;
  }

  // Adapted from numerical recipes
  private double betai(double a, double b, double x) throws Exception {
    double bt;

    // System.err.println("***a " + a + " b " + b + " x " + x);
    if (x < 0.0 || x > 1.0) {
      throw new Exception("Bad x in routine betai");
    }
    if (x == 0.0 || x == 1.0) bt = 0.0;
    else
      bt = Math.exp(gammln(a + b) - gammln(a) - gammln(b)
                    + a * Math.log(x) + b * Math.log(1.0 - x));
    if (x < (a + 1.0) / (a + b + 2.0)) return bt * betacf(a, b, x) / a;
    else return 1.0 - bt * betacf(b, a, 1.0 - x) / b;
  }
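  /*
   * Illustrative sketch (not part of the original file): betai above is the
   * regularized incomplete beta function I_x(a, b), which is all that is
   * needed to turn a t statistic into a p-value.  For |t| with df degrees of
   * freedom the two-tailed probability is I_{df/(df + t^2)}(df/2, 1/2), the
   * same call made by ttest() below.  The method name and the numbers used
   * here are hypothetical and purely for illustration.
   */
  private double twoTailedTProbExample() throws Exception {
    double t = 2.0;    // hypothetical t statistic
    double df = 10.0;  // hypothetical degrees of freedom
    // identical in form to the call made in ttest() below
    return betai(0.5 * df, 0.5, df / (df + t * t));
  }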
  // ftest for differences in variance. Returns probability that difference
  // in variance is due to chance. Adapted from numerical recipes
  private double ftest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double var1 = c1.stdDev * c1.stdDev;
    double var2 = c2.stdDev * c2.stdDev;
    double ave1 = c1.mean; double ave2 = c2.mean;
    double f, df1, df2, prob;

    if (var1 > var2) {
      f = var1 / var2; df1 = n1 - 1; df2 = n2 - 1;
    } else {
      f = var2 / var1; df1 = n2 - 1; df2 = n1 - 1;
    }
    prob = 2.0 * betai(0.5 * df2, 0.5 * df1, df2 / (df2 + df1 * f));
    if (prob > 1.0) prob = 2.0 - prob;
    return prob;
  }

  // t-test for unequal sample sizes and same variance. Returns probability
  // that observed difference in means is due to chance. Adapted from
  // numerical recipes
  private double ttest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double var1 = c1.stdDev * c1.stdDev;
    double var2 = c2.stdDev * c2.stdDev;
    double ave1 = c1.mean; double ave2 = c2.mean;

    double df = n1 + n2 - 2;
    double svar = ((n1 - 1) * var1 + (n2 - 1) * var2) / df;
    double t = (ave1 - ave2) / Math.sqrt(svar * (1.0 / n1 + 1.0 / n2));
    t = Math.abs(t);
    // System.err.println("t : " + t);
    double prob = betai(0.5 * df, 0.5, df / (df + t * t));
    return prob;
  }

  // t-test for unequal sample sizes and different variances. Returns the
  // probability. Adapted from numerical recipes
  private double tutest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double var1 = c1.stdDev * c1.stdDev;
    double var2 = c2.stdDev * c2.stdDev;
    double ave1 = c1.mean; double ave2 = c2.mean;

    double t = (ave1 - ave2) / Math.sqrt(var1 / n1 + var2 / n2);
    t = Math.abs(t);
    // Welch-Satterthwaite degrees of freedom: the numerical recipes formula
    // squares the per-sample variance terms (not their square roots)
    double df = ((var1 / n1 + var2 / n2) * (var1 / n1 + var2 / n2))
      / ((var1 / n1) * (var1 / n1) / (n1 - 1)
         + (var2 / n2) * (var2 / n2) / (n2 - 1));
    // System.err.println("t : " + t);
    // System.err.println("df : " + df);
    double prob = betai(0.5 * df, 0.5, df / (df + t * t));
    return prob;
  }

  /**
   * Performs a rank race---race consisting of no attributes, the top
   * ranked attribute, the top two attributes etc. The initial ranking
   * is determined by an attribute evaluator.
   * @param data the instances to estimate accuracy over
   * @return an array of selected attribute indices.
   */
  private int [] rankRace(Instances data) throws Exception {
    char [] baseSet = new char [m_numAttribs];
    char [] bestSet;
    double bestSetError;
    for (int i = 0; i < m_numAttribs; i++) {
      if (i == m_classIndex) {
        baseSet[i] = '-';
      } else {
        baseSet[i] = '0';
      }
    }

    int numCompetitors = m_numAttribs - 1;
    char [][] raceSets = new char [numCompetitors + 1][m_numAttribs];
    int winner;

    if (m_ASEval instanceof AttributeEvaluator) {
      // generate the attribute ranking first
      Ranker ranker = new Ranker();
      ((AttributeEvaluator)m_ASEval).buildEvaluator(data);
      m_Ranking = ranker.search((AttributeEvaluator)m_ASEval, data);
    } else {
      ForwardSelection fs = new ForwardSelection();
      double [][] rankres;
      fs.setGenerateRanking(true);
      ((SubsetEvaluator)m_ASEval).buildEvaluator(data);
      fs.search(m_ASEval, data);
      rankres = fs.rankedAttributes();
      m_Ranking = new int[rankres.length];
      for (int i = 0; i < rankres.length; i++) {
        m_Ranking[i] = (int)rankres[i][0];
      }
    }

    // set up the race
    raceSets[0] = (char [])baseSet.clone();
    for (int i = 0; i < m_Ranking.length; i++) {
      raceSets[i + 1] = (char [])raceSets[i].clone();
      raceSets[i + 1][m_Ranking[i]] = '1';
    }

    if (m_debug) {
      System.err.println("Initial sets:\n" + printSets(raceSets));
    }

    // run the race
    double [] winnerInfo = raceSubsets(raceSets, data, true);
    bestSetError = winnerInfo[1];
    bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    m_bestMerit = bestSetError;
    return attributeList(bestSet);
  }
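  /*
   * Worked example (not in the original source): for a hypothetical ranking
   * {2, 0, 3} over five attributes with the class at index 4, the loop above
   * builds one competitor per prefix of the ranking ('-' marks the class
   * position):
   *
   *   0000-   no attributes (the base set)
   *   0010-   top ranked attribute only
   *   1010-   top two attributes
   *   1011-   top three attributes
   *
   * raceSubsets() then races all of these candidates against each other in
   * parallel and returns the index and error of the winning subset.
   */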
  /**
   * Performs a hill climbing race---all single attribute changes to a
   * base subset are raced in parallel. The winner is chosen and becomes
   * the new base subset and the process is repeated until there is no
   * improvement in error over the base subset.
   * @param data the instances to estimate accuracy over
   * @return an array of selected attribute indices.
   */
  private int [] hillclimbRace(Instances data) throws Exception {
    double baseSetError;
    char [] baseSet = new char [m_numAttribs];
    int rankCount = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        if (m_raceType == FORWARD_RACE) {
          baseSet[i] = '0';
        } else {
          baseSet[i] = '1';
        }
      } else {
        baseSet[i] = '-';
      }
    }

    int numCompetitors = m_numAttribs - 1;
    char [][] raceSets = new char [numCompetitors + 1][m_numAttribs];
    int winner;

    raceSets[0] = (char [])baseSet.clone();
    int count = 1;
    // initialize each race set to 1 attribute
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        raceSets[count] = (char [])baseSet.clone();
        if (m_raceType == BACKWARD_RACE) {
          raceSets[count++][i] = '0';
        } else {
          raceSets[count++][i] = '1';
        }
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n" + printSets(raceSets));
    }

    // race the initial sets (base set either no or all features)
    double [] winnerInfo = raceSubsets(raceSets, data, true);
    baseSetError = winnerInfo[1];
    m_bestMerit = baseSetError;
    baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    if (m_rankingRequested) {
      m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0] - 1);
      m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
      m_rankedSoFar++;
    }

    boolean improved = true;
    int j;
    // now race until there is no improvement over the base set or only
    // one competitor remains
    while (improved) {
      // generate the next set of competitors
      numCompetitors--;
      if (numCompetitors == 0) {
        // race finished!
        break;
      }
      j = 0;
      // +1. we'll race against the base set---might be able to bail out
      // of the race if none from the new set are statistically better
      // than the base set. Base set is stored in loc 0.
      raceSets = new char [numCompetitors + 1][m_numAttribs];
      for (int i = 0; i < numCompetitors + 1; i++) {
        raceSets[i] = (char [])baseSet.clone();
        if (i > 0) {
          for (int k = j; k < m_numAttribs; k++) {
            if (m_raceType == 1) {
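  /*
   * Illustrative usage sketch (not part of RaceSearch itself, and separate
   * from the listing above): a minimal driver showing how a race search is
   * normally run through WEKA's attribute selection API.  It assumes the
   * standard weka.attributeSelection classes named below; the ARFF path is a
   * placeholder.  ClassifierSubsetEval is used because the races above need a
   * HoldOutSubsetEvaluator.
   */
  class RaceSearchExample {
    public static void main(String [] args) throws Exception {
      // load a dataset and use the last attribute as the class
      weka.core.Instances data = new weka.core.Instances(
        new java.io.BufferedReader(new java.io.FileReader("data.arff")));
      data.setClassIndex(data.numAttributes() - 1);

      // hold-out subset evaluator required by the race (wraps a classifier)
      weka.attributeSelection.ClassifierSubsetEval eval =
        new weka.attributeSelection.ClassifierSubsetEval();

      // race search left at its default settings; race type, significance
      // level etc. are configurable through the search's options
      weka.attributeSelection.RaceSearch search =
        new weka.attributeSelection.RaceSearch();

      weka.attributeSelection.AttributeSelection selector =
        new weka.attributeSelection.AttributeSelection();
      selector.setEvaluator(eval);
      selector.setSearch(search);
      selector.SelectAttributes(data);

      // indices of the selected attributes (the class index is included)
      int [] selected = selector.selectedAttributes();
      System.out.println(java.util.Arrays.toString(selected));
    }
  }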