📄 racesearch.java
字号:
StringBuffer temp = new StringBuffer();
for (int i=0;i<raceSets.length;i++) {
for (int j=0;j<m_numAttribs;j++) {
temp.append(raceSets[i][j]);
}
temp.append('\n');
}
return temp.toString();
}
/**
* Performs a schemata race---a series of races in parallel.
* @param data the instances to estimate accuracy over.
* @param random a random number generator
* @return an array of selected attribute indices.
*/
private int [] schemataRace(Instances data, Random random) throws Exception {
// # races, 2 (competitors in each race), # attributes
char [][][] parallelRaces;
int numRaces = m_numAttribs-1;
Random r = new Random(42);
int numInstances = data.numInstances();
Instances trainCV; Instances testCV;
Instance testInstance;
// statistics on the racers
Stats [][] raceStats = new Stats[numRaces][2];
parallelRaces = new char [numRaces][2][m_numAttribs-1];
char [] base = new char [m_numAttribs];
for (int i=0;i<m_numAttribs;i++) {
base[i] = '*';
}
int count=0;
// set up initial races
for (int i=0;i<m_numAttribs;i++) {
if (i != m_classIndex) {
parallelRaces[count][0] = (char [])base.clone();
parallelRaces[count][1] = (char [])base.clone();
parallelRaces[count][0][i] = '1';
parallelRaces[count++][1][i] = '0';
}
}
if (m_debug) {
System.err.println("Initial sets:\n");
for (int i=0;i<numRaces;i++) {
System.err.print(printSets(parallelRaces[i])+"--------------\n");
}
}
BitSet randomB = new BitSet(m_numAttribs);
char [] randomBC = new char [m_numAttribs];
// notes which bit positions have been decided
boolean [] attributeConstraints = new boolean[m_numAttribs];
double error;
int evaluationCount = 0;
raceSet: while (numRaces > 0) {
boolean won = false;
for (int i=0;i<numRaces;i++) {
raceStats[i][0] = new Stats();
raceStats[i][1] = new Stats();
}
// keep an eye on how many test instances have been randomly sampled
int sampleCount = 0;
// run the current set of races
while (!won) {
// generate a random binary string
for (int i=0;i<m_numAttribs;i++) {
if (i != m_classIndex) {
if (!attributeConstraints[i]) {
if (r.nextDouble() < 0.5) {
randomB.set(i);
} else {
randomB.clear(i);
}
} else { // this position has been decided from previous races
if (base[i] == '1') {
randomB.set(i);
} else {
randomB.clear(i);
}
}
}
}
// randomly select an instance to test on
int testIndex = Math.abs(r.nextInt() % numInstances);
trainCV = data.trainCV(numInstances, testIndex, random);
testCV = data.testCV(numInstances, testIndex);
testInstance = testCV.instance(0);
sampleCount++;
/* if (sampleCount > numInstances) {
throw new Exception("raceSchemata: No clear winner after sampling "
+sampleCount+" instances.");
} */
m_theEvaluator.buildEvaluator(trainCV);
// the evaluator must retrain for every test point
error = -((HoldOutSubsetEvaluator)m_theEvaluator).
evaluateSubset(randomB,
testInstance,
true);
evaluationCount++;
// see which racers match this random subset
for (int i=0;i<m_numAttribs;i++) {
if (randomB.get(i)) {
randomBC[i] = '1';
} else {
randomBC[i] = '0';
}
}
// System.err.println("Random subset: "+(new String(randomBC)));
checkRaces: for (int i=0;i<numRaces;i++) {
// if a pair of racers has evaluated more than num instances
// then bail out---unlikely that having any more atts is any
// better than the current base set.
if (((raceStats[i][0].count + raceStats[i][1].count) / 2) >
(numInstances)) {
break raceSet;
}
for (int j=0;j<2;j++) {
boolean matched = true;
for (int k =0;k<m_numAttribs;k++) {
if (parallelRaces[i][j][k] != '*') {
if (parallelRaces[i][j][k] != randomBC[k]) {
matched = false;
break;
}
}
}
if (matched) { // update the stats for this racer
// System.err.println("Matched "+i+" "+j);
raceStats[i][j].add(error);
// does this race have a clear winner, meaning we can
// terminate the whole set of parallel races?
if (raceStats[i][0].count > m_samples &&
raceStats[i][1].count > m_samples) {
raceStats[i][0].calculateDerived();
raceStats[i][1].calculateDerived();
// System.err.println(j+" : "+(new String(parallelRaces[i][j])));
// System.err.println(raceStats[i][0]);
// System.err.println(raceStats[i][1]);
// check the ttest
double prob = ttest(raceStats[i][0], raceStats[i][1]);
// System.err.println("Prob :"+prob);
if (prob < m_sigLevel) { // stop the races we have a winner!
if (raceStats[i][0].mean < raceStats[i][1].mean) {
base = (char [])parallelRaces[i][0].clone();
m_bestMerit = raceStats[i][0].mean;
if (m_debug) {
System.err.println("contender 0 won ");
}
} else {
base = (char [])parallelRaces[i][1].clone();
m_bestMerit = raceStats[i][1].mean;
if (m_debug) {
System.err.println("contender 1 won");
}
}
if (m_debug) {
System.err.println((new String(parallelRaces[i][0]))
+" "+(new String(parallelRaces[i][1])));
System.err.println("Means : "+raceStats[i][0].mean
+" vs"+raceStats[i][1].mean);
System.err.println("Evaluations so far : "
+evaluationCount);
}
won = true;
break checkRaces;
}
}
}
}
}
}
numRaces--;
// set up the next set of races if necessary
if (numRaces > 0 && won) {
parallelRaces = new char [numRaces][2][m_numAttribs-1];
raceStats = new Stats[numRaces][2];
// update the attribute constraints
for (int i=0;i<m_numAttribs;i++) {
if (i != m_classIndex && !attributeConstraints[i] &&
base[i] != '*') {
attributeConstraints[i] = true;
break;
}
}
count=0;
for (int i=0;i<numRaces;i++) {
parallelRaces[i][0] = (char [])base.clone();
parallelRaces[i][1] = (char [])base.clone();
for (int j=count;j<m_numAttribs;j++) {
if (j != m_classIndex && parallelRaces[i][0][j] == '*') {
parallelRaces[i][0][j] = '1';
parallelRaces[i][1][j] = '0';
count = j+1;
break;
}
}
}
if (m_debug) {
System.err.println("Next sets:\n");
for (int i=0;i<numRaces;i++) {
System.err.print(printSets(parallelRaces[i])+"--------------\n");
}
}
}
}
if (m_debug) {
System.err.println("Total evaluations : "
+evaluationCount);
}
return attributeList(base);
}
// t-test for unequal sample sizes and same variance. Returns probability
// that observed difference in means is due to chance.
private double ttest(Stats c1, Stats c2) throws Exception {
double n1 = c1.count; double n2 = c2.count;
double v1 = c1.stdDev * c1.stdDev;
double v2 = c2.stdDev * c2.stdDev;
double av1 = c1.mean;
double av2 = c2.mean;
double df = n1 + n2 - 2;
double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) /df;
double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));
return Statistics.incompleteBeta(df / 2.0, 0.5,
df / (df + (t * t)));
}
/**
* Performs a rank race---race consisting of no attributes, the top
* ranked attribute, the top two attributes etc. The initial ranking
* is determined by an attribute evaluator.
* @param data the instances to estimate accuracy over
* @param random a random number generator
* @return an array of selected attribute indices.
*/
private int [] rankRace(Instances data, Random random) throws Exception {
char [] baseSet = new char [m_numAttribs];
char [] bestSet;
double bestSetError;
for (int i=0;i<m_numAttribs;i++) {
if (i == m_classIndex) {
baseSet[i] = '-';
} else {
baseSet[i] = '0';
}
}
int numCompetitors = m_numAttribs-1;
char [][] raceSets = new char [numCompetitors+1][m_numAttribs];
int winner;
if (m_ASEval instanceof AttributeEvaluator) {
// generate the attribute ranking first
Ranker ranker = new Ranker();
((AttributeEvaluator)m_ASEval).buildEvaluator(data);
m_Ranking = ranker.search((AttributeEvaluator)m_ASEval,data);
} else {
GreedyStepwise fs = new GreedyStepwise();
double [][]rankres;
fs.setGenerateRanking(true);
((SubsetEvaluator)m_ASEval).buildEvaluator(data);
fs.search(m_ASEval, data);
rankres = fs.rankedAttributes();
m_Ranking = new int[rankres.length];
for (int i=0;i<rankres.length;i++) {
m_Ranking[i] = (int)rankres[i][0];
}
}
// set up the race
raceSets[0] = (char [])baseSet.clone();
for (int i=0;i<m_Ranking.length;i++) {
raceSets[i+1] = (char [])raceSets[i].clone();
raceSets[i+1][m_Ranking[i]] = '1';
}
if (m_debug) {
System.err.println("Initial sets:\n"+printSets(raceSets));
}
// run the race
double [] winnerInfo = raceSubsets(raceSets, data, true, random);
bestSetError = winnerInfo[1];
bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();
m_bestMerit = bestSetError;
return attributeList(bestSet);
}
/**
* Performs a hill climbing race---all single attribute changes to a
* base subset are raced in parallel. The winner is chosen and becomes
* the new base subset and the process is repeated until there is no
* improvement in error over the base subset.
* @param data the instances to estimate accuracy over
* @param random a random number generator
* @return an array of selected attribute indices.
*/
private int [] hillclimbRace(Instances data, Random random) throws Exception {
double baseSetError;
char [] baseSet = new char [m_numAttribs];
int rankCount = 0;
for (int i=0;i<m_numAttribs;i++) {
if (i != m_classIndex) {
if (m_raceType == FORWARD_RACE) {
baseSet[i] = '0';
} else {
baseSet[i] = '1';
}
} else {
baseSet[i] = '-';
}
}
int numCompetitors = m_numAttribs-1;
char [][] raceSets = new char [numCompetitors+1][m_numAttribs];
int winner;
raceSets[0] = (char [])baseSet.clone();
int count = 1;
// initialize each race set to 1 attribute
for (int i=0;i<m_numAttribs;i++) {
if (i != m_classIndex) {
raceSets[count] = (char [])baseSet.clone();
if (m_raceType == BACKWARD_RACE) {
raceSets[count++][i] = '0';
} else {
raceSets[count++][i] = '1';
}
}
}
if (m_debug) {
System.err.println("Initial sets:\n"+printSets(raceSets));
}
// race the initial sets (base set either no or all features)
double [] winnerInfo = raceSubsets(raceSets, data, true, random);
baseSetError = winnerInfo[1];
m_bestMerit = baseSetError;
baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
if (m_rankingRequested) {
m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0]-1);
m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
m_rankedSoFar++;
}
boolean improved = true;
int j;
// now race until there is no improvement over the base set or only
// one competitor remains
while (improved) {
// generate the next set of competitors
numCompetitors--;
if (numCompetitors == 0) { //race finished!
break;
}
j=0;
// +1. we'll race against the base set---might be able to bail out
// of the race if none from the new set are statistically better
// than the base set. Base set is stored in loc 0.
raceSets = new char [numCompetitors+1][m_numAttribs];
for (int i=0;i<numCompetitors+1;i++) {
raceSets[i] = (char [])baseSet.clone();
if (i > 0) {
for (int k=j;k<m_numAttribs;k++) {
if (m_raceType == 1) {
if (k != m_classIndex && raceSets[i][k] != '0') {
raceSets[i][k] = '0';
j = k+1;
break;
}
} else {
if (k != m_classIndex && raceSets[i][k] != '1') {
raceSets[i][k] = '1';
j = k+1;
break;
}
}
}
}
}
if (m_debug) {
System.err.println("Next set : \n"+printSets(raceSets));
}
improved = false;
winnerInfo = raceSubsets(raceSets, data, true, random);
String bs = new String(baseSet);
String win = new String(raceSets[(int)winnerInfo[0]]);
if (bs.compareTo(win) == 0) {
// race finished
} else {
if (winnerInfo[1] < baseSetError || m_rankingRequested) {
improved = true;
baseSetError = winnerInfo[1];
m_bestMerit = baseSetError;
// find which att is different
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -