
racesearch.java

Folder: 6 machine learning methods for feature extraction
Language: JAVA
Page 1 of 4
    }
    return bestSubset;
  }

  public double [][] rankedAttributes() throws Exception {
    if (!m_rankingRequested) {
      throw new Exception("Need to request a ranked list of attributes "
                          + "before attributes can be ranked (RaceSearch).");
    }
    if (m_rankedAtts == null) {
      throw new Exception("Search must be performed before attributes "
                          + "can be ranked (RaceSearch).");
    }

    double [][] final_rank = new double [m_rankedSoFar][2];
    for (int i = 0; i < m_rankedSoFar; i++) {
      final_rank[i][0] = m_rankedAtts[i][0];
      final_rank[i][1] = m_rankedAtts[i][1];
    }

    if (m_numToSelect <= 0) {
      if (m_threshold == -Double.MAX_VALUE) {
        m_calculatedNumToSelect = final_rank.length;
      } else {
        determineNumToSelectFromThreshold(final_rank);
      }
    }
    return final_rank;
  }

  private void determineNumToSelectFromThreshold(double [][] ranking) {
    int count = 0;
    for (int i = 0; i < ranking.length; i++) {
      if (ranking[i][1] > m_threshold) {
        count++;
      }
    }
    m_calculatedNumToSelect = count;
  }

  /**
   * Print an attribute set.
   */
  private String printSets(char [][] raceSets) {
    StringBuffer temp = new StringBuffer();
    for (int i = 0; i < raceSets.length; i++) {
      for (int j = 0; j < m_numAttribs; j++) {
        temp.append(raceSets[i][j]);
      }
      temp.append('\n');
    }
    return temp.toString();
  }

  /**
   * Performs a schemata race---a series of races in parallel.
   * @param data the instances to estimate accuracy over.
   * @param random a random number generator
   * @return an array of selected attribute indices.
   */
  private int [] schemataRace(Instances data, Random random) throws Exception {
    // # races, 2 (competitors in each race), # attributes
    char [][][] parallelRaces;
    int numRaces = m_numAttribs - 1;
    Random r = new Random(42);
    int numInstances = data.numInstances();
    Instances trainCV;
    Instances testCV;
    Instance testInstance;
    // statistics on the racers
    Stats [][] raceStats = new Stats[numRaces][2];

    parallelRaces = new char [numRaces][2][m_numAttribs - 1];
    char [] base = new char [m_numAttribs];
    for (int i = 0; i < m_numAttribs; i++) {
      base[i] = '*';
    }

    int count = 0;
    // set up initial races
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        parallelRaces[count][0] = (char [])base.clone();
        parallelRaces[count][1] = (char [])base.clone();
        parallelRaces[count][0][i] = '1';
        parallelRaces[count++][1][i] = '0';
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n");
      for (int i = 0; i < numRaces; i++) {
        System.err.print(printSets(parallelRaces[i]) + "--------------\n");
      }
    }

    BitSet randomB = new BitSet(m_numAttribs);
    char [] randomBC = new char [m_numAttribs];
    // notes which bit positions have been decided
    boolean [] attributeConstraints = new boolean[m_numAttribs];
    double error;
    int evaluationCount = 0;

    raceSet: while (numRaces > 0) {
      boolean won = false;
      for (int i = 0; i < numRaces; i++) {
        raceStats[i][0] = new Stats();
        raceStats[i][1] = new Stats();
      }

      // keep an eye on how many test instances have been randomly sampled
      int sampleCount = 0;

      // run the current set of races
      while (!won) {
        // generate a random binary string
        for (int i = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex) {
            if (!attributeConstraints[i]) {
              if (r.nextDouble() < 0.5) {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            } else { // this position has been decided from previous races
              if (base[i] == '1') {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            }
          }
        }

        // randomly select an instance to test on
        int testIndex = Math.abs(r.nextInt() % numInstances);

        // We want to randomize the data the same way for every
        // learning scheme.
        trainCV = data.trainCV(numInstances, testIndex, new Random(1));
        testCV = data.testCV(numInstances, testIndex);
        testInstance = testCV.instance(0);
        sampleCount++;

        /*
        if (sampleCount > numInstances) {
          throw new Exception("raceSchemata: No clear winner after sampling "
                              + sampleCount + " instances.");
        } */

        m_theEvaluator.buildEvaluator(trainCV);

        // the evaluator must retrain for every test point
        error = -((HoldOutSubsetEvaluator)m_theEvaluator).
          evaluateSubset(randomB,
                         testInstance,
                         true);
        evaluationCount++;

        // see which racers match this random subset
        for (int i = 0; i < m_numAttribs; i++) {
          if (randomB.get(i)) {
            randomBC[i] = '1';
          } else {
            randomBC[i] = '0';
          }
        }
        // System.err.println("Random subset: " + (new String(randomBC)));

        checkRaces: for (int i = 0; i < numRaces; i++) {
          // if a pair of racers has evaluated more than num instances
          // then bail out---unlikely that having any more atts is any
          // better than the current base set.
          if (((raceStats[i][0].count + raceStats[i][1].count) / 2) >
              (numInstances)) {
            break raceSet;
          }

          for (int j = 0; j < 2; j++) {
            boolean matched = true;
            for (int k = 0; k < m_numAttribs; k++) {
              if (parallelRaces[i][j][k] != '*') {
                if (parallelRaces[i][j][k] != randomBC[k]) {
                  matched = false;
                  break;
                }
              }
            }

            if (matched) { // update the stats for this racer
              // System.err.println("Matched " + i + " " + j);
              raceStats[i][j].add(error);

              // does this race have a clear winner, meaning we can
              // terminate the whole set of parallel races?
              if (raceStats[i][0].count > m_samples &&
                  raceStats[i][1].count > m_samples) {
                raceStats[i][0].calculateDerived();
                raceStats[i][1].calculateDerived();
                // System.err.println(j + " : " + (new String(parallelRaces[i][j])));
                // System.err.println(raceStats[i][0]);
                // System.err.println(raceStats[i][1]);

                // check the ttest
                double prob = ttest(raceStats[i][0], raceStats[i][1]);
                // System.err.println("Prob :" + prob);

                if (prob < m_sigLevel) { // stop the races we have a winner!
                  if (raceStats[i][0].mean < raceStats[i][1].mean) {
                    base = (char [])parallelRaces[i][0].clone();
                    m_bestMerit = raceStats[i][0].mean;
                    if (m_debug) {
                      System.err.println("contender 0 won ");
                    }
                  } else {
                    base = (char [])parallelRaces[i][1].clone();
                    m_bestMerit = raceStats[i][1].mean;
                    if (m_debug) {
                      System.err.println("contender 1 won");
                    }
                  }

                  if (m_debug) {
                    System.err.println((new String(parallelRaces[i][0]))
                                       + " " + (new String(parallelRaces[i][1])));
                    System.err.println("Means : " + raceStats[i][0].mean
                                       + " vs" + raceStats[i][1].mean);
                    System.err.println("Evaluations so far : "
                                       + evaluationCount);
                  }
                  won = true;
                  break checkRaces;
                }
              }
            }
          }
        }
      }

      numRaces--;

      // set up the next set of races if necessary
      if (numRaces > 0 && won) {
        parallelRaces = new char [numRaces][2][m_numAttribs - 1];
        raceStats = new Stats[numRaces][2];

        // update the attribute constraints
        for (int i = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex && !attributeConstraints[i] &&
              base[i] != '*') {
            attributeConstraints[i] = true;
            break;
          }
        }

        count = 0;
        for (int i = 0; i < numRaces; i++) {
          parallelRaces[i][0] = (char [])base.clone();
          parallelRaces[i][1] = (char [])base.clone();
          for (int j = count; j < m_numAttribs; j++) {
            if (j != m_classIndex && parallelRaces[i][0][j] == '*') {
              parallelRaces[i][0][j] = '1';
              parallelRaces[i][1][j] = '0';
              count = j + 1;
              break;
            }
          }
        }

        if (m_debug) {
          System.err.println("Next sets:\n");
          for (int i = 0; i < numRaces; i++) {
            System.err.print(printSets(parallelRaces[i]) + "--------------\n");
          }
        }
      }
    }

    if (m_debug) {
      System.err.println("Total evaluations : "
                         + evaluationCount);
    }
    return attributeList(base);
  }

  /**
   * t-test for unequal sample sizes and same variance. Returns probability
   * that observed difference in means is due to chance.
   */
  private double ttest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count;
    double n2 = c2.count;
    double v1 = c1.stdDev * c1.stdDev;
    double v2 = c2.stdDev * c2.stdDev;
    double av1 = c1.mean;
    double av2 = c2.mean;

    double df = n1 + n2 - 2;
    double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) / df;
    double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));

    return Statistics.incompleteBeta(df / 2.0, 0.5,
                                     df / (df + (t * t)));
  }

  /**
   * Performs a rank race---race consisting of no attributes, the top
   * ranked attribute, the top two attributes etc. The initial ranking
   * is determined by an attribute evaluator.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices.
   */
  private int [] rankRace(Instances data, Random random) throws Exception {
    char [] baseSet = new char [m_numAttribs];
    char [] bestSet;
    double bestSetError;
    for (int i = 0; i < m_numAttribs; i++) {
      if (i == m_classIndex) {
        baseSet[i] = '-';
      } else {
        baseSet[i] = '0';
      }
    }

    int numCompetitors = m_numAttribs - 1;
    char [][] raceSets = new char [numCompetitors + 1][m_numAttribs];

    if (m_ASEval instanceof AttributeEvaluator) {
      // generate the attribute ranking first
      Ranker ranker = new Ranker();
      ((AttributeEvaluator)m_ASEval).buildEvaluator(data);
      m_Ranking = ranker.search((AttributeEvaluator)m_ASEval, data);
    } else {
      GreedyStepwise fs = new GreedyStepwise();
      double [][] rankres;
      fs.setGenerateRanking(true);
      ((SubsetEvaluator)m_ASEval).buildEvaluator(data);
      fs.search(m_ASEval, data);
      rankres = fs.rankedAttributes();
      m_Ranking = new int[rankres.length];
      for (int i = 0; i < rankres.length; i++) {
        m_Ranking[i] = (int)rankres[i][0];
      }
    }

    // set up the race
    raceSets[0] = (char [])baseSet.clone();
    for (int i = 0; i < m_Ranking.length; i++) {
      raceSets[i + 1] = (char [])raceSets[i].clone();
      raceSets[i + 1][m_Ranking[i]] = '1';
    }

    if (m_debug) {
      System.err.println("Initial sets:\n" + printSets(raceSets));
    }

    // run the race
    double [] winnerInfo = raceSubsets(raceSets, data, true, random);
    bestSetError = winnerInfo[1];
    bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    m_bestMerit = bestSetError;
    return attributeList(bestSet);
  }

  /**
   * Performs a hill climbing race---all single attribute changes to a
   * base subset are raced in parallel. The winner is chosen and becomes
   * the new base subset and the process is repeated until there is no
   * improvement in error over the base subset.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices.
   * @throws Exception if something goes wrong
   */
  private int [] hillclimbRace(Instances data, Random random) throws Exception {
    double baseSetError;
    char [] baseSet = new char [m_numAttribs];
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        if (m_raceType == FORWARD_RACE) {
          baseSet[i] = '0';
        } else {
          baseSet[i] = '1';
        }
      } else {
        baseSet[i] = '-';
      }
    }

    int numCompetitors = m_numAttribs - 1;
    char [][] raceSets = new char [numCompetitors + 1][m_numAttribs];
    raceSets[0] = (char [])baseSet.clone();
    int count = 1;

    // initialize each race set to 1 attribute
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        raceSets[count] = (char [])baseSet.clone();
        if (m_raceType == BACKWARD_RACE) {
          raceSets[count++][i] = '0';
        } else {
          raceSets[count++][i] = '1';
        }
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n" + printSets(raceSets));
    }

    // race the initial sets (base set either no or all features)
    double [] winnerInfo = raceSubsets(raceSets, data, true, random);
    baseSetError = winnerInfo[1];
    m_bestMerit = baseSetError;
    baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    if (m_rankingRequested) {
      m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0] - 1);
      m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
      m_rankedSoFar++;
    }

    boolean improved = true;
    int j;
    // now race until there is no improvement over the base set or only
    // one competitor remains
    while (improved) {
      // generate the next set of competitors
      numCompetitors--;
      if (numCompetitors == 0) { // race finished!
        break;
      }
      j = 0;
      // +1. we'll race against the base set---might be able to bail out
      // of the race if none from the new set are statistically better
      // than the base set. Base set is stored in loc 0.
      raceSets = new char [numCompetitors + 1][m_numAttribs];
      for (int i = 0; i < numCompetitors + 1; i++) {
        raceSets[i] = (char [])baseSet.clone();
        if (i > 0) {
          for (int k = j; k < m_numAttribs; k++) {
            if (m_raceType == 1) {
              if (k != m_classIndex && raceSets[i][k] != '0') {
                raceSets[i][k] = '0';
                j = k + 1;
                break;
              }
            } else {
              if (k != m_classIndex && raceSets[i][k] != '1') {
                raceSets[i][k] = '1';
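
(The listing is truncated here; it continues on the following pages of the viewer.) For readers who want to try the class, below is a minimal usage sketch of how a race-based attribute search of this kind is normally driven through Weka's AttributeSelection API. Everything not visible in the fragment above is an assumption: the package and class name (weka.attributeSelection.RaceSearch), the ClassifierSubsetEval hold-out evaluator, the TAGS_SELECTION constant, and the "data.arff" file name are taken from a standard Weka 3.x distribution, not from this page.

// Minimal usage sketch -- NOT part of racesearch.java.
// Assumptions: standard Weka 3.x API; ClassifierSubsetEval implements the
// HoldOutSubsetEvaluator interface the race requires; "data.arff" is a
// hypothetical dataset whose last attribute is the class.
import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.ClassifierSubsetEval;
import weka.attributeSelection.RaceSearch;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.SelectedTag;
import weka.core.converters.ConverterUtils.DataSource;

public class RaceSearchDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("data.arff");   // hypothetical file name
    data.setClassIndex(data.numAttributes() - 1);

    // hold-out subset evaluator wrapped around a simple classifier
    ClassifierSubsetEval eval = new ClassifierSubsetEval();
    eval.setClassifier(new NaiveBayes());

    // forward hill-climbing race; FORWARD_RACE appears in the listing above,
    // TAGS_SELECTION is assumed from the standard Weka distribution
    RaceSearch search = new RaceSearch();
    search.setRaceType(new SelectedTag(RaceSearch.FORWARD_RACE,
                                       RaceSearch.TAGS_SELECTION));

    AttributeSelection sel = new AttributeSelection();
    sel.setEvaluator(eval);
    sel.setSearch(search);
    sel.SelectAttributes(data);
    System.out.println(sel.toResultsString());
  }
}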
