racesearch.java

来自「这是关于数据挖掘的一些算法」· Java 代码 · 共 1,770 行 · 第 1/5 页
JAVA
1,770 行
          }          for (int j=0;j<2;j++) {            boolean matched = true;            for (int k =0;k<m_numAttribs;k++) {              if (parallelRaces[i][j][k] != '*') {                if (parallelRaces[i][j][k] != randomBC[k]) {                  matched = false;                  break;                }              }            }            if (matched) { // update the stats for this racer              //              System.err.println("Matched "+i+" "+j);              raceStats[i][j].add(error);                // does this race have a clear winner, meaning we can                // terminate the whole set of parallel races?                if (raceStats[i][0].count > m_samples &&                    raceStats[i][1].count > m_samples) {                  raceStats[i][0].calculateDerived();                  raceStats[i][1].calculateDerived();                  //              System.err.println(j+" : "+(new String(parallelRaces[i][j])));                  //              System.err.println(raceStats[i][0]);                  //              System.err.println(raceStats[i][1]);                  // check the ttest                  double prob = ttest(raceStats[i][0], raceStats[i][1]);                  //              System.err.println("Prob :"+prob);                  if (prob < m_sigLevel) { // stop the races we have a winner!                    if (raceStats[i][0].mean < raceStats[i][1].mean) {                      base = (char [])parallelRaces[i][0].clone();                      m_bestMerit = raceStats[i][0].mean;                      if (m_debug) {                        System.err.println("contender 0 won ");                      }                    } else {                      base = (char [])parallelRaces[i][1].clone();                      m_bestMerit = raceStats[i][1].mean;                      if (m_debug) {                        System.err.println("contender 1 won");                      }                    }                    if (m_debug) {                      System.err.println((new String(parallelRaces[i][0]))                                 +" "+(new String(parallelRaces[i][1])));                      System.err.println("Means : "+raceStats[i][0].mean                                         +" vs"+raceStats[i][1].mean);                      System.err.println("Evaluations so far : "                                         +evaluationCount);                    }                    won = true;                    break checkRaces;                  }                }                         }          }        }      }      numRaces--;      // set up the next set of races if necessary      if (numRaces > 0 && won) {        parallelRaces = new char [numRaces][2][m_numAttribs-1];        raceStats = new Stats[numRaces][2];        // update the attribute constraints        for (int i=0;i<m_numAttribs;i++) {          if (i != m_classIndex && !attributeConstraints[i] &&              base[i] != '*') {            attributeConstraints[i] = true;            break;          }        }        count=0;        for (int i=0;i<numRaces;i++) {          parallelRaces[i][0] = (char [])base.clone();          parallelRaces[i][1] = (char [])base.clone();          for (int j=count;j<m_numAttribs;j++) {            if (j != m_classIndex && parallelRaces[i][0][j] == '*') {              parallelRaces[i][0][j] = '1';              parallelRaces[i][1][j] = '0';              count = j+1;              break;            }          }        }                if (m_debug) {          System.err.println("Next sets:\n");          for (int i=0;i<numRaces;i++) {            System.err.print(printSets(parallelRaces[i])+"--------------\n");          }        }      }    }    if (m_debug) {      System.err.println("Total evaluations : "                         +evaluationCount);    }    return attributeList(base);  }  /**    * t-test for unequal sample sizes and same variance. Returns probability   * that observed difference in means is due to chance.   */  private double ttest(Stats c1, Stats c2) throws Exception {    double n1 = c1.count; double n2 = c2.count;    double v1 = c1.stdDev * c1.stdDev;    double v2 = c2.stdDev * c2.stdDev;    double av1 = c1.mean;    double av2 = c2.mean;        double df = n1 + n2 - 2;    double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) /df;    double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));        return Statistics.incompleteBeta(df / 2.0, 0.5,                                     df / (df + (t * t)));  }      /**   * Performs a rank race---race consisting of no attributes, the top   * ranked attribute, the top two attributes etc. The initial ranking   * is determined by an attribute evaluator.   * @param data the instances to estimate accuracy over   * @param random a random number generator   * @return an array of selected attribute indices.   */  private int [] rankRace(Instances data, Random random) throws Exception {    char [] baseSet = new char [m_numAttribs];    char [] bestSet;    double bestSetError;    for (int i=0;i<m_numAttribs;i++) {      if (i == m_classIndex) {        baseSet[i] = '-';      } else {        baseSet[i] = '0';      }    }    int numCompetitors = m_numAttribs-1;    char [][] raceSets = new char [numCompetitors+1][m_numAttribs];        if (m_ASEval instanceof AttributeEvaluator) {      // generate the attribute ranking first      Ranker ranker = new Ranker();      ((AttributeEvaluator)m_ASEval).buildEvaluator(data);      m_Ranking = ranker.search((AttributeEvaluator)m_ASEval,data);    } else {      GreedyStepwise fs = new GreedyStepwise();      double [][]rankres;       fs.setGenerateRanking(true);      ((SubsetEvaluator)m_ASEval).buildEvaluator(data);      fs.search(m_ASEval, data);      rankres = fs.rankedAttributes();      m_Ranking = new int[rankres.length];      for (int i=0;i<rankres.length;i++) {        m_Ranking[i] = (int)rankres[i][0];      }    }    // set up the race    raceSets[0] = (char [])baseSet.clone();    for (int i=0;i<m_Ranking.length;i++) {      raceSets[i+1] = (char [])raceSets[i].clone();      raceSets[i+1][m_Ranking[i]] = '1';    }        if (m_debug) {      System.err.println("Initial sets:\n"+printSets(raceSets));    }        // run the race    double [] winnerInfo = raceSubsets(raceSets, data, true, random);    bestSetError = winnerInfo[1];    bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();    m_bestMerit = bestSetError;    return attributeList(bestSet);  }    /**   * Performs a hill climbing race---all single attribute changes to a   * base subset are raced in parallel. The winner is chosen and becomes   * the new base subset and the process is repeated until there is no   * improvement in error over the base subset.   * @param data the instances to estimate accuracy over   * @param random a random number generator   * @return an array of selected attribute indices.   * @throws Exception if something goes wrong   */  private int [] hillclimbRace(Instances data, Random random) throws Exception {    double baseSetError;    char [] baseSet = new char [m_numAttribs];    for (int i=0;i<m_numAttribs;i++) {      if (i != m_classIndex) {        if (m_raceType == FORWARD_RACE) {          baseSet[i] = '0';        } else {          baseSet[i] = '1';        }       } else {        baseSet[i] = '-';      }    }    int numCompetitors = m_numAttribs-1;    char [][] raceSets = new char [numCompetitors+1][m_numAttribs];    raceSets[0] = (char [])baseSet.clone();    int count = 1;    // initialize each race set to 1 attribute    for (int i=0;i<m_numAttribs;i++) {      if (i != m_classIndex) {        raceSets[count] = (char [])baseSet.clone();        if (m_raceType == BACKWARD_RACE) {          raceSets[count++][i] = '0';        } else {          raceSets[count++][i] = '1';        }      }    }    if (m_debug) {      System.err.println("Initial sets:\n"+printSets(raceSets));    }        // race the initial sets (base set either no or all features)    double [] winnerInfo = raceSubsets(raceSets, data, true, random);    baseSetError = winnerInfo[1];    m_bestMerit = baseSetError;    baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();    if (m_rankingRequested) {      m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0]-1);      m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];      m_rankedSoFar++;    }    boolean improved = true;    int j;    // now race until there is no improvement over the base set or only    // one competitor remains    while (improved) {      // generate the next set of competitors      numCompetitors--;      if (numCompetitors == 0) { //race finished!        break;      }      j=0;      // +1. we'll race against the base set---might be able to bail out      // of the race if none from the new set are statistically better      // than the base set. Base set is stored in loc 0.      raceSets = new char [numCompetitors+1][m_numAttribs];      for (int i=0;i<numCompetitors+1;i++) {        raceSets[i] = (char [])baseSet.clone();        if (i > 0) {          for (int k=j;k<m_numAttribs;k++) {            if (m_raceType == 1) {              if (k != m_classIndex && raceSets[i][k] != '0') {                raceSets[i][k] = '0';                j = k+1;                break;              }            } else {              if (k != m_classIndex && raceSets[i][k] != '1') {                raceSets[i][k] = '1';                j = k+1;                break;              }            }          }        }      }            if (m_debug) {        System.err.println("Next set : \n"+printSets(raceSets));      }      improved = false;      winnerInfo = raceSubsets(raceSets, data, true, random);      String bs = new String(baseSet);       String win = new String(raceSets[(int)winnerInfo[0]]);      if (bs.compareTo(win) == 0) {        // race finished      } else {        if (winnerInfo[1] < baseSetError || m_rankingRequested) {          improved = true;          baseSetError = winnerInfo[1];          m_bestMerit = baseSetError;          // find which att is different          if (m_rankingRequested) {            for (int i = 0; i < baseSet.length; i++) {              if (win.charAt(i) != bs.charAt(i)) {                m_rankedAtts[m_rankedSoFar][0] = i;                m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];                m_rankedSoFar++;              }            }          }          baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();        } else {          // Will get here for a subset whose error is outside the delta          // threshold but is not *significantly* worse than the base          // subset          //throw new Exception("RaceSearch: problem in hillClimbRace");        }      }    }    return attributeList(baseSet);  }  /**   * Convert an attribute set to an array of indices   */  private int [] attributeList(char [] list) {    int count = 0;    for (int i=0;i<m_numAttribs;i++) {      if (list[i] == '1') {        count++;      }    }    int [] rlist = new int[count];    count = 0;     for (int i=0;i<m_numAttribs;i++) {       if (list[i] == '1') {         rlist[count++] = i;       }     }     return rlist;  }  /**   * Races the leave-one-out cross validation errors of a set of   * attribute subsets on a set of instances.   * @param raceSets a set of attribute subset specifications   * @param data the instances to use when cross validating   * @param baseSetIncluded true if the first attribute set is a   * base set generated from the previous race   * @param random a random number generator   * @return the index of the winning subset   * @throws Exception if an error occurs during cross validation   */  private double [] raceSubsets(char [][]raceSets, Instances data,                                boolean baseSetIncluded, Random random)     throws Exception {    // the evaluators --- one for each subset    ASEvaluation [] evaluators =       ASEvaluation.makeCopies(m_theEvaluator, raceSets.length);    // array of subsets eliminated from the race    boolean [] eliminated = new boolean [raceSets.length];
racesearch.java - 源码说明

本页面展示了「这是关于数据挖掘的一些算法」中的 racesearch.java 源码文件，采用 Java 编程语言编写，共 1,770 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与数据挖掘相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?