📄 racesearch.java

📁 这是关于数据挖掘的一些算法
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
   * </pre>   *    * <pre> -M   *  treat missing values as a seperate value.</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions (String[] options)    throws Exception {    String optionString;    resetOptions();        optionString = Utils.getOption('R', options);    if (optionString.length() != 0) {      setRaceType(new SelectedTag(Integer.parseInt(optionString),                                  TAGS_SELECTION));    }        optionString = Utils.getOption('F', options);    if (optionString.length() != 0) {      setFoldsType(new SelectedTag(Integer.parseInt(optionString),                                  XVALTAGS_SELECTION));    }    optionString = Utils.getOption('L', options);    if (optionString.length() !=0) {      setSignificanceLevel(Double.parseDouble(optionString));    }    optionString = Utils.getOption('T', options);    if (optionString.length() !=0) {      setThreshold(Double.parseDouble(optionString));    }    optionString = Utils.getOption('A', options);    if (optionString.length() != 0) {      setAttributeEvaluator(ASEvaluation.forName(optionString,                             Utils.partitionOptions(options)));    }    setGenerateRanking(Utils.getFlag('Q', options));    optionString = Utils.getOption('J', options);    if (optionString.length() != 0) {      setSelectionThreshold(Double.parseDouble(optionString));    }        optionString = Utils.getOption('N', options);    if (optionString.length() != 0) {      setNumToSelect(Integer.parseInt(optionString));    }    setDebug(Utils.getFlag('Z', options));  }  /**   * Gets the current settings of BestFirst.   * @return an array of strings suitable for passing to setOptions()   */  public String[] getOptions () {    int current = 0;    String[] evaluatorOptions = new String[0];    if ((m_ASEval != null) &&         (m_ASEval instanceof OptionHandler)) {      evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();    }    String[] options = new String[17+evaluatorOptions.length];    options[current++] = "-R"; options[current++] = ""+m_raceType;    options[current++] = "-L"; options[current++] = ""+getSignificanceLevel();    options[current++] = "-T"; options[current++] = ""+getThreshold();    options[current++] = "-F"; options[current++] = ""+m_xvalType;    if (getGenerateRanking()) {      options[current++] = "-Q";    }    options[current++] = "-N"; options[current++] = ""+getNumToSelect();    options[current++] = "-J"; options[current++] = ""+getSelectionThreshold();    if (getDebug()) {      options[current++] = "-Z";    }        if (getAttributeEvaluator() != null) {      options[current++] = "-A";      options[current++] = getAttributeEvaluator().getClass().getName();      options[current++] = "--";      System.arraycopy(evaluatorOptions, 0, options, current,                        evaluatorOptions.length);      current += evaluatorOptions.length;    }        while (current < options.length) {      options[current++] = "";    }    return  options;  }  /**   * Searches the attribute subset space by racing cross validation   * errors of competing subsets   *   * @param ASEval the attribute evaluator to guide the search   * @param data the training instances.   * @return an array (not necessarily ordered) of selected attribute indexes   * @throws Exception if the search can't be completed   */  public int[] search (ASEvaluation ASEval, Instances data)    throws Exception {    if (!(ASEval instanceof SubsetEvaluator)) {      throw  new Exception(ASEval.getClass().getName()                            + " is not a "                            + "Subset evaluator! (RaceSearch)");    }    if (ASEval instanceof UnsupervisedSubsetEvaluator) {      throw new Exception("Can't use an unsupervised subset evaluator "                          +"(RaceSearch).");    }    if (!(ASEval instanceof HoldOutSubsetEvaluator)) {      throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "                          +"weka.attributeSelection.ClassifierSubsetEval "                          +"(RaceSearch)");    }    if (!(ASEval instanceof ErrorBasedMeritEvaluator)) {      throw new Exception("Only error based subset evaluators can be used, "                          +"eg. weka.attributeSelection.ClassifierSubsetEval "                          +"(RaceSearch)");    }    m_Instances = new Instances(data);    m_Instances.deleteWithMissingClass();    if (m_Instances.numInstances() == 0) {      throw new Exception("All train instances have missing class! (RaceSearch)");    }    if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) {      throw new Exception("More attributes requested than exist in the data "                          +"(RaceSearch).");    }    m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;    m_numAttribs = m_Instances.numAttributes();    m_classIndex = m_Instances.classIndex();    if (m_rankingRequested) {      m_rankedAtts = new double[m_numAttribs-1][2];      m_rankedSoFar = 0;    }    if (m_xvalType == LEAVE_ONE_OUT) {      m_numFolds = m_Instances.numInstances();    } else {      m_numFolds = 10;    }    Random random = new Random(1); // I guess this should really be a parameter?    m_Instances.randomize(random);    int [] bestSubset=null;    switch (m_raceType) {    case FORWARD_RACE:    case BACKWARD_RACE:       bestSubset = hillclimbRace(m_Instances, random);      break;    case SCHEMATA_RACE:      bestSubset = schemataRace(m_Instances, random);      break;    case RANK_RACE:      bestSubset = rankRace(m_Instances, random);      break;    }    return bestSubset;  }  public double [][] rankedAttributes() throws Exception {    if (!m_rankingRequested) {      throw new Exception("Need to request a ranked list of attributes "                          +"before attributes can be ranked (RaceSearch).");    }    if (m_rankedAtts == null) {      throw new Exception("Search must be performed before attributes "                          +"can be ranked (RaceSearch).");    }        double [][] final_rank = new double [m_rankedSoFar][2];    for (int i=0;i<m_rankedSoFar;i++) {      final_rank[i][0] = m_rankedAtts[i][0];      final_rank[i][1] = m_rankedAtts[i][1];    }    if (m_numToSelect <= 0) {      if (m_threshold == -Double.MAX_VALUE) {        m_calculatedNumToSelect = final_rank.length;      } else {        determineNumToSelectFromThreshold(final_rank);      }    }    return final_rank;  }  private void determineNumToSelectFromThreshold(double [][] ranking) {    int count = 0;    for (int i = 0; i < ranking.length; i++) {      if (ranking[i][1] > m_threshold) {        count++;      }    }    m_calculatedNumToSelect = count;  }  /**   * Print an attribute set.   */  private String printSets(char [][]raceSets) {    StringBuffer temp = new StringBuffer();    for (int i=0;i<raceSets.length;i++) {      for (int j=0;j<m_numAttribs;j++) {        temp.append(raceSets[i][j]);      }      temp.append('\n');    }    return temp.toString();  }  /**   * Performs a schemata race---a series of races in parallel.   * @param data the instances to estimate accuracy over.   * @param random a random number generator   * @return an array of selected attribute indices.   */  private int [] schemataRace(Instances data, Random random) throws Exception {    // # races, 2 (competitors in each race), # attributes    char [][][] parallelRaces;    int numRaces = m_numAttribs-1;    Random r = new Random(42);    int numInstances = data.numInstances();    Instances trainCV; Instances testCV;    Instance testInstance;    // statistics on the racers    Stats [][] raceStats = new Stats[numRaces][2];        parallelRaces = new char [numRaces][2][m_numAttribs-1];    char [] base = new char [m_numAttribs];    for (int i=0;i<m_numAttribs;i++) {      base[i] = '*';    }    int count=0;    // set up initial races    for (int i=0;i<m_numAttribs;i++) {      if (i != m_classIndex) {        parallelRaces[count][0] = (char [])base.clone();        parallelRaces[count][1] = (char [])base.clone();        parallelRaces[count][0][i] = '1';        parallelRaces[count++][1][i] = '0';      }    }        if (m_debug) {      System.err.println("Initial sets:\n");      for (int i=0;i<numRaces;i++) {        System.err.print(printSets(parallelRaces[i])+"--------------\n");      }    }        BitSet randomB = new BitSet(m_numAttribs);    char [] randomBC = new char [m_numAttribs];    // notes which bit positions have been decided    boolean [] attributeConstraints = new boolean[m_numAttribs];    double error;    int evaluationCount = 0;    raceSet: while (numRaces > 0) {      boolean won = false;      for (int i=0;i<numRaces;i++) {        raceStats[i][0] = new Stats();        raceStats[i][1] = new Stats();      }      // keep an eye on how many test instances have been randomly sampled      int sampleCount = 0;      // run the current set of races      while (!won) {        // generate a random binary string        for (int i=0;i<m_numAttribs;i++) {          if (i != m_classIndex) {            if (!attributeConstraints[i]) {              if (r.nextDouble() < 0.5) {                randomB.set(i);              } else {                randomB.clear(i);              }            } else { // this position has been decided from previous races              if (base[i] == '1') {                 randomB.set(i);              } else {                randomB.clear(i);              }            }          }        }                // randomly select an instance to test on        int testIndex = Math.abs(r.nextInt() % numInstances);        // We want to randomize the data the same way for every         // learning scheme.        trainCV = data.trainCV(numInstances, testIndex, new Random (1));        testCV = data.testCV(numInstances, testIndex);        testInstance = testCV.instance(0);        sampleCount++;        /*      if (sampleCount > numInstances) {          throw new Exception("raceSchemata: No clear winner after sampling "                              +sampleCount+" instances.");                              } */                m_theEvaluator.buildEvaluator(trainCV);                // the evaluator must retrain for every test point        error = -((HoldOutSubsetEvaluator)m_theEvaluator).          evaluateSubset(randomB,                          testInstance,                         true);        evaluationCount++;                // see which racers match this random subset        for (int i=0;i<m_numAttribs;i++) {          if (randomB.get(i)) {            randomBC[i] = '1';          } else {            randomBC[i] = '0';          }        }        //      System.err.println("Random subset: "+(new String(randomBC)));        checkRaces: for (int i=0;i<numRaces;i++) {          // if a pair of racers has evaluated more than num instances          // then bail out---unlikely that having any more atts is any          // better than the current base set.          if (((raceStats[i][0].count + raceStats[i][1].count) / 2) >               (numInstances)) {            break raceSet;
💿 文件大小 166 K
👤 上传用户 Tonic2009
📂 所属分类数学计算
🏷️ 相关标签

#数据挖掘 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -