📄 racesearch.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
      } catch (Exception ex) {
      }
    }
  }

  /**
   * Gets whether ranking has been requested. This is used by the
   * AttributeSelection module to determine if rankedAttributes()
   * should be called.
   * @return true if ranking has been requested.
   */
  public boolean getGenerateRanking() {
    return m_rankingRequested;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numToSelectTipText() {
    return "Specify the number of attributes to retain. Use in conjunction "
      +"with generateRanking. The default value "
      +"(-1) indicates that all attributes are to be retained. Use either "
      +"this option or a threshold to reduce the attribute set.";
  }

  /**
   * Specify the number of attributes to select from the ranked list
   * (if generating a ranking). -1
   * indicates that all attributes are to be retained.
   * @param n the number of attributes to retain
   */
  public void setNumToSelect(int n) {
    m_numToSelect = n;
  }

  /**
   * Gets the number of attributes to be retained.
   * @return the number of attributes to retain
   */
  public int getNumToSelect() {
    return m_numToSelect;
  }

  /**
   * Gets the calculated number of attributes to retain. This is the
   * actual number of attributes to retain. This is the same as
   * getNumToSelect if the user specifies a number which is not less
   * than zero. Otherwise it should be the number of attributes in the
   * (potentially transformed) data.
   */
  public int getCalculatedNumToSelect() {
    if (m_numToSelect >= 0) {
      m_calculatedNumToSelect = m_numToSelect;
    }
    return m_calculatedNumToSelect;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String selectionThresholdTipText() {
    return "Set threshold by which attributes can be discarded. Default value "
      + "results in no attributes being discarded. Use in conjunction with "
      + "generateRanking";
  }

  /**
   * Set the threshold by which the AttributeSelection module can discard
   * attributes.
   * @param threshold the threshold.
   */
  public void setSelectionThreshold(double threshold) {
    m_threshold = threshold;
  }

  /**
   * Returns the threshold so that the AttributeSelection module can
   * discard attributes from the ranking.
   */
  public double getSelectionThreshold() {
    return m_threshold;
  }


  /**
   * Returns an enumeration describing the available options.
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    Vector newVector = new Vector(8);
     newVector.addElement(new Option("\tType of race to perform.\n\t"
				     +"(default = 0).",
				     "R", 1 ,"-R <0 = forward | 1 = backward "
				     +"race | 2 = schemata | 3 = rank>"));
     newVector.addElement(new Option("\tSignificance level for comaparisons"
				     +"\n\t(default = 0.001(forward/backward/"
				     +"rank)/0.01(schemata)).",
				     "L",1,"-L <significance>"));
     newVector.addElement(new Option("\tThreshold for error comparison.\n\t"
				     +"(default = 0.001).",
				     "T",1,"-T <threshold>"));
     
     newVector.addElement(new Option("\tAttribute ranker to use if doing a "
			   +"\n\trank search. Place any\n\t"
			   +"evaluator options LAST on the" 
			   + "\n\tcommand line following a \"--\"." 
			   + "\n\teg. -A weka.attributeSelection."
			   +"GainRatioAttributeEval ... " 
			   + "-- -M.\n\t(default = GainRatioAttributeEval)", 
			   "A", 1, "-A <attribute evaluator>"));
    
     newVector.addElement(new Option("\tFolds for cross validation\n\t"
			    +"(default = 0 (1 if schemata race)",
			    "F",1,"-F <0 = 10 fold | 1 = leave-one-out>"));
     newVector.addElement(new Option("\tGenerate a ranked list of attributes."
				     +"\n\tForces the search to be forward\n."
				     +"\tand races until all attributes have\n"
				     +"\tselected, thus producing a ranking.",
				     "Q",0,"-Q"));

    newVector
      .addElement(new Option("\tSpecify number of attributes to retain from "
			     +"\n\tthe ranking. Overides -T. Use "
			     +"in conjunction with -Q"
			     ,"N",1
			     , "-N <num to select>"));

    newVector
      .addElement(new Option("\tSpecify a theshold by which attributes" 
			     + "\n\tmay be discarded from the ranking."
			     +"\n\tUse in conjuction with -Q","T",1
			     , "-T <threshold>"));

     newVector.addElement(new Option("\tVerbose output for monitoring the "
				     +"search.",
				     "Z",0,"-Z"));
     if ((m_ASEval != null) && 
	 (m_ASEval instanceof OptionHandler)) {
       newVector.addElement(new Option("", "", 0, "\nOptions specific to " 
				       + "evaluator " 
				       + m_ASEval.getClass().getName() 
				       + ":"));
       Enumeration em = ((OptionHandler)m_ASEval).listOptions();
       
       while (em.hasMoreElements()) {
	 newVector.addElement(em.nextElement());
       }
     }
     return newVector.elements();
  }

  /**
   * Parses a given list of options.
   *
   * Valid options are:<p>
   *
   * -R <race type><br>
   * 0 = forward, 1 = backward, 2 = schemata, 3 = rank. <p>
   * 
   * -L <significance level> <br>
   * significance level to use for t-tests. <p>
   *
   * -T <threshold> <br>
   * threshold for considering mean errors of two subsets the same <p>
   *
   * -F <xval type> <br>
   * 0 = 10 fold, 1 = leave-one-out (selected automatically for schemata race
   * <p>
   *
   * -A <attribute evaluator> <br>
   * the attribute evaluator to use when doing a rank search <p>
   *
   * -Q <br>
   * produce a ranked list of attributes. Selecting this option forces
   * the race type to be forward. Racing continues until *all* attributes
   * have been selected, thus producing a ranked list of attributes. <p>
   *
   * -N <number to retain> <br>
   * Specify the number of attributes to retain. Overides any threshold. 
   * Use in conjunction with -Q. <p>
   * 
   * -J <threshold> <br>
   * Specify a threshold by which the AttributeSelection module can discard
   * attributes. Use in conjunction with -Q. <p>
   *
   * -Z <br>
   * Turn on verbose output for monitoring the search <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   *
   **/
  public void setOptions (String[] options)
    throws Exception {
    String optionString;
    resetOptions();
    
    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
      setRaceType(new SelectedTag(Integer.parseInt(optionString),
				  TAGS_SELECTION));
    }
    
    optionString = Utils.getOption('F', options);
    if (optionString.length() != 0) {
      setFoldsType(new SelectedTag(Integer.parseInt(optionString),
				  XVALTAGS_SELECTION));
    }

    optionString = Utils.getOption('L', options);
    if (optionString.length() !=0) {
      Double temp;
      temp = Double.valueOf(optionString);
      setSignificanceLevel(temp.doubleValue());
    }

    optionString = Utils.getOption('T', options);
    if (optionString.length() !=0) {
      Double temp;
      temp = Double.valueOf(optionString);
      setThreshold(temp.doubleValue());
    }

    optionString = Utils.getOption('A', options);
    if (optionString.length() != 0) {
      setAttributeEvaluator(ASEvaluation.forName(optionString, 
			    Utils.partitionOptions(options)));
    }

    setGenerateRanking(Utils.getFlag('Q', options));

    optionString = Utils.getOption('T', options);
    if (optionString.length() != 0) {
      Double temp;
      temp = Double.valueOf(optionString);
      setThreshold(temp.doubleValue());
    }
    
    optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
      setNumToSelect(Integer.parseInt(optionString));
    }

    setDebug(Utils.getFlag('Z', options));
  }

  /**
   * Gets the current settings of BestFirst.
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    int current = 0;
    String[] evaluatorOptions = new String[0];

    if ((m_ASEval != null) && 
	(m_ASEval instanceof OptionHandler)) {
      evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
    }
    String[] options = new String[17+evaluatorOptions.length];

    options[current++] = "-R"; options[current++] = ""+m_raceType;
    options[current++] = "-L"; options[current++] = ""+getSignificanceLevel();
    options[current++] = "-T"; options[current++] = ""+getThreshold();
    options[current++] = "-F"; options[current++] = ""+m_xvalType;
    if (getGenerateRanking()) {
      options[current++] = "-Q";
    }
    options[current++] = "-N"; options[current++] = ""+getNumToSelect();
    options[current++] = "-J"; options[current++] = ""+getSelectionThreshold();
    if (getDebug()) {
      options[current++] = "-Z";
    }
    
    if (getAttributeEvaluator() != null) {
      options[current++] = "-A";
      options[current++] = getAttributeEvaluator().getClass().getName();
      options[current++] = "--";
      System.arraycopy(evaluatorOptions, 0, options, current, 
		       evaluatorOptions.length);
      current += evaluatorOptions.length;
    }

    
    while (current < options.length) {
      options[current++] = "";
    }

    return  options;
  }




  /**
   * Searches the attribute subset space by racing cross validation
   * errors of competing subsets
   *
   * @param ASEvaluator the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @exception Exception if the search can't be completed
   */
  public int[] search (ASEvaluation ASEval, Instances data)
    throws Exception {
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw  new Exception(ASEval.getClass().getName() 
			   + " is not a " 
			   + "Subset evaluator! (RaceSearch)");
    }

    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      throw new Exception("Can't use an unsupervised subset evaluator "
			  +"(RaceSearch).");
    }

    if (!(ASEval instanceof HoldOutSubsetEvaluator)) {
      throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "
			  +"weka.attributeSelection.ClassifierSubsetEval "
			  +"(RaceSearch)");
    }

    if (!(ASEval instanceof ErrorBasedMeritEvaluator)) {
      throw new Exception("Only error based subset evaluators can be used, "
			  +"eg. weka.attributeSelection.ClassifierSubsetEval "
			  +"(RaceSearch)");
    }

    m_Instances = data;
    m_Instances.deleteWithMissingClass();
    if (m_Instances.numInstances() == 0) {
      throw new Exception("All instances have missing class! (RaceSearch)");
    }
    if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) {
      throw new Exception("More attributes requested than exist in the data "
			  +"(RaceSearch).");
    }
    m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;
    m_numAttribs = m_Instances.numAttributes();
    m_classIndex = m_Instances.classIndex();

    if (m_rankingRequested) {
      m_rankedAtts = new double[m_numAttribs-1][2];
      m_rankedSoFar = 0;
    }

    if (m_xvalType == LEAVE_ONE_OUT) {
      m_numFolds = data.numInstances();
    } else {
      m_numFolds = 10;
    }

    Random random = new Random(1); // I guess this should really be a parameter?
    data.randomize(random);
    int [] bestSubset=null;

    switch (m_raceType) {
    case FORWARD_RACE:
    case BACKWARD_RACE: 
      bestSubset = hillclimbRace(data, random);
      break;
    case SCHEMATA_RACE:
      bestSubset = schemataRace(data, random);
      break;
    case RANK_RACE:
      bestSubset = rankRace(data, random);
      break;
    }

    return bestSubset;
  }

  public double [][] rankedAttributes() throws Exception {
    if (!m_rankingRequested) {
      throw new Exception("Need to request a ranked list of attributes "
			  +"before attributes can be ranked (RaceSearch).");
    }
    if (m_rankedAtts == null) {
      throw new Exception("Search must be performed before attributes "
			  +"can be ranked (RaceSearch).");
    }
    
    double [][] final_rank = new double [m_rankedSoFar][2];
    for (int i=0;i<m_rankedSoFar;i++) {
      final_rank[i][0] = m_rankedAtts[i][0];
      final_rank[i][1] = m_rankedAtts[i][1];
    }

    if (m_numToSelect <= 0) {
      if (m_threshold == -Double.MAX_VALUE) {
	m_calculatedNumToSelect = final_rank.length;
      } else {
	determineNumToSelectFromThreshold(final_rank);
      }
    }

    return final_rank;
  }

  private void determineNumToSelectFromThreshold(double [][] ranking) {
    int count = 0;
    for (int i = 0; i < ranking.length; i++) {
      if (ranking[i][1] > m_threshold) {
	count++;
      }
    }
    m_calculatedNumToSelect = count;
  }

  /**
   * Print an attribute set.
   */
  private String printSets(char [][]raceSets) {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -