📄 racesearch.java
字号:
* </pre> * * <pre> -M * treat missing values as a seperate value.</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions (String[] options) throws Exception { String optionString; resetOptions(); optionString = Utils.getOption('R', options); if (optionString.length() != 0) { setRaceType(new SelectedTag(Integer.parseInt(optionString), TAGS_SELECTION)); } optionString = Utils.getOption('F', options); if (optionString.length() != 0) { setFoldsType(new SelectedTag(Integer.parseInt(optionString), XVALTAGS_SELECTION)); } optionString = Utils.getOption('L', options); if (optionString.length() !=0) { setSignificanceLevel(Double.parseDouble(optionString)); } optionString = Utils.getOption('T', options); if (optionString.length() !=0) { setThreshold(Double.parseDouble(optionString)); } optionString = Utils.getOption('A', options); if (optionString.length() != 0) { setAttributeEvaluator(ASEvaluation.forName(optionString, Utils.partitionOptions(options))); } setGenerateRanking(Utils.getFlag('Q', options)); optionString = Utils.getOption('J', options); if (optionString.length() != 0) { setSelectionThreshold(Double.parseDouble(optionString)); } optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumToSelect(Integer.parseInt(optionString)); } setDebug(Utils.getFlag('Z', options)); } /** * Gets the current settings of BestFirst. * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions () { int current = 0; String[] evaluatorOptions = new String[0]; if ((m_ASEval != null) && (m_ASEval instanceof OptionHandler)) { evaluatorOptions = ((OptionHandler)m_ASEval).getOptions(); } String[] options = new String[17+evaluatorOptions.length]; options[current++] = "-R"; options[current++] = ""+m_raceType; options[current++] = "-L"; options[current++] = ""+getSignificanceLevel(); options[current++] = "-T"; options[current++] = ""+getThreshold(); options[current++] = "-F"; options[current++] = ""+m_xvalType; if (getGenerateRanking()) { options[current++] = "-Q"; } options[current++] = "-N"; options[current++] = ""+getNumToSelect(); options[current++] = "-J"; options[current++] = ""+getSelectionThreshold(); if (getDebug()) { options[current++] = "-Z"; } if (getAttributeEvaluator() != null) { options[current++] = "-A"; options[current++] = getAttributeEvaluator().getClass().getName(); options[current++] = "--"; System.arraycopy(evaluatorOptions, 0, options, current, evaluatorOptions.length); current += evaluatorOptions.length; } while (current < options.length) { options[current++] = ""; } return options; } /** * Searches the attribute subset space by racing cross validation * errors of competing subsets * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search (ASEvaluation ASEval, Instances data) throws Exception { if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator! (RaceSearch)"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { throw new Exception("Can't use an unsupervised subset evaluator " +"(RaceSearch)."); } if (!(ASEval instanceof HoldOutSubsetEvaluator)) { throw new Exception("Must use a HoldOutSubsetEvaluator, eg. " +"weka.attributeSelection.ClassifierSubsetEval " +"(RaceSearch)"); } if (!(ASEval instanceof ErrorBasedMeritEvaluator)) { throw new Exception("Only error based subset evaluators can be used, " +"eg. weka.attributeSelection.ClassifierSubsetEval " +"(RaceSearch)"); } m_Instances = new Instances(data); m_Instances.deleteWithMissingClass(); if (m_Instances.numInstances() == 0) { throw new Exception("All train instances have missing class! (RaceSearch)"); } if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) { throw new Exception("More attributes requested than exist in the data " +"(RaceSearch)."); } m_theEvaluator = (HoldOutSubsetEvaluator)ASEval; m_numAttribs = m_Instances.numAttributes(); m_classIndex = m_Instances.classIndex(); if (m_rankingRequested) { m_rankedAtts = new double[m_numAttribs-1][2]; m_rankedSoFar = 0; } if (m_xvalType == LEAVE_ONE_OUT) { m_numFolds = m_Instances.numInstances(); } else { m_numFolds = 10; } Random random = new Random(1); // I guess this should really be a parameter? m_Instances.randomize(random); int [] bestSubset=null; switch (m_raceType) { case FORWARD_RACE: case BACKWARD_RACE: bestSubset = hillclimbRace(m_Instances, random); break; case SCHEMATA_RACE: bestSubset = schemataRace(m_Instances, random); break; case RANK_RACE: bestSubset = rankRace(m_Instances, random); break; } return bestSubset; } public double [][] rankedAttributes() throws Exception { if (!m_rankingRequested) { throw new Exception("Need to request a ranked list of attributes " +"before attributes can be ranked (RaceSearch)."); } if (m_rankedAtts == null) { throw new Exception("Search must be performed before attributes " +"can be ranked (RaceSearch)."); } double [][] final_rank = new double [m_rankedSoFar][2]; for (int i=0;i<m_rankedSoFar;i++) { final_rank[i][0] = m_rankedAtts[i][0]; final_rank[i][1] = m_rankedAtts[i][1]; } if (m_numToSelect <= 0) { if (m_threshold == -Double.MAX_VALUE) { m_calculatedNumToSelect = final_rank.length; } else { determineNumToSelectFromThreshold(final_rank); } } return final_rank; } private void determineNumToSelectFromThreshold(double [][] ranking) { int count = 0; for (int i = 0; i < ranking.length; i++) { if (ranking[i][1] > m_threshold) { count++; } } m_calculatedNumToSelect = count; } /** * Print an attribute set. */ private String printSets(char [][]raceSets) { StringBuffer temp = new StringBuffer(); for (int i=0;i<raceSets.length;i++) { for (int j=0;j<m_numAttribs;j++) { temp.append(raceSets[i][j]); } temp.append('\n'); } return temp.toString(); } /** * Performs a schemata race---a series of races in parallel. * @param data the instances to estimate accuracy over. * @param random a random number generator * @return an array of selected attribute indices. */ private int [] schemataRace(Instances data, Random random) throws Exception { // # races, 2 (competitors in each race), # attributes char [][][] parallelRaces; int numRaces = m_numAttribs-1; Random r = new Random(42); int numInstances = data.numInstances(); Instances trainCV; Instances testCV; Instance testInstance; // statistics on the racers Stats [][] raceStats = new Stats[numRaces][2]; parallelRaces = new char [numRaces][2][m_numAttribs-1]; char [] base = new char [m_numAttribs]; for (int i=0;i<m_numAttribs;i++) { base[i] = '*'; } int count=0; // set up initial races for (int i=0;i<m_numAttribs;i++) { if (i != m_classIndex) { parallelRaces[count][0] = (char [])base.clone(); parallelRaces[count][1] = (char [])base.clone(); parallelRaces[count][0][i] = '1'; parallelRaces[count++][1][i] = '0'; } } if (m_debug) { System.err.println("Initial sets:\n"); for (int i=0;i<numRaces;i++) { System.err.print(printSets(parallelRaces[i])+"--------------\n"); } } BitSet randomB = new BitSet(m_numAttribs); char [] randomBC = new char [m_numAttribs]; // notes which bit positions have been decided boolean [] attributeConstraints = new boolean[m_numAttribs]; double error; int evaluationCount = 0; raceSet: while (numRaces > 0) { boolean won = false; for (int i=0;i<numRaces;i++) { raceStats[i][0] = new Stats(); raceStats[i][1] = new Stats(); } // keep an eye on how many test instances have been randomly sampled int sampleCount = 0; // run the current set of races while (!won) { // generate a random binary string for (int i=0;i<m_numAttribs;i++) { if (i != m_classIndex) { if (!attributeConstraints[i]) { if (r.nextDouble() < 0.5) { randomB.set(i); } else { randomB.clear(i); } } else { // this position has been decided from previous races if (base[i] == '1') { randomB.set(i); } else { randomB.clear(i); } } } } // randomly select an instance to test on int testIndex = Math.abs(r.nextInt() % numInstances); // We want to randomize the data the same way for every // learning scheme. trainCV = data.trainCV(numInstances, testIndex, new Random (1)); testCV = data.testCV(numInstances, testIndex); testInstance = testCV.instance(0); sampleCount++; /* if (sampleCount > numInstances) { throw new Exception("raceSchemata: No clear winner after sampling " +sampleCount+" instances."); } */ m_theEvaluator.buildEvaluator(trainCV); // the evaluator must retrain for every test point error = -((HoldOutSubsetEvaluator)m_theEvaluator). evaluateSubset(randomB, testInstance, true); evaluationCount++; // see which racers match this random subset for (int i=0;i<m_numAttribs;i++) { if (randomB.get(i)) { randomBC[i] = '1'; } else { randomBC[i] = '0'; } } // System.err.println("Random subset: "+(new String(randomBC))); checkRaces: for (int i=0;i<numRaces;i++) { // if a pair of racers has evaluated more than num instances // then bail out---unlikely that having any more atts is any // better than the current base set. if (((raceStats[i][0].count + raceStats[i][1].count) / 2) > (numInstances)) { break raceSet;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -