📄 AttributeSelection.java
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    AttributeSelection.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.MethodDescriptor;
import java.beans.PropertyDescriptor;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;

/**
 * Attribute selection class. Takes the name of a search class and
 * an evaluation class on the command line. <p/>
 *
 * Valid options are: <p/>
 *
 * -h <br/>
 * Display help. <p/>
 *
 * -i <name of input file> <br/>
 * Specify the training data file. <p/>
 *
 * -c <class index> <br/>
 * The index of the attribute to use as the class. <p/>
 *
 * -s <search method> <br/>
 * The full class name of the search method followed by search method options
 * (if any). <br/>
 * Eg. -s "weka.attributeSelection.BestFirst -N 10" <p/>
 *
 * -x <number of folds> <br/>
 * Perform a cross validation. <p/>
 *
 * -n <random number seed> <br/>
 * Specify a random number seed. Use in conjunction with -x. (Default = 1). <p/>
 *
 * ------------------------------------------------------------------------ <p/>
 *
 * Example usage as the main of an attribute evaluator (called FunkyEvaluator):
 * <pre>
 * public static void main(String [] args) {
 *   runEvaluator(new FunkyEvaluator(), args);
 * }
 * </pre>
 * <p/>
 *
 * ------------------------------------------------------------------------ <p/>
 *
 * @author  Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.46 $
 */
public class AttributeSelection implements Serializable {

  /** for serialization */
  static final long serialVersionUID = 4170171824147584330L;

  /** the instances to select attributes from */
  private Instances m_trainInstances;

  /** the attribute/subset evaluator */
  private ASEvaluation m_ASEvaluator;

  /** the search method */
  private ASSearch m_searchMethod;

  /** the number of folds to use for cross validation */
  private int m_numFolds;

  /** holds a string describing the results of the attribute selection */
  private StringBuffer m_selectionResults;

  /** rank features (if allowed by the search method) */
  private boolean m_doRank;

  /** do cross validation */
  private boolean m_doXval;

  /** seed used to randomly shuffle instances for cross validation */
  private int m_seed;

  /** number of attributes requested from ranked results */
  private int m_numToSelect;

  /** the selected attributes */
  private int[] m_selectedAttributeSet;

  /** the attribute indexes and associated merits if a ranking is produced */
  private double[][] m_attributeRanking;

  /** if a feature selection run involves an attribute transformer */
  private AttributeTransformer m_transformer = null;

  /** the attribute filter for processing instances with respect to
      the most recent feature selection run */
  private Remove m_attributeFilter = null;

  /** hold statistics for repeated feature selection, such as
      under cross validation */
  private double[][] m_rankResults = null;
  private double[] m_subsetResults = null;
  private int m_trials = 0;

  /**
   * Return the number of attributes selected from the most recent
   * run of attribute selection
   * @return the number of attributes selected
   */
  public int numberAttributesSelected() throws Exception {
    int[] att = selectedAttributes();
    return att.length - 1;
  }

  /**
   * get the final selected set of attributes.
   * @return an array of attribute indexes
   * @exception Exception if attribute selection has not been performed yet
   */
  public int[] selectedAttributes() throws Exception {
    if (m_selectedAttributeSet == null) {
      throw new Exception("Attribute selection has not been performed yet!");
    }
    return m_selectedAttributeSet;
  }

  /**
   * get the final ranking of the attributes.
   * @return a two dimensional array of ranked attribute indexes and their
   * associated merit scores as doubles.
   * @exception Exception if a ranking has not been produced
   */
  public double[][] rankedAttributes() throws Exception {
    if (m_attributeRanking == null) {
      throw new Exception("Ranking has not been performed");
    }
    return m_attributeRanking;
  }

  /**
   * set the attribute/subset evaluator
   * @param evaluator the evaluator to use
   */
  public void setEvaluator(ASEvaluation evaluator) {
    m_ASEvaluator = evaluator;
  }

  /**
   * set the search method
   * @param search the search method to use
   */
  public void setSearch(ASSearch search) {
    m_searchMethod = search;

    if (m_searchMethod instanceof RankedOutputSearch) {
      setRanking(((RankedOutputSearch) m_searchMethod).getGenerateRanking());
    }
  }

  /**
   * set the number of folds for cross validation
   * @param folds the number of folds
   */
  public void setFolds(int folds) {
    m_numFolds = folds;
  }

  /**
   * produce a ranking (if possible with the set search and evaluator)
   * @param r true if a ranking is to be produced
   */
  public void setRanking(boolean r) {
    m_doRank = r;
  }

  /**
   * do a cross validation
   * @param x true if a cross validation is to be performed
   */
  public void setXval(boolean x) {
    m_doXval = x;
  }

  /**
   * set the seed for use in cross validation
   * @param s the seed
   */
  public void setSeed(int s) {
    m_seed = s;
  }

  /**
   * get a description of the attribute selection
   * @return a String describing the results of attribute selection
   */
  public String toResultsString() {
    return m_selectionResults.toString();
  }

  /**
   * reduce the dimensionality of a set of instances to include only those
   * attributes chosen by the last run of attribute selection.
   * @param in the instances to be reduced
   * @return a dimensionality reduced set of instances
   * @exception Exception if the instances can't be reduced
   */
  public Instances reduceDimensionality(Instances in) throws Exception {
    if (m_attributeFilter == null) {
      throw new Exception("No feature selection has been performed yet!");
    }

    if (m_transformer != null) {
      Instances transformed =
        new Instances(m_transformer.transformedHeader(), in.numInstances());
      for (int i = 0; i < in.numInstances(); i++) {
        transformed.add(m_transformer.convertInstance(in.instance(i)));
      }
      return Filter.useFilter(transformed, m_attributeFilter);
    }

    return Filter.useFilter(in, m_attributeFilter);
  }

  /**
   * reduce the dimensionality of a single instance to include only those
   * attributes chosen by the last run of attribute selection.
   * @param in the instance to be reduced
   * @return a dimensionality reduced instance
   * @exception Exception if the instance can't be reduced
   */
  public Instance reduceDimensionality(Instance in) throws Exception {
    if (m_attributeFilter == null) {
      throw new Exception("No feature selection has been performed yet!");
    }
    if (m_transformer != null) {
      in = m_transformer.convertInstance(in);
    }
    m_attributeFilter.input(in);
    m_attributeFilter.batchFinished();
    Instance result = m_attributeFilter.output();
    return result;
  }

  /**
   * constructor. Sets defaults for each member variable. Default
   * attribute evaluator is CfsSubsetEval; default search method is
   * GreedyStepwise.
   */
  public AttributeSelection() {
    setFolds(10);
    setRanking(false);
    setXval(false);
    setSeed(1);
    setEvaluator(new CfsSubsetEval());
    setSearch(new GreedyStepwise());
    m_selectionResults = new StringBuffer();
    m_selectedAttributeSet = null;
    m_attributeRanking = null;
  }

  /**
   * Perform attribute selection with a particular evaluator and
   * a set of options specifying search method and input file etc.
   *
   * @param ASEvaluator an evaluator object
   * @param options an array of options, not only for the evaluator
   * but also the search method (if any) and an input data file
   * @return the results of attribute selection as a String
   * @exception Exception if no training file is set
   */
  public static String SelectAttributes(ASEvaluation ASEvaluator,
                                        String[] options) throws Exception {
    String trainFileName, searchName;
    Instances train = null;
    ASSearch searchMethod = null;
    String[] optionsTmp = (String[]) options.clone();
    boolean helpRequested = false;

    try {
      // get basic options (options the same for all attribute selectors)
      trainFileName = Utils.getOption('i', options);
      helpRequested = Utils.getFlag('h', optionsTmp);

      if (helpRequested || (trainFileName.length() == 0)) {
        searchName = Utils.getOption('s', optionsTmp);
        if (searchName.length() != 0) {
          String[] searchOptions = Utils.splitOptions(searchName);
          searchMethod =
            (ASSearch) Class.forName(searchOptions[0]).newInstance();
        }

        if (helpRequested)
          throw new Exception("Help requested.");
        else
          throw new Exception("No training file given.");
      }
    } catch (Exception e) {
      throw new Exception('\n' + e.getMessage()
                          + makeOptionString(ASEvaluator, searchMethod));
    }

    DataSource source = new DataSource(trainFileName);
    train = source.getDataSet();
    return SelectAttributes(ASEvaluator, options, train);
  }

  /**
   * returns a string summarizing the results of repeated attribute
   * selection runs on splits of a dataset.
   * @return a summary of attribute selection results
   * @exception Exception if no attribute selection has been performed.
   */
  public String CVResultsString() throws Exception {
    StringBuffer CvString = new StringBuffer();

    if ((m_subsetResults == null && m_rankResults == null) ||
        (m_trainInstances == null)) {
      throw new Exception("Attribute selection has not been performed yet!");
    }

    int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0);

    CvString.append("\n\n=== Attribute selection " + m_numFolds
                    + " fold cross-validation ");

    if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) &&
        !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator) &&
        (m_trainInstances.classAttribute().isNominal())) {
      CvString.append("(stratified), seed: ");
      CvString.append(m_seed + " ===\n\n");
    } else {
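
The listing above is cut off partway through CVResultsString(), but the public entry points it defines already show how the class is meant to be used. The following is a minimal usage sketch and is not part of the original file: it assumes a Weka 3.x weka.jar on the classpath, the data path "data/iris.arff" is only a placeholder, the search-method string reuses the BestFirst example from the class javadoc, and the evaluator is the CfsSubsetEval default set in the constructor.

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.CfsSubsetEval;

public class AttributeSelectionDemo {

  public static void main(String[] args) throws Exception {
    // Options mirror the class javadoc above: -i names the training data
    // file, -s gives the search class followed by its own options.
    String[] options = {
      "-i", "data/iris.arff",                         // placeholder path
      "-s", "weka.attributeSelection.BestFirst -N 10"
    };

    // The static SelectAttributes(ASEvaluation, String[]) entry point shown
    // above parses the options, loads the data through DataSource and
    // returns the attribute selection results as a String.
    String results =
      AttributeSelection.SelectAttributes(new CfsSubsetEval(), options);
    System.out.println(results);
  }
}

The same object can also be configured programmatically through the setters shown above (setEvaluator, setSearch, setFolds, setXval, setSeed), after which reduceDimensionality(Instances) filters a dataset down to the attributes chosen by the most recent run.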