📄 attributeselection.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.attributeSelection;
import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.MethodDescriptor;
import java.beans.PropertyDescriptor;
import java.io.FileReader;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.Range;
import org.agentacademy.modules.dataminer.core.Utils;
import org.agentacademy.modules.dataminer.filters.AttributeFilter;
import org.agentacademy.modules.dataminer.filters.Filter;
import org.apache.log4j.Logger;
/**
* Attribute selection class. Takes the name of a search class and
* an evaluation class on the command line. <p>
*
* Valid options are: <p>
*
* -h <br>
* Display help. <p>
*
* -I <name of input file> <br>
* Specify the training arff file. <p>
*
* -C <class index> <br>
* The index of the attribute to use as the class. <p>
*
* -S <search method> <br>
* The full class name of the search method followed by search method options
* (if any).<br>
* Eg. -S "weka.attributeSelection.BestFirst -N 10" <p>
*
* -X <number of folds> <br>
* Perform a cross validation. <p>
*
* -N <random number seed> <br>
* Specify a random number seed. Use in conjunction with -X. (Default = 1). <p>
*
* ------------------------------------------------------------------------ <p>
*
* Example usage as the main of an attribute evaluator (called FunkyEvaluator):
* <code> <pre>
* public static void main(String [] args) {
* try {
* ASEvaluation eval = new FunkyEvaluator();
* System.out.println(AttributeSelection.SelectAttributes(eval, args));
* } catch (Exception e) {
* System.err.println(e.getMessage());
* }
* }
* </pre> </code>
* <p>
*
* ------------------------------------------------------------------------ <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.3 $
*/
public class AttributeSelection implements Serializable {
/** log4j logger obtained for this class */
public static Logger log = Logger.getLogger(AttributeSelection.class);
/** the instances to select attributes from */
private Instances m_trainInstances;
/** the attribute/subset evaluator (assigned through setEvaluator) */
private ASEvaluation m_ASEvaluator;
/** the search method (assigned through setSearch) */
private ASSearch m_searchMethod;
/** the number of folds to use for cross validation; the constructor
sets it to 10 */
private int m_numFolds;
/** holds a string describing the results of the attribute selection;
returned as a String by toResultsString */
private StringBuffer m_selectionResults;
/** rank features (if allowed by the search method); set by setRanking,
which setSearch also calls when the search is a RankedOutputSearch */
private boolean m_doRank;
/** do cross validation; the constructor sets it to false */
private boolean m_doXval;
/** seed used to randomly shuffle instances for cross validation; the
constructor sets it to 1 */
private int m_seed;
/** cutoff value by which to select attributes for ranked results; the
constructor does not assign it, so it keeps the field default until
setThreshold is called */
private double m_threshold;
/** number of attributes requested from ranked results */
private int m_numToSelect;
/** the selected attributes; null until a selection has been performed
(selectedAttributes throws while it is null) */
private int [] m_selectedAttributeSet;
/** the attribute indexes and associated merits if a ranking is produced;
null otherwise (rankedAttributes throws while it is null) */
private double [][] m_attributeRanking;
/** if a feature selection run involves an attribute transformer */
private AttributeTransformer m_transformer = null;
/** the attribute filter for processing instances with respect to
the most recent feature selection run */
private AttributeFilter m_attributeFilter = null;
/** hold statistics for repeated feature selection, such as
under cross validation */
private double [][] m_rankResults = null;
/** NOTE(review): presumably the subset-search counterpart of
m_rankResults -- confirm against the code that fills it */
private double [] m_subsetResults = null;
/** NOTE(review): presumably counts the repeated selection runs that the
statistics above cover -- confirm against the code that updates it */
private int m_trials = 0;
/**
 * Reports how many attributes the most recent attribute selection run
 * chose. The value is one less than the length of the array handed back
 * by {@link #selectedAttributes()}.
 * NOTE(review): presumably the last array entry is the class index and is
 * therefore not counted -- confirm against the code that fills the array.
 *
 * @return the length of the selected attribute array minus one
 * @exception Exception if attribute selection has not been performed yet
 */
public int numberAttributesSelected() throws Exception {
  return selectedAttributes().length - 1;
}
/**
 * Returns the attribute set stored by the most recent selection run.
 * The internal array itself is returned, not a copy.
 *
 * @return an array of attribute indexes
 * @exception Exception if attribute selection has not been performed yet
 */
public int [] selectedAttributes () throws Exception {
  if (m_selectedAttributeSet != null) {
    return m_selectedAttributeSet;
  }
  throw new Exception("Attribute selection has not been performed yet!");
}
/**
 * Returns the stored attribute ranking. The internal array itself is
 * returned, not a copy.
 *
 * @return a two dimensional array of ranked attribute indexes and their
 * associated merit scores as doubles.
 * @exception Exception if a ranking has not been produced
 */
public double [][] rankedAttributes () throws Exception {
  if (m_attributeRanking != null) {
    return m_attributeRanking;
  }
  throw new Exception("Ranking has not been performed");
}
/**
 * Installs the attribute/subset evaluator used by this object.
 *
 * @param evaluator the evaluator to use
 */
public void setEvaluator (ASEvaluation evaluator) {
  this.m_ASEvaluator = evaluator;
}
/**
 * Installs the search method. When the search is a
 * {@code RankedOutputSearch}, the ranking flag of this object is
 * synchronised with the search's own generate-ranking setting.
 *
 * @param search the search method to use
 */
public void setSearch (ASSearch search) {
  this.m_searchMethod = search;
  if (!(m_searchMethod instanceof RankedOutputSearch)) {
    // plain searches leave the current ranking flag untouched
    return;
  }
  RankedOutputSearch rankedSearch = (RankedOutputSearch) m_searchMethod;
  setRanking(rankedSearch.getGenerateRanking());
}
/**
 * Stores the number of folds to use for cross validation. The value is
 * stored as given; no range check is applied here.
 *
 * @param folds the number of folds
 */
public void setFolds (int folds) {
  this.m_numFolds = folds;
}
/**
 * Switches ranking on or off (a ranking is only possible with a suitable
 * search method and evaluator).
 *
 * @param r true if a ranking is to be produced
 */
public void setRanking (boolean r) {
  this.m_doRank = r;
}
/**
 * Switches cross validation on or off.
 *
 * @param x true if a cross validation is to be performed
 */
public void setXval (boolean x) {
  this.m_doXval = x;
}
/**
 * Stores the random seed used when shuffling instances for cross
 * validation.
 *
 * @param s the seed
 */
public void setSeed (int s) {
  this.m_seed = s;
}
/**
 * Stores the cutoff value by which features are selected from a ranked
 * list.
 *
 * @param t the threshold
 */
public void setThreshold (double t) {
  this.m_threshold = t;
}
/**
 * Renders the accumulated attribute selection results buffer as text.
 *
 * @return a String describing the results of attribute selection
 */
public String toResultsString() {
  final String description = m_selectionResults.toString();
  return description;
}
/**
 * Reduces a set of instances to the attributes chosen by the last run of
 * attribute selection. If an attribute transformer is present, every
 * instance is first converted by it into a new dataset built on the
 * transformed header; the attribute filter is then applied to that
 * dataset. Otherwise the filter is applied to the input directly.
 *
 * @param in the instances to be reduced
 * @return a dimensionality reduced set of instances
 * @exception Exception if the instances can't be reduced
 */
public Instances reduceDimensionality(Instances in) throws Exception {
  if (m_attributeFilter == null) {
    throw new Exception("No feature selection has been performed yet!");
  }

  // no transformer: filter the caller's data as it is
  if (m_transformer == null) {
    return Filter.useFilter(in, m_attributeFilter);
  }

  // transformer present: convert each instance before filtering
  Instances converted = new Instances(m_transformer.transformedHeader(),
                                      in.numInstances());
  int row = 0;
  while (row < in.numInstances()) {
    converted.add(m_transformer.convertInstance(in.instance(row)));
    row++;
  }
  return Filter.useFilter(converted, m_attributeFilter);
}
/**
 * Reduces a single instance to the attributes chosen by the last run of
 * attribute selection. The instance is converted by the attribute
 * transformer first (when one is present), then pushed through the
 * attribute filter as a one-instance batch.
 *
 * @param in the instance to be reduced
 * @return a dimensionality reduced instance
 * @exception Exception if the instance can't be reduced
 */
public Instance reduceDimensionality(Instance in) throws Exception {
  if (m_attributeFilter == null) {
    throw new Exception("No feature selection has been performed yet!");
  }

  Instance candidate = (m_transformer == null)
    ? in
    : m_transformer.convertInstance(in);

  m_attributeFilter.input(candidate);
  m_attributeFilter.batchFinished();
  return m_attributeFilter.output();
}
/**
* constructor. Sets defaults for each member varaible. Default
* attribute evaluator is CfsSubsetEval; default search method is
* BestFirst.
*/
public AttributeSelection () {
setFolds(10);
setRanking(false);
setXval(false);
setSeed(1);
// m_threshold = -Double.MAX_VALUE;
setEvaluator(new CfsSubsetEval());
setSearch(new ForwardSelection());
m_selectionResults = new StringBuffer();
m_selectedAttributeSet = null;
m_attributeRanking = null;
}
/**
 * Perform attribute selection with a particular evaluator and
 * a set of options specifying search method and input file etc.
 * The training file named by -I is loaded and the work is then delegated
 * to the three-argument SelectAttributes overload.
 *
 * If -I is missing, the -S option (if present) is used to instantiate the
 * search method purely so that its options can be listed in the help text
 * that accompanies the resulting exception.
 *
 * @param ASEvaluator an evaluator object
 * @param options an array of options, not only for the evaluator
 * but also the search method (if any) and an input data file
 * @return the results of attribute selection as a String
 * @exception Exception if no training file is set, if option parsing or
 * search method instantiation fails, or if the training file cannot be
 * read
 */
public static String SelectAttributes (ASEvaluation ASEvaluator,
                                       String[] options)
  throws Exception
{
  String trainFileName, searchName;
  Instances train = null;
  ASSearch searchMethod = null;
  try {
    // get basic options (options the same for all attribute selectors)
    trainFileName = Utils.getOption('I', options);
    if (trainFileName.length() == 0) {
      searchName = Utils.getOption('S', options);
      if (searchName.length() != 0) {
        // NOTE(review): the whole -S value is used as a class name, so a
        // value that also carries search options will fail here and the
        // failure message replaces "No training file given." -- confirm
        // whether that is intended.
        searchMethod = (ASSearch)Class.forName(searchName).newInstance();
      }
      throw new Exception("No training file given.");
    }
  }
  catch (Exception e) {
    // prepend the failure to the general option help; keep the original
    // exception as the cause so its stack trace is not lost
    throw new Exception('\n' + e.getMessage()
                        + makeOptionString(ASEvaluator, searchMethod), e);
  }
  // Close the reader once the data has been loaded instead of leaking the
  // file handle. Assumes the Instances(Reader) constructor consumes the
  // whole file before returning -- TODO confirm.
  FileReader trainReader = new FileReader(trainFileName);
  try {
    train = new Instances(trainReader);
  } finally {
    trainReader.close();
  }
  return SelectAttributes(ASEvaluator, options, train);
}
/**
* returns a string summarizing the results of repeated attribute
* selection runs on splits of a dataset.
* @return a summary of attribute selection results
* @exception Exception if no attribute selection has been performed.
*/
public String CVResultsString () throws Exception {
StringBuffer CvString = new StringBuffer();
if ((m_subsetResults == null && m_rankResults == null) ||
( m_trainInstances == null)) {
throw new Exception("Attribute selection has not been performed yet!");
}
int fieldWidth = (int)(Math.log(m_trainInstances.numAttributes()) +1.0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -