⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 attributeselection.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:

/**
 *
 *   AgentAcademy - an open source Data Mining framework for
 *   training intelligent agents
 *
 *   Copyright (C)   2001-2003 AA Consortium.
 *
 *   This library is open source software; you can redistribute it
 *   and/or modify it under the terms of the GNU Lesser General
 *   Public License as published by the Free Software Foundation;
 *   either version 2.0 of the License, or (at your option) any later
 *   version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free
 *   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *   MA  02111-1307 USA
 *
 */

package  org.agentacademy.modules.dataminer.attributeSelection;

import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.MethodDescriptor;
import java.beans.PropertyDescriptor;
import java.io.FileReader;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;

import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.Range;
import org.agentacademy.modules.dataminer.core.Utils;
import org.agentacademy.modules.dataminer.filters.AttributeFilter;
import org.agentacademy.modules.dataminer.filters.Filter;
import org.apache.log4j.Logger;

/**
 * Attribute selection class. Takes the name of a search class and
 * an evaluation class on the command line. <p>
 *
 * Valid options are: <p>
 *
 * -h <br>
 * Display help. <p>
 *
 * -I <name of input file> <br>
 * Specify the training arff file. <p>
 *
 * -C <class index> <br>
 * The index of the attribute to use as the class. <p>
 *
 * -S <search method> <br>
 * The full class name of the search method followed by search method options
 * (if any).<br>
 * Eg. -S "weka.attributeSelection.BestFirst -N 10" <p>
 *
 * -X <number of folds> <br>
 * Perform a cross validation. <p>
 *
 * -N <random number seed> <br>
 * Specify a random number seed. Use in conjunction with -X. (Default = 1). <p>
 *
 * ------------------------------------------------------------------------ <p>
 *
 * Example usage as the main of an attribute evaluator (called FunkyEvaluator):
 * <code> <pre>
 * public static void main(String [] args) {
 *   try {
 *     ASEvaluator eval = new FunkyEvaluator();
 *     System.out.println(AttributeSelection.SelectAttributes(eval, args));
 *   } catch (Exception e) {
 *     System.err.println(e.getMessage());
 *   }
 * }
 * </pre> </code>
 * <p>
 *
 * ------------------------------------------------------------------------ <p>
 *
 * @author   Mark Hall (mhall@cs.waikato.ac.nz)
 * @version  $Revision: 1.3 $
 */
public class AttributeSelection implements Serializable {

 /** log4j logger for this class; one static instance shared by all objects */
 public static Logger                log = Logger.getLogger(AttributeSelection.class);
  /** the instances to select attributes from */
  private Instances m_trainInstances;

  /** the attribute/subset evaluator (see setEvaluator) */
  private ASEvaluation m_ASEvaluator;

  /** the search method (see setSearch) */
  private ASSearch m_searchMethod;

  /** the number of folds to use for cross validation; the constructor
      sets this to 10 */
  private int m_numFolds;

  /** holds a string describing the results of the attribute selection */
  private StringBuffer m_selectionResults;

  /** rank features (if allowed by the search method); setSearch also
      updates this flag when the search is a RankedOutputSearch */
  private boolean m_doRank;

  /** do cross validation; the constructor sets this to false */
  private boolean m_doXval;

  /** seed used to randomly shuffle instances for cross validation;
      the constructor sets this to 1 */
  private int m_seed;

  /** cutoff value by which to select attributes for ranked results.
      Not initialised by the constructor (the assignment there is
      commented out); set through setThreshold */
  private double m_threshold;

  /** number of attributes requested from ranked results */
  private int m_numToSelect;

  /** the selected attributes; null until attribute selection has been
      performed (selectedAttributes() throws while it is null) */
  private int [] m_selectedAttributeSet;

  /** the attribute indexes and associated merits if a ranking is produced;
      null until a ranking exists (rankedAttributes() throws while it is null) */
  private double [][] m_attributeRanking;

  /** if a feature selection run involves an attribute transformer;
      when non-null, reduceDimensionality converts instances through it
      before filtering */
  private AttributeTransformer m_transformer = null;

  /** the attribute filter for processing instances with respect to
      the most recent feature selection run; reduceDimensionality throws
      while this is null */
  private AttributeFilter m_attributeFilter = null;

  /** hold statistics for repeated feature selection, such as
      under cross validation */
  // NOTE(review): presumably per-attribute ranking statistics -- confirm
  // against the cross-validation code, which is outside this view.
  private double [][] m_rankResults = null;
  // NOTE(review): presumably per-attribute subset-selection counts -- confirm.
  private double [] m_subsetResults = null;
  // NOTE(review): presumably the number of selection runs accumulated -- confirm.
  private int m_trials = 0;

  /**
   * Reports how many attributes were chosen by the most recent run of
   * attribute selection. The value is one less than the length of the
   * array returned by {@link #selectedAttributes()}.
   * NOTE(review): the subtraction presumably discounts a trailing class
   * index stored in that array -- confirm against the selection code.
   *
   * @return the number of attributes selected
   * @exception Exception if attribute selection has not been performed yet
   */
  public int numberAttributesSelected() throws Exception {
    final int entries = selectedAttributes().length;
    return entries - 1;
  }

  /**
   * Returns the attribute set produced by the last attribute selection run.
   * The internal array itself is returned, not a copy.
   *
   * @return an array of attribute indexes
   * @exception Exception if attribute selection has not been performed yet
   */
  public int [] selectedAttributes () throws Exception {
    if (m_selectedAttributeSet != null) {
      return m_selectedAttributeSet;
    }
    throw new Exception("Attribute selection has not been performed yet!");
  }

  /**
   * Returns the attribute ranking produced by the last attribute selection
   * run. The internal array itself is returned, not a copy.
   *
   * @return a two dimensional array of ranked attribute indexes and their
   * associated merit scores as doubles.
   * @exception Exception if a ranking has not been produced
   */
  public double [][] rankedAttributes () throws Exception {
    if (m_attributeRanking != null) {
      return m_attributeRanking;
    }
    throw new Exception("Ranking has not been performed");
  }

  /**
   * Installs the attribute/subset evaluator used by subsequent runs.
   *
   * @param evaluator the evaluator to use
   */
  public void setEvaluator (ASEvaluation evaluator) {
    this.m_ASEvaluator = evaluator;
  }

  /**
   * Installs the search method. When the search is a
   * {@link RankedOutputSearch}, the ranking flag of this object is
   * synchronised with the search's own "generate ranking" setting.
   *
   * @param search the search method to use
   */
  public void setSearch (ASSearch search) {
    m_searchMethod = search;

    if (!(search instanceof RankedOutputSearch)) {
      // Searches that cannot rank leave the current ranking flag alone.
      return;
    }

    RankedOutputSearch rankingSearch = (RankedOutputSearch) search;
    setRanking(rankingSearch.getGenerateRanking());
  }

  /**
   * Sets how many folds a cross validation uses. The value is stored
   * as given; no validation is performed here.
   *
   * @param folds the number of folds
   */
  public void setFolds (int folds) {
    this.m_numFolds = folds;
  }

  /**
   * Requests (or cancels) production of a ranking, where the configured
   * search and evaluator make that possible.
   *
   * @param r true if a ranking is to be produced
   */
  public void setRanking (boolean r) {
    this.m_doRank = r;
  }

  /**
   * Switches cross validation on or off.
   *
   * @param x true if a cross validation is to be performed
   */
  public void setXval (boolean x) {
    this.m_doXval = x;
  }

  /**
   * Sets the random seed used when cross validating.
   *
   * @param s the seed
   */
  public void setSeed (int s) {
    this.m_seed = s;
  }

  /**
   * Sets the cutoff used to pick features out of a ranked list.
   *
   * @param t the threshold
   */
  public void setThreshold (double t) {
    this.m_threshold = t;
  }

  /**
   * Returns the textual description accumulated in the results buffer.
   *
   * @return a String describing the results of attribute selection
   */
  public String toResultsString() {
    final String description = m_selectionResults.toString();
    return description;
  }

  /**
   * Projects a set of instances onto the attributes chosen by the last
   * run of attribute selection. If an attribute transformer is in use,
   * every instance is first converted into the transformed space and the
   * filter is applied to the converted copy.
   *
   * @param in the instances to be reduced
   * @return a dimensionality reduced set of instances
   * @exception Exception if the instances can't be reduced
   */
  public Instances reduceDimensionality(Instances in) throws Exception {
    if (m_attributeFilter == null) {
      throw new Exception("No feature selection has been performed yet!");
    }

    Instances toFilter = in;

    if (m_transformer != null) {
      // Build an empty data set with the transformed header, then fill it
      // with the converted instances in their original order.
      Instances transformedHeader = m_transformer.transformedHeader();
      toFilter = new Instances(transformedHeader, in.numInstances());
      for (int idx = 0; idx < in.numInstances(); idx++) {
        Instance converted = m_transformer.convertInstance(in.instance(idx));
        toFilter.add(converted);
      }
    }

    return Filter.useFilter(toFilter, m_attributeFilter);
  }

  /**
   * Projects a single instance onto the attributes chosen by the last
   * run of attribute selection. If an attribute transformer is in use,
   * the instance is converted first. The instance is then pushed through
   * the attribute filter as a one-element batch.
   *
   * @param in the instance to be reduced
   * @return a dimensionality reduced instance
   * @exception Exception if the instance can't be reduced
   */
  public Instance reduceDimensionality(Instance in) throws Exception {
    if (m_attributeFilter == null) {
      throw new Exception("No feature selection has been performed yet!");
    }

    Instance candidate = in;
    if (m_transformer != null) {
      candidate = m_transformer.convertInstance(in);
    }

    m_attributeFilter.input(candidate);
    m_attributeFilter.batchFinished();
    return m_attributeFilter.output();
  }

  /**
   * Constructor. Sets defaults for each member variable: 10 folds,
   * no ranking, no cross validation and seed 1. The default attribute
   * evaluator is CfsSubsetEval; the default search method is
   * ForwardSelection.
   */
  public AttributeSelection () {
    // No results exist until a selection run has been performed.
    m_selectionResults = new StringBuffer();
    m_selectedAttributeSet = null;
    m_attributeRanking = null;

    setFolds(10);
    setXval(false);
    setSeed(1);
    // The threshold is deliberately not initialised here:
    //    m_threshold = -Double.MAX_VALUE;

    // setRanking must precede setSearch: setSearch overrides the ranking
    // flag when the search method is a RankedOutputSearch.
    setRanking(false);
    setEvaluator(new CfsSubsetEval());
    setSearch(new ForwardSelection());
  }

  /**
   * Perform attribute selection with a particular evaluator and
   * a set of options specifying search method and input file etc.
   * The training file named by the -I option is opened, handed to the
   * three-argument SelectAttributes overload as a set of instances,
   * and closed again before this method returns, whether the call
   * succeeds or fails.
   *
   * @param ASEvaluator an evaluator object
   * @param options an array of options, not only for the evaluator
   * but also the search method (if any) and an input data file
   * @return the results of attribute selection as a String
   * @exception Exception if no training file is set or the options cannot
   * be read (the message then carries the option summary and the original
   * exception is attached as the cause), or if reading the file or the
   * selection itself fails
   */
  public static String SelectAttributes (ASEvaluation ASEvaluator,
                                         String[] options)
    throws Exception
  {
    String trainFileName;
    ASSearch searchMethod = null;

    try {
      // get basic options (options the same for all attribute selectors)
      trainFileName = Utils.getOption('I', options);

      if (trainFileName.length() == 0) {
        // Instantiate the named search method (if any) so that it can be
        // passed to makeOptionString for the message built below.
        String searchName = Utils.getOption('S', options);

        if (searchName.length() != 0) {
          searchMethod = (ASSearch)Class.forName(searchName).newInstance();
        }

        throw  new Exception("No training file given.");
      }
    }
    catch (Exception e) {
      // Same message as before, but keep the original exception as the
      // cause so its type and stack trace are not lost.
      throw  new Exception('\n' + e.getMessage()
                           + makeOptionString(ASEvaluator, searchMethod), e);
    }

    // The reader stays open until the selection has finished, so this is
    // safe whether Instances reads eagerly or lazily; the finally block
    // guarantees the file handle is released on every path.
    FileReader trainReader = new FileReader(trainFileName);
    try {
      Instances train = new Instances(trainReader);
      return  SelectAttributes(ASEvaluator, options, train);
    }
    finally {
      trainReader.close();
    }
  }

  /**
   * returns a string summarizing the results of repeated attribute
   * selection runs on splits of a dataset.
   * @return a summary of attribute selection results
   * @exception Exception if no attribute selection has been performed.
   */
  public String CVResultsString () throws Exception {
    StringBuffer CvString = new StringBuffer();

    if ((m_subsetResults == null && m_rankResults == null) ||
	( m_trainInstances == null)) {
      throw new Exception("Attribute selection has not been performed yet!");
    }

    int fieldWidth = (int)(Math.log(m_trainInstances.numAttributes()) +1.0);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -