⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 relieffattributeeval.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:

/**
 *
 *   AgentAcademy - an open source Data Mining framework for
 *   training intelligent agents
 *
 *   Copyright (C)   2001-2003 AA Consortium.
 *
 *   This library is open source software; you can redistribute it
 *   and/or modify it under the terms of the GNU Lesser General
 *   Public License as published by the Free Software Foundation;
 *   either version 2.0 of the License, or (at your option) any later
 *   version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free
 *   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *   MA  02111-1307 USA
 *
 */

package  org.agentacademy.modules.dataminer.attributeSelection;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import org.agentacademy.modules.dataminer.core.Attribute;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.Utils;
import org.apache.log4j.Logger;

/**
 * Class for Evaluating attributes individually using ReliefF. <p>
 *
 * For more information see: <p>
 *
 * Kira, K. and Rendell, L. A. (1992). A practical approach to feature
 * selection. In D. Sleeman and P. Edwards, editors, <i>Proceedings of
 * the International Conference on Machine Learning,</i> pages 249-256.
 * Morgan Kaufmann. <p>
 *
 * Kononenko, I. (1994). Estimating attributes: analysis and extensions of
 * Relief. In De Raedt, L. and Bergadano, F., editors, <i> Machine Learning:
 * ECML-94, </i> pages 171-182. Springer Verlag. <p>
 *
 * Marko Robnik Sikonja, Igor Kononenko: An adaptation of Relief for attribute
 * estimation on regression. In D.Fisher (ed.): <i> Machine Learning,
 * Proceedings of 14th International Conference on Machine Learning ICML'97,
 * </i> Nashville, TN, 1997. <p>
 *
 *
 * Valid options are:
 *
 * -M <number of instances> <br>
 * Specify the number of instances to sample when estimating attributes. <br>
 * If not specified then all instances will be used. <p>
 *
 * -D <seed> <br>
 * Seed for randomly sampling instances. <p>
 *
 * -K <number of neighbours> <br>
 * Number of nearest neighbours to use for estimating attributes. <br>
 * (Default is 10). <p>
 *
 * -W <br>
 * Weight nearest neighbours by distance. <p>
 *
 * -A <sigma> <br>
 * Specify sigma value (used in an exp function to control how quickly <br>
 * weights decrease for more distant instances). Use in conjunction with <br>
 * -W. Sensible values = 1/5 to 1/10 the number of nearest neighbours. <br>
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.3 $
 */
public class ReliefFAttributeEval
  extends AttributeEvaluator
  implements OptionHandler
{
 /** log4j logger obtained for this class. */
 public static Logger                log = Logger.getLogger(ReliefFAttributeEval.class);
  /** The training instances. */
  private Instances m_trainInstances;

  /** The index of the class attribute. */
  private int m_classIndex;

  /** The number of attributes. */
  private int m_numAttribs;

  /** The number of instances. */
  private int m_numInstances;

  /** True if the class attribute is numeric. */
  private boolean m_numericClass;

  /** The number of classes (only meaningful if the class is nominal). */
  private int m_numClasses;

  /**
   * Numeric class case: holds the probability of a different class value
   * given the nearest instances.
   */
  private double m_ndc;

  /**
   * Numeric class case: holds the probability of a different value of an
   * attribute given the nearest instances.
   */
  private double[] m_nda;

  /**
   * Numeric class case: holds the probability of a different class value
   * and a different attribute value given the nearest instances.
   */
  private double[] m_ndcda;

  /** Holds the weights that relief assigns to attributes. */
  private double[] m_weights;

  /** Prior class probabilities (discrete class case). */
  private double[] m_classProbs;

  /**
   * The number of instances to sample when estimating attributes
   * (option -M, see setSampleSize). A value of -1 means that all
   * instances are used.
   */
  private int m_sampleM;

  /** The number of nearest hits/misses (option -K, see setNumNeighbours). */
  private int m_Knn;

  /** k nearest scores + instance indexes for n classes. */
  private double[][][] m_karray;

  /** Upper bound for numeric attributes. */
  private double[] m_maxArray;

  /** Lower bound for numeric attributes. */
  private double[] m_minArray;

  /** Keeps track of the farthest instance for each class. */
  private double[] m_worst;

  /** Index in m_karray of the farthest instance for each class. */
  private int[] m_index;

  /** Number of nearest neighbours stored for each class. */
  private int[] m_stored;

  /** Random number seed used for sampling instances (option -D, see setSeed). */
  private int m_seed;

  /**
   *  Used to (optionally) weight nearest neighbours by their distance
   *  from the instance in question. Each entry holds
   *  exp(-((rank(r_i, i_j)/sigma)^2)) where rank(r_i, i_j) is the rank of
   *  instance i_j in a sequence of instances ordered by the distance
   *  from r_i.
   **/
  private double[] m_weightsByRank;

  /**
   *  The user defined sigma used in the rank weighting formula of
   *  m_weightsByRank (option -A). setSigma rejects values that are
   *  not positive.
   *  NOTE(review): this comment used to state "default=20", but the -A
   *  help text in listOptions reports "(Default = 2)". The default is
   *  assigned outside this part of the file (presumably in
   *  resetOptions) -- confirm which value is correct.
   **/
  private int m_sigma;

  /** Weight by distance rather than equal weights (option -W). */
  private boolean m_weightByDistance;

  /**
   * Describes this attribute evaluator in plain text.
   *
   * @return the description of the evaluator, suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // Kept as a single constant expression; only the line breaks differ.
    final String description =
      "ReliefFAttributeEval :\n\n"
      + "Evaluates the worth of an attribute by repeatedly sampling an "
      + "instance and considering the value of the given attribute for the "
      + "nearest instance of the same and different class. "
      + "Can operate on both discrete and continuous class data.\n";
    return description;
  }

  /**
   * Creates a new evaluator. All option handling is delegated to
   * resetOptions(), which is invoked once during construction.
   */
  public ReliefFAttributeEval () {
    this.resetOptions();
  }


  /**
   * Returns an enumeration describing the available options.
   * One Option is registered for each of -M, -D, -K, -W and -A, in
   * that order.
   *
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    // Five options are registered below, so size the vector for five
    // (it was previously created with capacity 4 and had to grow).
    Vector newVector = new Vector(5);

    newVector.addElement(new Option(
        "\tSpecify the number of instances to\n"
        + "\tsample when estimating attributes.\n"
        + "\tIf not specified, then all instances\n"
        + "\twill be used.",
        "M", 1, "-M <num instances>"));

    newVector.addElement(new Option(
        "\tSeed for randomly sampling instances.\n"
        + "\t(Default = 1)",
        "D", 1, "-D <seed>"));

    newVector.addElement(new Option(
        "\tNumber of nearest neighbours (k) used\n"
        + "\tto estimate attribute relevances\n"
        + "\t(Default = 10).",
        "K", 1, "-K <number of neighbours>"));

    // -W is a flag and therefore takes no argument.
    newVector.addElement(new Option(
        "\tWeight nearest neighbours by distance\n",
        "W", 0, "-W"));

    newVector.addElement(new Option(
        "\tSpecify sigma value (used in an exp\n"
        + "\tfunction to control how quickly\n"
        + "\tweights for more distant instances\n"
        + "\tdecrease. Use in conjunction with -W.\n"
        + "\tSensible value=1/5 to 1/10 of the\n"
        + "\tnumber of nearest neighbours.\n"
        + "\t(Default = 2)",
        "A", 1, "-A <num>"));

    return  newVector.elements();
  }


  /**
   * Parses a given list of options. resetOptions() is called first, so
   * settings are re-established before any supplied option is applied.
   *
   * Valid options are: <p>
   *
   * -M &lt;number of instances&gt; <br>
   * Specify the number of instances to sample when estimating attributes. <br>
   * If not specified then all instances will be used. <p>
   *
   * -D &lt;seed&gt; <br>
   * Seed for randomly sampling instances. <p>
   *
   * -K &lt;number of neighbours&gt; <br>
   * Number of nearest neighbours to use for estimating attributes. <br>
   * (Default is 10). <p>
   *
   * -W <br>
   * Weight nearest neighbours by distance. <p>
   *
   * -A &lt;sigma&gt; <br>
   * Specify sigma value (used in an exp function to control how quickly <br>
   * weights decrease for more distant instances). Supplying -A also turns <br>
   * on weighting by distance. Sensible values = 1/5 to 1/10 the number of <br>
   * nearest neighbours. <br>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if a numeric option value cannot be parsed, if
   * sigma is not positive, or if raised by the Utils option helpers
   *
   **/
  public void setOptions (String[] options)
    throws Exception
  {
    resetOptions();

    // The flag is applied unconditionally: absent -W means "off" here.
    final boolean byDistance = Utils.getFlag('W', options);
    setWeightByDistance(byDistance);

    final String sampleArg = Utils.getOption('M', options);
    if (sampleArg.length() > 0) {
      setSampleSize(Integer.parseInt(sampleArg));
    }

    final String seedArg = Utils.getOption('D', options);
    if (seedArg.length() > 0) {
      setSeed(Integer.parseInt(seedArg));
    }

    final String neighboursArg = Utils.getOption('K', options);
    if (neighboursArg.length() > 0) {
      setNumNeighbours(Integer.parseInt(neighboursArg));
    }

    final String sigmaArg = Utils.getOption('A', options);
    if (sigmaArg.length() > 0) {
      // A sigma value implies weighting by distance, even without -W.
      setWeightByDistance(true);
      setSigma(Integer.parseInt(sigmaArg));
    }
  }

  /**
   * Supplies the tip text for the sigma property.
   *
   * @return tip text for this property, suitable for
   * displaying in the explorer/experimenter gui
   */
  public String sigmaTipText() {
    final String tip =
      "Set influence of nearest neighbours. "
      + "Used in an exp function to control how quickly weights decrease "
      + "for more distant instances. "
      + "Use in conjunction with weightByDistance. "
      + "Sensible values = 1/5 to 1/10 the number of nearest neighbours.";
    return tip;
  }

  /**
   * Stores a new sigma value after checking that it is positive.
   *
   * @param s the value of sigma (must be &gt; 0)
   * @exception Exception if s is zero or negative; the stored value is
   * left untouched in that case
   */
  public void setSigma (int s)
    throws Exception
  {
    final boolean positive = s > 0;

    if (!positive) {
      throw  new Exception("value of sigma must be > 0!");
    }

    this.m_sigma = s;
  }


  /**
   * Reports the sigma value currently held by this evaluator.
   *
   * @return the current sigma value.
   */
  public int getSigma () {
    final int sigma = this.m_sigma;
    return  sigma;
  }

  /**
   * Supplies the tip text for the numNeighbours property.
   *
   * @return tip text for this property, suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numNeighboursTipText() {
    final String tip = "Number of nearest neighbours for attribute estimation.";
    return tip;
  }

  /**
   * Stores the number of nearest neighbours. The value is taken as
   * given; no range check is applied here.
   *
   * @param n the number of nearest neighbours.
   */
  public void setNumNeighbours (int n) {
    this.m_Knn = n;
  }


  /**
   * Reports the number of nearest neighbours currently configured.
   *
   * @return the number of nearest neighbours
   */
  public int getNumNeighbours () {
    final int neighbours = this.m_Knn;
    return  neighbours;
  }

  /**
   * Supplies the tip text for the seed property.
   *
   * @return tip text for this property, suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    final String tip = "Random seed for sampling instances.";
    return tip;
  }

  /**
   * Stores the random number seed used when sampling instances.
   *
   * @param s the random number seed.
   */
  public void setSeed (int s) {
    this.m_seed = s;
  }


  /**
   * Reports the random number seed currently configured for sampling
   * instances.
   *
   * @return the random number seed.
   */
  public int getSeed () {
    final int seed = this.m_seed;
    return  seed;
  }

  /**
   * Supplies the tip text for the sampleSize property.
   *
   * @return tip text for this property, suitable for
   * displaying in the explorer/experimenter gui
   */
  public String sampleSizeTipText() {
    final String tip =
      "Number of instances to sample. "
      + "Default (-1) indicates that all instances will be used for "
      + "attribute estimation.";
    return tip;
  }

  /**
   * Stores the number of instances to sample for attribute estimation.
   * The value is taken as given; no range check is applied here.
   *
   * @param s the number of instances to sample.
   */
  public void setSampleSize (int s) {
    this.m_sampleM = s;
  }


  /**
   * Get the number of instances used for estimating attributes
   *
   * @return the number of instances.
   */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -