symmetricaluncertattributeseteval.java

来自「这是关于数据挖掘的一些算法」· Java 代码 · 共 735 行 · 第 1/2 页
JAVA
735 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* *    RELEASE INFORMATION (December 27, 2004) *     *    FCBF algorithm: *      Template obtained from Weka *      Developed for Weka by Zheng Alan Zhao    *      December 27, 2004 * *    FCBF algorithm is a feature selection method based on Symmetrical Uncertainty Measurement for  *    relevance redundancy analysis. The details of FCBF algorithm are in: * <!-- technical-plaintext-start --> * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003. <!-- technical-plaintext-end --> *     *     *     *    CONTACT INFORMATION *     *    For algorithm implementation: *    Zheng Zhao: zhaozheng at asu.edu *       *    For the algorithm: *    Lei Yu: leiyu at asu.edu *    Huan Liu: hliu at asu.edu *      *    Data Mining and Machine Learning Lab *    Computer Science and Engineering Department *    Fulton School of Engineering *    Arizona State University *    Tempe, AZ 85287 * *    SymmetricalUncertAttributeSetEval.java * *    Copyright (C) 2004 Data Mining and Machine Learning Lab,  *                       Computer Science and Engineering Department,  *		    	 Fulton School of Engineering,  *                       Arizona State University * */package weka.attributeSelection;import weka.core.Capabilities;import weka.core.ContingencyTables;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.filters.Filter;import weka.filters.supervised.attribute.Discretize;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * SymmetricalUncertAttributeSetEval :<br/> * <br/> * Evaluates the worth of a set attributes by measuring the symmetrical uncertainty with respect to another set of attributes. <br/> * <br/> *  SymmU(AttributeSet2, AttributeSet1) = 2 * (H(AttributeSet2) - H(AttributeSet1 | AttributeSet2)) / H(AttributeSet2) + H(AttributeSet1).<br/> * <br/> * For more information see:<br/> * <br/> * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Yu2003, *    author = {Lei Yu and Huan Liu}, *    booktitle = {Proceedings of the Twentieth International Conference on Machine Learning}, *    pages = {856-863}, *    publisher = {AAAI Press}, *    title = {Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution}, *    year = {2003} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -M *  treat missing values as a seperate value.</pre> *  <!-- options-end --> * * @author Zheng Zhao: zhaozheng at asu.edu * @version $Revision: 1.6 $ * @see Discretize */public class SymmetricalUncertAttributeSetEval  extends AttributeSetEvaluator  implements OptionHandler, TechnicalInformationHandler {    /** for serialization */  static final long serialVersionUID = 8351377335495873202L;  /** The training instances */  private Instances m_trainInstances;  /** The class index */  private int m_classIndex;  /** The number of attributes */  private int m_numAttribs;  /** The number of instances */  private int m_numInstances;  /** The number of classes */  private int m_numClasses;  /** Treat missing values as a seperate value */  private boolean m_missing_merge;  /**   * Returns a string describing this attribute evaluator   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "SymmetricalUncertAttributeSetEval :\n\nEvaluates the worth of a set attributes "      +"by measuring the symmetrical uncertainty with respect to another set of attributes. "      +"\n\n SymmU(AttributeSet2, AttributeSet1) = 2 * (H(AttributeSet2) - H(AttributeSet1 | AttributeSet2)) "      +"/ H(AttributeSet2) + H(AttributeSet1).\n\n"      + "For more information see:\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "Lei Yu and Huan Liu");    result.setValue(Field.TITLE, "Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution");    result.setValue(Field.BOOKTITLE, "Proceedings of the Twentieth International Conference on Machine Learning");    result.setValue(Field.YEAR, "2003");    result.setValue(Field.PAGES, "856-863");    result.setValue(Field.PUBLISHER, "AAAI Press");        return result;  }  /**   * Constructor   */  public SymmetricalUncertAttributeSetEval () {    resetOptions();  }  /**   * Returns an enumeration describing the available options.   * @return an enumeration of all the available options.   **/  public Enumeration listOptions () {    Vector newVector = new Vector(1);    newVector.addElement(new Option("\ttreat missing values as a seperate "                                    + "value.", "M", 0, "-M"));    return  newVector.elements();  }  /**   * Parses a given list of options. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -M   *  treat missing values as a seperate value.</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions (String[] options)    throws Exception {    resetOptions();    setMissingMerge(!(Utils.getFlag('M', options)));  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String missingMergeTipText() {    return "Distribute counts for missing values. Counts are distributed "      +"across other values in proportion to their frequency. Otherwise, "      +"missing is treated as a separate value.";  }  /**   * distribute the counts for missing values across observed values   *   * @param b true=distribute missing values.   */  public void setMissingMerge (boolean b) {    m_missing_merge = b;  }  /**   * get whether missing values are being distributed or not   *   * @return true if missing values are being distributed.   */  public boolean getMissingMerge () {    return  m_missing_merge;  }  /**   * Gets the current settings of WrapperSubsetEval.   * @return an array of strings suitable for passing to setOptions()   */  public String[] getOptions () {    String[] options = new String[1];    int current = 0;    if (!getMissingMerge()) {      options[current++] = "-M";    }    while (current < options.length) {      options[current++] = "";    }    return  options;  }  /**   * Returns the capabilities of this evaluator.   *   * @return            the capabilities of this evaluator   * @see               Capabilities   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();        // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.NUMERIC_ATTRIBUTES);    result.enable(Capability.DATE_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);        // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);        return result;  }  /**   * Initializes a symmetrical uncertainty attribute evaluator.   * Discretizes all attributes that are numeric.   *   * @param data set of instances serving as training data   * @throws Exception if the evaluator has not been   * generated successfully   */  public void buildEvaluator (Instances data)    throws Exception {    // can evaluator handle data?    getCapabilities().testWithFail(data);    m_trainInstances = data;    m_classIndex = m_trainInstances.classIndex();    m_numAttribs = m_trainInstances.numAttributes();    m_numInstances = m_trainInstances.numInstances();    Discretize disTransform = new Discretize();    disTransform.setUseBetterEncoding(true);    disTransform.setInputFormat(m_trainInstances);    m_trainInstances = Filter.useFilter(m_trainInstances, disTransform);    m_numClasses = m_trainInstances.attribute(m_classIndex).numValues();  }  /**   * set options to default values   */  protected void resetOptions () {    m_trainInstances = null;    m_missing_merge = true;  }  /**   * evaluates an individual attribute by measuring the symmetrical   * uncertainty between it and the class.   *   * @param attribute the index of the attribute to be evaluated   * @return the uncertainty   * @throws Exception if the attribute could not be evaluated   */  public double evaluateAttribute (int attribute)    throws Exception {    int i, j, ii, jj;    int ni, nj;    double sum = 0.0;    ni = m_trainInstances.attribute(attribute).numValues() + 1;    nj = m_numClasses + 1;    double[] sumi, sumj;    Instance inst;    double temp = 0.0;    sumi = new double[ni];    sumj = new double[nj];    double[][] counts = new double[ni][nj];    sumi = new double[ni];    sumj = new double[nj];    for (i = 0; i < ni; i++) {      sumi[i] = 0.0;      for (j = 0; j < nj; j++) {        sumj[j] = 0.0;        counts[i][j] = 0.0;      }    }    // Fill the contingency table    for (i = 0; i < m_numInstances; i++) {      inst = m_trainInstances.instance(i);      if (inst.isMissing(attribute)) {        ii = ni - 1;      }      else {        ii = (int)inst.value(attribute);      }
symmetricaluncertattributeseteval.java - 源码说明

本页面展示了「这是关于数据挖掘的一些算法」中的 symmetricaluncertattributeseteval.java 源码文件，采用 Java 编程语言编写，共 735 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与数据挖掘相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?