numericcleaner.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 811 行 · 第 1/2 页
JAVA
811 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * NumericCleaner.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand */package weka.filters.unsupervised.attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.Range;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.SimpleStreamFilter;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * A filter that 'cleanses' the numeric data from values that are too small, too big or very close to a certain value (e.g., 0) and sets these values to a pre-defined default. * <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Turns on output of debugging information.</pre> *  * <pre> -min &lt;double&gt; *  The minimum threshold. (default -Double.MAX_VALUE)</pre> *  * <pre> -min-default &lt;double&gt; *  The replacement for values smaller than the minimum threshold. *  (default -Double.MAX_VALUE)</pre> *  * <pre> -max &lt;double&gt; *  The maximum threshold. (default Double.MAX_VALUE)</pre> *  * <pre> -max-default &lt;double&gt; *  The replacement for values larger than the maximum threshold. 
*  (default Double.MAX_VALUE)</pre> *  * <pre> -closeto &lt;double&gt; *  The number values are checked for closeness. (default 0)</pre> *  * <pre> -closeto-default &lt;double&gt; *  The replacement for values that are close to '-closeto'. *  (default 0)</pre> *  * <pre> -closeto-tolerance &lt;double&gt; *  The tolerance below which numbers are considered being close to  *  to each other. (default 1E-6)</pre> *  * <pre> -decimals &lt;int&gt; *  The number of decimals to round to, -1 means no rounding at all. *  (default -1)</pre> *  * <pre> -R &lt;col1,col2,...&gt; *  The list of columns to cleanse, e.g., first-last or first-3,5-last. *  (default first-last)</pre> *  * <pre> -V *  Inverts the matching sense.</pre> *  * <pre> -include-class *  Whether to include the class in the cleansing. *  The class column will always be skipped, if this flag is not *  present. (default no)</pre> *  <!-- options-end --> * * @author  fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.1 $ */public class NumericCleaner  extends SimpleStreamFilter {  /** for serialization */  private static final long serialVersionUID = -352890679895066592L;  /** the minimum threshold */  protected double m_MinThreshold = -Double.MAX_VALUE;  /** the minimum default replacement value */  protected double m_MinDefault = -Double.MAX_VALUE;  /** the maximum threshold */  protected double m_MaxThreshold = Double.MAX_VALUE;  /** the maximum default replacement value */  protected double m_MaxDefault = Double.MAX_VALUE;  /** the number the values are checked for closeness to */  protected double m_CloseTo = 0;  /** the default replacement value for numbers "close-to" */  protected double m_CloseToDefault = 0;  /** the tolerance distance, below which numbers are considered being "close-to" */  protected double m_CloseToTolerance = 1E-6;  /** Stores which columns to cleanse */  protected Range m_Cols = new Range("first-last");  /** whether to include the class attribute */  protected boolean 
m_IncludeClass = false;    /** the number of decimals to round to (-1 means no rounding) */  protected int m_Decimals = -1;    /**   * Returns a string describing this filter.   *   * @return      a description of the filter suitable for   *              displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "A filter that 'cleanses' the numeric data from values that are too "      + "small, too big or very close to a certain value (e.g., 0) and sets "      + "these values to a pre-defined default.";  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector        result;    Enumeration   enm;    result = new Vector();    enm = super.listOptions();    while (enm.hasMoreElements())      result.addElement(enm.nextElement());    result.addElement(new Option(	"\tThe minimum threshold. (default -Double.MAX_VALUE)",	"min", 1, "-min <double>"));        result.addElement(new Option(	"\tThe replacement for values smaller than the minimum threshold.\n"	+ "\t(default -Double.MAX_VALUE)",	"min-default", 1, "-min-default <double>"));    result.addElement(new Option(	"\tThe maximum threshold. (default Double.MAX_VALUE)",	"max", 1, "-max <double>"));        result.addElement(new Option(	"\tThe replacement for values larger than the maximum threshold.\n"	+ "\t(default Double.MAX_VALUE)",	"max-default", 1, "-max-default <double>"));    result.addElement(new Option(	"\tThe number values are checked for closeness. (default 0)",	"closeto", 1, "-closeto <double>"));        result.addElement(new Option(	"\tThe replacement for values that are close to '-closeto'.\n"	+ "\t(default 0)",	"closeto-default", 1, "-closeto-default <double>"));        result.addElement(new Option(	"\tThe tolerance below which numbers are considered being close to \n"	+ "\tto each other. 
(default 1E-6)",	"closeto-tolerance", 1, "-closeto-tolerance <double>"));    result.addElement(new Option(	"\tThe number of decimals to round to, -1 means no rounding at all.\n"	+ "\t(default -1)",	"decimals", 1, "-decimals <int>"));        result.addElement(new Option(	"\tThe list of columns to cleanse, e.g., first-last or first-3,5-last.\n"	+ "\t(default first-last)",	"R", 1, "-R <col1,col2,...>"));    result.addElement(new Option(	"\tInverts the matching sense.",	"V", 0, "-V"));    result.addElement(new Option(	"\tWhether to include the class in the cleansing.\n"	+ "\tThe class column will always be skipped, if this flag is not\n"	+ "\tpresent. (default no)",	"include-class", 0, "-include-class"));    return result.elements();  }	    /**   * Gets the current settings of the filter.   *   * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    int       i;    Vector    result;    String[]  options;    result = new Vector();    options = super.getOptions();    for (i = 0; i < options.length; i++)      result.add(options[i]);    result.add("-min");     result.add("" + m_MinThreshold);    result.add("-min-default");     result.add("" + m_MinDefault);    result.add("-max");     result.add("" + m_MaxThreshold);    result.add("-max-default");     result.add("" + m_MaxDefault);    result.add("-closeto");     result.add("" + m_CloseTo);    result.add("-closeto-default");     result.add("" + m_CloseToDefault);        result.add("-closeto-tolerance");     result.add("" + m_CloseToTolerance);    result.add("-R");     result.add("" + m_Cols.getRanges());    if (m_Cols.getInvert())      result.add("-V");        if (m_IncludeClass)      result.add("-include-class");     result.add("-decimals");     result.add("" + getDecimals());    return (String[]) result.toArray(new String[result.size()]);	    }	    /**   * Parses a given list of options. 
<p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -D   *  Turns on output of debugging information.</pre>   *    * <pre> -min &lt;double&gt;   *  The minimum threshold. (default -Double.MAX_VALUE)</pre>   *    * <pre> -min-default &lt;double&gt;   *  The replacement for values smaller than the minimum threshold.   *  (default -Double.MAX_VALUE)</pre>   *    * <pre> -max &lt;double&gt;   *  The maximum threshold. (default Double.MAX_VALUE)</pre>   *    * <pre> -max-default &lt;double&gt;   *  The replacement for values larger than the maximum threshold.   *  (default Double.MAX_VALUE)</pre>   *    * <pre> -closeto &lt;double&gt;   *  The number values are checked for closeness. (default 0)</pre>   *    * <pre> -closeto-default &lt;double&gt;   *  The replacement for values that are close to '-closeto'.   *  (default 0)</pre>   *    * <pre> -closeto-tolerance &lt;double&gt;   *  The tolerance below which numbers are considered being close to    *  to each other. (default 1E-6)</pre>   *    * <pre> -decimals &lt;int&gt;   *  The number of decimals to round to, -1 means no rounding at all.   *  (default -1)</pre>   *    * <pre> -R &lt;col1,col2,...&gt;   *  The list of columns to cleanse, e.g., first-last or first-3,5-last.   *  (default first-last)</pre>   *    * <pre> -V   *  Inverts the matching sense.</pre>   *    * <pre> -include-class   *  Whether to include the class in the cleansing.   *  The class column will always be skipped, if this flag is not   *  present. 
(default no)</pre>   *    <!-- options-end -->   *    * @param options the list of options as an array of strings   * @throws Exception if an option is not supported    */  public void setOptions(String[] options) throws Exception {    String	tmpStr;    tmpStr = Utils.getOption("min", options);    if (tmpStr.length() != 0)      setMinThreshold(Double.parseDouble(tmpStr));    else      setMinThreshold(-Double.MAX_VALUE);        tmpStr = Utils.getOption("min-default", options);    if (tmpStr.length() != 0)      setMinDefault(Double.parseDouble(tmpStr));    else      setMinDefault(-Double.MAX_VALUE);        tmpStr = Utils.getOption("max", options);    if (tmpStr.length() != 0)      setMaxThreshold(Double.parseDouble(tmpStr));    else      setMaxThreshold(Double.MAX_VALUE);        tmpStr = Utils.getOption("max-default", options);    if (tmpStr.length() != 0)      setMaxDefault(Double.parseDouble(tmpStr));    else      setMaxDefault(Double.MAX_VALUE);        tmpStr = Utils.getOption("closeto", options);    if (tmpStr.length() != 0)      setCloseTo(Double.parseDouble(tmpStr));    else      setCloseTo(0);        tmpStr = Utils.getOption("closeto-default", options);    if (tmpStr.length() != 0)      setCloseToDefault(Double.parseDouble(tmpStr));    else      setCloseToDefault(0);        tmpStr = Utils.getOption("closeto-tolerance", options);    if (tmpStr.length() != 0)      setCloseToTolerance(Double.parseDouble(tmpStr));    else      setCloseToTolerance(1E-6);        tmpStr = Utils.getOption("R", options);    if (tmpStr.length() != 0)      setAttributeIndices(tmpStr);    else      setAttributeIndices("first-last");        setInvertSelection(Utils.getFlag("V", options));        setIncludeClass(Utils.getFlag("include-class", options));    tmpStr = Utils.getOption("decimals", options);    if (tmpStr.length() != 0)      setDecimals(Integer.parseInt(tmpStr));    else      setDecimals(-1);        super.setOptions(options);  }	    /**    * Returns the Capabilities of this 
filter.   *   * @return            the capabilities of this object   * @see               Capabilities   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enableAllAttributes();    result.enable(Capability.MISSING_VALUES);        // class    result.enableAllClasses();    result.enable(Capability.MISSING_CLASS_VALUES);    result.enable(Capability.NO_CLASS);        return result;  }    /**   * Determines the output format based on the input format and returns
numericcleaner.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 numericcleaner.java 源码文件，采用 Java 编程语言编写，共 811 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?