kernelfilter.java

来自「Weka」· Java 代码 · 共 877 行 · 第 1/2 页

JAVA
877
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * KernelFilter.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.filters.unsupervised.attribute;import weka.classifiers.functions.supportVector.Kernel;import weka.classifiers.functions.supportVector.PolyKernel;import weka.classifiers.functions.supportVector.RBFKernel;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.MathematicalExpression;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SingleIndex;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.core.converters.ConverterUtils.DataSource;import weka.filters.AllFilter;import weka.filters.Filter;import weka.filters.SimpleBatchFilter;import weka.filters.UnsupervisedFilter;import java.io.File;import java.util.Enumeration;import java.util.HashMap;import java.util.Vector;/** <!-- globalinfo-start --> * Converts the given set of predictor variables into a kernel matrix. 
The class value remains unchanged, as long as the preprocessing filter doesn't change it.<br/> * By default, the data is preprocessed with the Center filter, but the user can choose any filter (NB: one must be careful that the filter does not alter the class attribute unintentionally). With weka.filters.AllFilter the preprocessing gets disabled.<br/> * <br/> * For more information regarding preprocessing the data, see:<br/> * <br/> * K.P. Bennett, M.J. Embrechts: An Optimization Perspective on Kernel Partial Least Squares Regression. In: Advances in Learning Theory: Methods, Models and Applications, 227-249, 2003. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Bennett2003, *    author = {K.P. Bennett and M.J. Embrechts}, *    booktitle = {Advances in Learning Theory: Methods, Models and Applications}, *    editor = {J. Suykens et al.}, *    pages = {227-249}, *    publisher = {IOS Press, Amsterdam, The Netherlands}, *    series = {NATO Science Series, Series III: Computer and System Sciences}, *    title = {An Optimization Perspective on Kernel Partial Least Squares Regression}, *    volume = {190}, *    year = {2003} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Turns on output of debugging information.</pre> *  * <pre> -no-checks *  Turns off all checks - use with caution! *  Turning them off assumes that data is purely numeric, doesn't *  contain any missing values, and has a nominal class. Turning them *  off also means that no header information will be stored if the *  machine is linear. Finally, it also assumes that no instance has *  a weight equal to 0. 
*  (default: checks on)</pre> *  * <pre> -F &lt;filename&gt; *  The file to initialize the filter with (optional).</pre> *  * <pre> -C &lt;num&gt; *  The class index for the file to initialize with, *  First and last are valid (optional, default: last).</pre> *  * <pre> -K &lt;classname and parameters&gt; *  The Kernel to use. *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre> *  * <pre> -kernel-factor *  Defines a factor for the kernel. *   - RBFKernel: a factor for gamma *    Standardize: 1/(2*N) *    Normalize..: 6/N *  Available parameters are: *   N for # of instances, A for # of attributes *  (default: 1)</pre> *  * <pre> -P &lt;classname and parameters&gt; *  The Filter used for preprocessing (use weka.filters.AllFilter *  to disable preprocessing). *  (default: weka.filters.unsupervised.attribute.Center)</pre> *  * <pre>  * Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel: * </pre> *  * <pre> -D *  Enables debugging output (if available) to be printed. *  (default: off)</pre> *  * <pre> -no-checks *  Turns off all checks - use with caution! *  (default: checks on)</pre> *  * <pre> -C &lt;num&gt; *  The size of the cache (a prime number), 0 for full cache and  *  -1 to turn it off. *  (default: 250007)</pre> *  * <pre> -E &lt;num&gt; *  The Exponent to use. *  (default: 1.0)</pre> *  * <pre> -L *  Use lower-order terms. *  (default: no)</pre> *  * <pre>  * Options specific to preprocessing filter weka.filters.unsupervised.attribute.Center: * </pre> *  * <pre> -unset-class-temporarily *  Unsets the class index temporarily before the filter is *  applied to the data. 
*  (default: no)</pre> *  <!-- options-end --> * * @author Jonathan Miles (jdm18@cs.waikato.ac.nz)  * @author FracPete (fracpete at waikato dot ac dot nz)  * @version $Revision: 1.1 $ */public class KernelFilter  extends SimpleBatchFilter   implements UnsupervisedFilter, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 213800899640387499L;  /** The number of instances in the training data. */  protected int m_NumTrainInstances;  /** Kernel to use **/  protected Kernel m_Kernel = new PolyKernel();  /** the Kernel which is actually used for computation */  protected Kernel m_ActualKernel = null;  /** Turn off all checks and conversions? Turning them off assumes      that data is purely numeric, doesn't contain any missing values,      and has a nominal class. Turning them off also means that      no header information will be stored if the machine is linear.       Finally, it also assumes that no instance has a weight equal to 0.*/  protected boolean m_checksTurnedOff;  /** The filter used to make attributes numeric. */  protected NominalToBinary m_NominalToBinary;  /** The filter used to get rid of missing values. 
*/  protected ReplaceMissingValues m_Missing;  /** The dataset to initialize the filter with */  protected File m_InitFile = new File(System.getProperty("user.dir"));  /** the class index for the file to initialized with    * @see #m_InitFile */  protected SingleIndex m_InitFileClassIndex = new SingleIndex("last");    /** whether the filter was initialized */  protected boolean m_Initialized = false;  /** optimizes the kernel with this formula    * (A = # of attributes, N = # of instances)*/  protected String m_KernelFactorExpression = "1";  /** the calculated kernel factor   * @see #m_KernelFactorExpression */  protected double m_KernelFactor = 1.0;    /** for centering/standardizing the data */  protected Filter m_Filter = new Center();    /** for centering/standardizing the data (the actual filter to use) */  protected Filter m_ActualFilter = null;    /**   * Returns a string describing this filter.   *   * @return      a description of the filter suitable for   *              displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "Converts the given set of predictor variables into a kernel matrix. "      + "The class value remains unchangedm, as long as the preprocessing "      + "filter doesn't change it.\n"      + "By default, the data is preprocessed with the Center filter, but the "      + "user can choose any filter (NB: one must be careful that the filter "      + "does not alter the class attribute unintentionally). With "      + "weka.filters.AllFilter the preprocessing gets disabled.\n\n"      + "For more information regarding preprocessing the data, see:\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation	result;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "K.P. Bennett and M.J. Embrechts");    result.setValue(Field.TITLE, "An Optimization Perspective on Kernel Partial Least Squares Regression");    result.setValue(Field.YEAR, "2003");    result.setValue(Field.EDITOR, "J. Suykens et al.");    result.setValue(Field.BOOKTITLE, "Advances in Learning Theory: Methods, Models and Applications");    result.setValue(Field.PAGES, "227-249");    result.setValue(Field.PUBLISHER, "IOS Press, Amsterdam, The Netherlands");    result.setValue(Field.SERIES, "NATO Science Series, Series III: Computer and System Sciences");    result.setValue(Field.VOLUME, "190");        return result;  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector        result;    Enumeration   enm;    result = new Vector();    enm = super.listOptions();    while (enm.hasMoreElements())      result.addElement(enm.nextElement());        result.addElement(new Option(	"\tTurns off all checks - use with caution!\n"	+ "\tTurning them off assumes that data is purely numeric, doesn't\n"	+ "\tcontain any missing values, and has a nominal class. Turning them\n"	+ "\toff also means that no header information will be stored if the\n"	+ "\tmachine is linear. 
Finally, it also assumes that no instance has\n"	+ "\ta weight equal to 0.\n"	+ "\t(default: checks on)",	"no-checks", 0, "-no-checks"));    result.addElement(new Option(	"\tThe file to initialize the filter with (optional).",	"F", 1, "-F <filename>"));    result.addElement(new Option(	"\tThe class index for the file to initialize with,\n"	+ "\tFirst and last are valid (optional, default: last).",	"C", 1, "-C <num>"));    result.addElement(new Option(	"\tThe Kernel to use.\n"	+ "\t(default: weka.classifiers.functions.supportVector.PolyKernel)",	"K", 1, "-K <classname and parameters>"));    result.addElement(new Option(	"\tDefines a factor for the kernel.\n"	+ "\t\t- RBFKernel: a factor for gamma\n"	+ "\t\t\tStandardize: 1/(2*N)\n"	+ "\t\t\tNormalize..: 6/N\n"	+ "\tAvailable parameters are:\n"	+ "\t\tN for # of instances, A for # of attributes\n"	+ "\t(default: 1)",	"kernel-factor", 0, "-kernel-factor"));    result.addElement(new Option(	"\tThe Filter used for preprocessing (use weka.filters.AllFilter\n"	+ "\tto disable preprocessing).\n"	+ "\t(default: " + Center.class.getName() + ")",	"P", 1, "-P <classname and parameters>"));    // kernel options    result.addElement(new Option(	"",	"", 0, "\nOptions specific to kernel "	+ getKernel().getClass().getName() + ":"));        enm = ((OptionHandler) getKernel()).listOptions();    while (enm.hasMoreElements())      result.addElement(enm.nextElement());    // filter options    if (getPreprocessing() instanceof OptionHandler) {      result.addElement(new Option(	  "",	  "", 0, "\nOptions specific to preprocessing filter "	  + getPreprocessing().getClass().getName() + ":"));      enm = ((OptionHandler) getPreprocessing()).listOptions();      while (enm.hasMoreElements())	result.addElement(enm.nextElement());    }        return result.elements();  }	    /**   * Gets the current settings of the filter.   
*   * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    int		i;    Vector	result;    String[]	options;    String	tmpStr;    result = new Vector();    options = super.getOptions();    for (i = 0; i < options.length; i++)      result.add(options[i]);        if (getChecksTurnedOff())      result.add("-no-checks");    if ((getInitFile() != null) && getInitFile().isFile()) {      result.add("-F");      result.add("" + getInitFile().getAbsolutePath());      result.add("-C");      result.add("" + getInitFileClassIndex());    }    result.add("-K");    result.add("" + getKernel().getClass().getName() + " " + Utils.joinOptions(getKernel().getOptions()));    result.add("-kernel-factor");    result.add("" + getKernelFactorExpression());    result.add("-P");    tmpStr = getPreprocessing().getClass().getName();    if (getPreprocessing() instanceof OptionHandler)      tmpStr += " " + Utils.joinOptions(((OptionHandler) getPreprocessing()).getOptions());    result.add("" + tmpStr);    return (String[]) result.toArray(new String[result.size()]);	    }	    /**   * Parses a given list of options. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -D   *  Turns on output of debugging information.</pre>   *    * <pre> -no-checks   *  Turns off all checks - use with caution!   *  Turning them off assumes that data is purely numeric, doesn't   *  contain any missing values, and has a nominal class. Turning them   *  off also means that no header information will be stored if the   *  machine is linear. Finally, it also assumes that no instance has   *  a weight equal to 0.   
*  (default: checks on)</pre>   *    * <pre> -F &lt;filename&gt;   *  The file to initialize the filter with (optional).</pre>   *    * <pre> -C &lt;num&gt;   *  The class index for the file to initialize with,   *  First and last are valid (optional, default: last).</pre>   *    * <pre> -K &lt;classname and parameters&gt;   *  The Kernel to use.   *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>   *    * <pre> -kernel-factor   *  Defines a factor for the kernel.   *   - RBFKernel: a factor for gamma   *    Standardize: 1/(2*N)   *    Normalize..: 6/N   *  Available parameters are:   *   N for # of instances, A for # of attributes   *  (default: 1)</pre>   *    * <pre> -P &lt;classname and parameters&gt;   *  The Filter used for preprocessing (use weka.filters.AllFilter   *  to disable preprocessing).   *  (default: weka.filters.unsupervised.attribute.Center)</pre>   *    * <pre>    * Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:   * </pre>   *    * <pre> -D   *  Enables debugging output (if available) to be printed.   *  (default: off)</pre>   *    * <pre> -no-checks   *  Turns off all checks - use with caution!   *  (default: checks on)</pre>   *    * <pre> -C &lt;num&gt;   *  The size of the cache (a prime number), 0 for full cache and    *  -1 to turn it off.   *  (default: 250007)</pre>

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?