principalcomponents.java

来自「Weka」· Java 代码 · 共 803 行 · 第 1/2 页

JAVA
803
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * PrincipalComponents.java * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand */package weka.filters.unsupervised.attribute;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SparseInstance;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.matrix.EigenvalueDecomposition;import weka.core.matrix.Matrix;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Performs a principal components analysis and transformation of the data.<br/> * Dimensionality reduction is accomplished by choosing enough eigenvectors to account for some percentage of the variance in the original data -- default 0.95 (95%).<br/> * Based on code of the attribute selection scheme 'PrincipalComponents' by Mark Hall and Gabi Schmidberger. * <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Don't normalize input data.</pre> *  * <pre> -R &lt;num&gt; *  Retain enough PC attributes to account *  for this proportion of variance in the original data. 
*  (default: 0.95)</pre>
 *
 * <pre> -A &lt;num&gt;
 *  Maximum number of attributes to include in
 *  transformed attribute names.
 *  (-1 = include all, default: 5)</pre>
 *
 * <pre> -M &lt;num&gt;
 *  Maximum number of PC attributes to retain.
 *  (-1 = include all, default: -1)</pre>
 *
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz) -- attribute selection code
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) -- attribute selection code
 * @author fracpete (fracpete at waikato dot ac dot nz) -- filter code
 * @version $Revision: 1.3 $
 */
public class PrincipalComponents
  extends Filter
  implements OptionHandler, UnsupervisedFilter {

  /** For serialization. */
  private static final long serialVersionUID = 4626939780964387784L;

  /** The data to analyse/transform. */
  protected Instances m_TrainInstances;

  /** Keep a copy for the class attribute (if set). */
  protected Instances m_TrainCopy;

  /** The header for the transformed data format. */
  protected Instances m_TransformedFormat;

  /** Data has a class set. */
  protected boolean m_HasClass;

  /** Class index. */
  protected int m_ClassIndex;

  /** Number of attributes. */
  protected int m_NumAttribs;

  /** Number of instances. */
  protected int m_NumInstances;

  /** Correlation matrix for the original data. */
  protected double[][] m_Correlation;

  /**
   * Will hold the unordered linear transformations of the (normalized)
   * original data.
   */
  protected double[][] m_Eigenvectors;

  /** Eigenvalues for the corresponding eigenvectors. */
  protected double[] m_Eigenvalues = null;

  /**
   * Sorted eigenvalues.
   * NOTE(review): declared as int[], so this presumably holds sort indices
   * rather than the eigenvalues themselves -- confirm against the code that
   * fills it.
   */
  protected int[] m_SortedEigens;

  /** Sum of the eigenvalues. */
  protected double m_SumOfEigenValues = 0.0;

  /** Filter for replacing missing values. */
  protected ReplaceMissingValues m_ReplaceMissingFilter;

  /** Filter for normalizing the data. */
  protected Normalize m_NormalizeFilter;

  /** Filter for turning nominal values into numeric ones.
*/
  protected NominalToBinary m_NominalToBinaryFilter;

  /** Filter for removing the class attribute and nominal attributes with 0 or 1 value. */
  protected Remove m_AttributeFilter;

  /** The number of attributes in the PC-transformed data. */
  protected int m_OutputNumAtts = -1;

  /** Whether the input data is normalized. */
  protected boolean m_Normalize = true;

  /**
   * The amount of variance to cover in the original data when
   * retaining the best n PCs.
   */
  protected double m_CoverVariance = 0.95;

  /** Maximum number of attributes in the transformed attribute name. */
  protected int m_MaxAttrsInName = 5;

  /** Maximum number of attributes in the transformed data (-1 for all). */
  protected int m_MaxAttributes = -1;

  /**
   * Produces the human-readable description of this filter.
   *
   * @return a description of the filter suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    StringBuilder description = new StringBuilder();

    // What the filter does.
    description.append("Performs a principal components analysis and transformation of ");
    description.append("the data.\n");

    // How the number of retained components is chosen.
    description.append("Dimensionality reduction is accomplished by choosing enough eigenvectors ");
    description.append("to account for some percentage of the variance in the original data -- ");
    description.append("default 0.95 (95%).\n");

    // Provenance of the implementation.
    description.append("Based on code of the attribute selection scheme 'PrincipalComponents' ");
    description.append("by Mark Hall and Gabi Schmidberger.");

    return description.toString();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
*/
  public Enumeration listOptions() {
    // Typed vector instead of a raw one: avoids unchecked warnings and matches
    // the generic Vector<String> used by getOptions(). The return type stays
    // the raw Enumeration so the method signature is unchanged.
    Vector<Option> result = new Vector<Option>();

    // -D: flag without argument, turns normalization off.
    result.addElement(new Option(
        "\tDon't normalize input data.",
        "D", 0, "-D"));

    // -R <num>: proportion of variance the retained components must cover.
    result.addElement(new Option(
        "\tRetain enough PC attributes to account\n"
        + "\tfor this proportion of variance in the original data.\n"
        + "\t(default: 0.95)",
        "R", 1, "-R <num>"));

    // -A <num>: cap on original attribute names shown in a transformed name.
    result.addElement(new Option(
        "\tMaximum number of attributes to include in \n"
        + "\ttransformed attribute names.\n"
        + "\t(-1 = include all, default: 5)",
        "A", 1, "-A <num>"));

    // -M <num>: cap on the number of principal components kept.
    result.addElement(new Option(
        "\tMaximum number of PC attributes to retain.\n"
        + "\t(-1 = include all, default: -1)",
        "M", 1, "-M <num>"));

    return result.elements();
  }

  /**
   * Parses a list of options for this object. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   *  Don't normalize input data.</pre>
   *
   * <pre> -R &lt;num&gt;
   *  Retain enough PC attributes to account
   *  for this proportion of variance in the original data.
   *  (default: 0.95)</pre>
   *
   * <pre> -A &lt;num&gt;
   *  Maximum number of attributes to include in
   *  transformed attribute names.
   *  (-1 = include all, default: 5)</pre>
   *
   * <pre> -M &lt;num&gt;
   *  Maximum number of PC attributes to retain.
*  (-1 = include all, default: -1)</pre>   *    <!-- options-end -->   *   * @param options 	the list of options as an array of strings   * @throws Exception 	if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String        tmpStr;    tmpStr = Utils.getOption('R', options);    if (tmpStr.length() != 0)      setVarianceCovered(Double.parseDouble(tmpStr));    else      setVarianceCovered(0.95);    tmpStr = Utils.getOption('A', options);    if (tmpStr.length() != 0)      setMaximumAttributeNames(Integer.parseInt(tmpStr));    else      setMaximumAttributeNames(5);    tmpStr = Utils.getOption('M', options);    if (tmpStr.length() != 0)      setMaximumAttributes(Integer.parseInt(tmpStr));    else      setMaximumAttributes(-1);    setNormalize(!Utils.getFlag('D', options));  }  /**   * Gets the current settings of the filter.   *   * @return 		an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    Vector<String>	result;    result = new Vector<String>();    result.add("-R");    result.add("" + getVarianceCovered());    result.add("-A");    result.add("" + getMaximumAttributeNames());    result.add("-M");    result.add("" + getMaximumAttributes());    if (!getNormalize())      result.add("-D");    return result.toArray(new String[result.size()]);  }  /**   * Returns the tip text for this property.   *    * @return 		tip text for this property suitable for   * 			displaying in the explorer/experimenter gui   */  public String normalizeTipText() {    return "Normalize input data.";  }  /**   * Set whether input data will be normalized.   *    * @param value 	true if input data is to be normalized   */  public void setNormalize(boolean value) {    m_Normalize = value;  }  /**   * Gets whether or not input data is to be normalized.   
*    * @return 		true if input data is to be normalized   */  public boolean getNormalize() {    return m_Normalize;  }  /**   * Returns the tip text for this property.   *    * @return 		tip text for this property suitable for   * 			displaying in the explorer/experimenter gui   */  public String varianceCoveredTipText() {    return "Retain enough PC attributes to account for this proportion of variance.";  }  /**   * Sets the amount of variance to account for when retaining   * principal components.   *    * @param value 	the proportion of total variance to account for   */  public void setVarianceCovered(double value) {    m_CoverVariance = value;  }  /**   * Gets the proportion of total variance to account for when   * retaining principal components.   *    * @return 		the proportion of variance to account for   */  public double getVarianceCovered() {    return m_CoverVariance;  }  /**   * Returns the tip text for this property.   *    * @return 		tip text for this property suitable for   * 			displaying in the explorer/experimenter gui   */  public String maximumAttributeNamesTipText() {    return "The maximum number of attributes to include in transformed attribute names.";  }  /**   * Sets maximum number of attributes to include in   * transformed attribute names.   *    * @param value 	the maximum number of attributes   */  public void setMaximumAttributeNames(int value) {    m_MaxAttrsInName = value;  }  /**   * Gets maximum number of attributes to include in   * transformed attribute names.   *    * @return 		the maximum number of attributes   */  public int getMaximumAttributeNames() {    return m_MaxAttrsInName;  }  /**   * Returns the tip text for this property.   *    * @return 		tip text for this property suitable for   * 			displaying in the explorer/experimenter gui   */  public String maximumAttributesTipText() {    return "The maximum number of PC attributes to retain.";  }  /**   * Sets maximum number of PC attributes to retain.   
*
   * @param value the maximum number of attributes (-1 for all)
   */
  public void setMaximumAttributes(int value) {
    m_MaxAttributes = value;
  }

  /**
   * Gets maximum number of PC attributes to retain.
   *
   * @return the maximum number of attributes (-1 for all)
   */
  public int getMaximumAttributes() {
    return m_MaxAttributes;
  }

  /**
   * Returns the capabilities of this filter.
   *
   * @return the capabilities of this filter
   * @see Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    // (kept on its own line so the line comment cannot swallow the
    // statements that follow it)
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?