principalcomponents.java
来自「Weka」· Java 代码 · 共 803 行 · 第 1/2 页
JAVA
803 行
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * PrincipalComponents.java * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand */package weka.filters.unsupervised.attribute;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SparseInstance;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.matrix.EigenvalueDecomposition;import weka.core.matrix.Matrix;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Performs a principal components analysis and transformation of the data.<br/> * Dimensionality reduction is accomplished by choosing enough eigenvectors to account for some percentage of the variance in the original data -- default 0.95 (95%).<br/> * Based on code of the attribute selection scheme 'PrincipalComponents' by Mark Hall and Gabi Schmidberger. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Don't normalize input data.</pre> * * <pre> -R <num> * Retain enough PC attributes to account * for this proportion of variance in the original data. 
* (default: 0.95)</pre>
 *
 * <pre> -A <num>
 *  Maximum number of attributes to include in
 *  transformed attribute names.
 *  (-1 = include all, default: 5)</pre>
 *
 * <pre> -M <num>
 *  Maximum number of PC attributes to retain.
 *  (-1 = include all, default: -1)</pre>
 *
 * <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz) -- attribute selection code
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) -- attribute selection code
 * @author fracpete (fracpete at waikato dot ac dot nz) -- filter code
 * @version $Revision: 1.3 $
 */
public class PrincipalComponents
  extends Filter
  implements OptionHandler, UnsupervisedFilter {

  /** for serialization. */
  private static final long serialVersionUID = 4626939780964387784L;

  /** The data to analyse/transform. */
  protected Instances m_TrainInstances;

  /** Keep a copy for the class attribute (if set). */
  protected Instances m_TrainCopy;

  /** The header for the transformed data format. */
  protected Instances m_TransformedFormat;

  /** Data has a class set. */
  protected boolean m_HasClass;

  /** Class index. */
  protected int m_ClassIndex;

  /** Number of attributes. */
  protected int m_NumAttribs;

  /** Number of instances. */
  protected int m_NumInstances;

  /** Correlation matrix for the original data. */
  protected double[][] m_Correlation;

  /** Will hold the unordered linear transformations of the (normalized) original data. */
  protected double[][] m_Eigenvectors;

  /** Eigenvalues for the corresponding eigenvectors. */
  protected double[] m_Eigenvalues = null;

  /**
   * Sorted eigenvalues.
   * NOTE(review): the type is int[], so this presumably holds indices into
   * m_Eigenvalues in sorted order rather than the values themselves --
   * confirm against the code that fills it.
   */
  protected int[] m_SortedEigens;

  /** Sum of the eigenvalues. */
  protected double m_SumOfEigenValues = 0.0;

  /** Filter for replacing missing values. */
  protected ReplaceMissingValues m_ReplaceMissingFilter;

  /** Filter for normalizing the data. */
  protected Normalize m_NormalizeFilter;

  /** Filter for turning nominal values into numeric ones.
*/
  protected NominalToBinary m_NominalToBinaryFilter;

  /** Filter for removing class attribute, nominal attributes with 0 or 1 value. */
  protected Remove m_AttributeFilter;

  /** The number of attributes in the pc transformed data. */
  protected int m_OutputNumAtts = -1;

  /** Normalize the input data? */
  protected boolean m_Normalize = true;

  /** The amount of variance to cover in the original data when retaining the best n PC's. */
  protected double m_CoverVariance = 0.95;

  /** Maximum number of attributes in the transformed attribute name. */
  protected int m_MaxAttrsInName = 5;

  /** Maximum number of attributes in the transformed data (-1 for all). */
  protected int m_MaxAttributes = -1;

  /**
   * Provides the human-readable description of this filter.
   *
   * @return the description text, suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    // One constant per sentence of the description; the concatenation below
    // yields exactly the same text as a single long literal would.
    final String purpose =
        "Performs a principal components analysis and transformation of "
      + "the data.\n";
    final String reduction =
        "Dimensionality reduction is accomplished by choosing enough eigenvectors "
      + "to account for some percentage of the variance in the original data -- "
      + "default 0.95 (95%).\n";
    final String credits =
        "Based on code of the attribute selection scheme 'PrincipalComponents' "
      + "by Mark Hall and Gabi Schmidberger.";

    return purpose + reduction + credits;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
*/ public Enumeration listOptions() { Vector result = new Vector(); result.addElement(new Option( "\tDon't normalize input data.", "D", 0, "-D")); result.addElement(new Option( "\tRetain enough PC attributes to account\n" +"\tfor this proportion of variance in the original data.\n" + "\t(default: 0.95)", "R", 1, "-R <num>")); result.addElement(new Option( "\tMaximum number of attributes to include in \n" + "\ttransformed attribute names.\n" + "\t(-1 = include all, default: 5)", "A", 1, "-A <num>")); result.addElement(new Option( "\tMaximum number of PC attributes to retain.\n" + "\t(-1 = include all, default: -1)", "M", 1, "-M <num>")); return result.elements(); } /** * Parses a list of options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Don't normalize input data.</pre> * * <pre> -R <num> * Retain enough PC attributes to account * for this proportion of variance in the original data. * (default: 0.95)</pre> * * <pre> -A <num> * Maximum number of attributes to include in * transformed attribute names. * (-1 = include all, default: 5)</pre> * * <pre> -M <num> * Maximum number of PC attributes to retain. * (-1 = include all, default: -1)</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; tmpStr = Utils.getOption('R', options); if (tmpStr.length() != 0) setVarianceCovered(Double.parseDouble(tmpStr)); else setVarianceCovered(0.95); tmpStr = Utils.getOption('A', options); if (tmpStr.length() != 0) setMaximumAttributeNames(Integer.parseInt(tmpStr)); else setMaximumAttributeNames(5); tmpStr = Utils.getOption('M', options); if (tmpStr.length() != 0) setMaximumAttributes(Integer.parseInt(tmpStr)); else setMaximumAttributes(-1); setNormalize(!Utils.getFlag('D', options)); } /** * Gets the current settings of the filter. 
* * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector<String> result; result = new Vector<String>(); result.add("-R"); result.add("" + getVarianceCovered()); result.add("-A"); result.add("" + getMaximumAttributeNames()); result.add("-M"); result.add("" + getMaximumAttributes()); if (!getNormalize()) result.add("-D"); return result.toArray(new String[result.size()]); } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String normalizeTipText() { return "Normalize input data."; } /** * Set whether input data will be normalized. * * @param value true if input data is to be normalized */ public void setNormalize(boolean value) { m_Normalize = value; } /** * Gets whether or not input data is to be normalized. * * @return true if input data is to be normalized */ public boolean getNormalize() { return m_Normalize; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String varianceCoveredTipText() { return "Retain enough PC attributes to account for this proportion of variance."; } /** * Sets the amount of variance to account for when retaining * principal components. * * @param value the proportion of total variance to account for */ public void setVarianceCovered(double value) { m_CoverVariance = value; } /** * Gets the proportion of total variance to account for when * retaining principal components. * * @return the proportion of variance to account for */ public double getVarianceCovered() { return m_CoverVariance; } /** * Returns the tip text for this property. 
*
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String maximumAttributeNamesTipText() {
    final String tip =
        "The maximum number of attributes to include in transformed attribute names.";
    return tip;
  }

  /**
   * Stores the maximum number of attributes to include in
   * transformed attribute names.
   *
   * @param value the maximum number of attributes
   */
  public void setMaximumAttributeNames(int value) {
    this.m_MaxAttrsInName = value;
  }

  /**
   * Reports the maximum number of attributes to include in
   * transformed attribute names.
   *
   * @return the maximum number of attributes
   */
  public int getMaximumAttributeNames() {
    return this.m_MaxAttrsInName;
  }

  /**
   * Tip text for the "maximumAttributes" property.
   *
   * @return the tip text, suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String maximumAttributesTipText() {
    final String tip = "The maximum number of PC attributes to retain.";
    return tip;
  }

  /**
   * Stores the maximum number of PC attributes to retain.
   *
   * @param value the maximum number of attributes
   */
  public void setMaximumAttributes(int value) {
    this.m_MaxAttributes = value;
  }

  /**
   * Reports the maximum number of PC attributes to retain.
   *
   * @return the maximum number of attributes
   */
  public int getMaximumAttributes() {
    return this.m_MaxAttributes;
  }

  /**
   * Returns the capabilities of this evaluator.
   *
   * @return the capabilities of this evaluator
   * @see Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?