📄 matlabpca.java
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    MatlabPCA.java
 *    Copyright (C) 2002 Mikhail Bilenko
 *
 */

package weka.attributeSelection;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.Filter;

/**
 * Class for performing principal components analysis/transformation. <p>
 *
 * Valid options are (these are the flags actually registered by
 * {@link #listOptions()} and parsed by {@link #setOptions(String[])}):<p>
 *
 * -D <br>
 * Don't normalize the input data. <p>
 *
 * -R &lt;variance&gt; <br>
 * Retain enough pcs to account for this proportion of the variance. <p>
 *
 * -O <br>
 * Transform through the PC space and back to the original space. <p>
 *
 * @author Misha Bilenko (mbilenko@cs.utexas.edu)
 * @author Sugato Basu (sugato@cs.utexas.edu)
 * @version $Revision: 1.1.1.1 $
 */
public class MatlabPCA extends AttributeEvaluator
  implements AttributeTransformer, OptionHandler {

  /** Turns the debugging output on/off */
  private boolean m_debug = true;

  /** The data to analyse/transform */
  private Instances m_trainInstances;

  /** Keep a copy for the class attribute (if set) */
  private Instances m_trainCopy;

  /** The header for the transformed data format */
  private Instances m_transformedFormat;

  /** The header for data transformed back to the original space */
  private Instances m_originalSpaceFormat;

  /** Data has a class set */
  private boolean m_hasClass;

  /** Class index */
  private int m_classIndex;

  /** Number of attributes */
  private int m_numAttribs;

  /** Number of instances */
  private int m_numInstances;

  /** Name of the Matlab program file that computes PCA */
  protected String m_PCAMFile = "/var/local/MatlabPCA.m";

  /** Will hold the ordered linear transformations of the (normalized)
      original data */
  private double [][] m_eigenvectors;

  /** Eigenvalues for the corresponding eigenvectors */
  private double [] m_eigenvalues = null;

  /** A timestamp suffix for matching vectors with attributes */
  String m_timestamp = null;

  /** Name of the file where attribute names will be stored */
  String m_pcaAttributeFilename = null;
  String m_pcaAttributeFilenameBase = "/var/local/PCAattributes";

  /** Name of the file where original data will be stored */
  String m_dataFilename = "/var/local/PCAdataMatrix.txt";

  /** Name of the file where eigenvectors will be stored */
  public String m_eigenvectorFilename = null;
  public String m_eigenvectorFilenameBase = "/var/local/PCAeigenVectors";

  /** Name of the file where eigenvalues will be stored */
  public String m_eigenvalueFilename = "/var/local/PCAeigenValues.txt";

  /** sum of the eigenvalues */
  private double m_sumOfEigenValues = 0.0;

  /** Filters for original data */
  private ReplaceMissingValues m_replaceMissingFilter;
  private Normalize m_normalizeFilter;
  private Remove m_attributeFilter;

  /** The number of attributes in the pc transformed data */
  private int m_outputNumAtts = -1;

  /** normalize the input data? */
  private boolean m_normalize = false;

  /** the amount of variance to cover in the original data when
      retaining the best n PC's */
  private double m_coverVariance = 0.95;

  /** transform the data through the pc space and back to the original
      space ? */
  private boolean m_transBackToOriginal = false;

  /** holds the transposed eigenvectors for converting back to the
      original space */
  private double [][] m_eTranspose;

  /**
   * Returns a string describing this attribute transformer
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Performs a principal components analysis and transformation of "
      + "the data. Use in conjunction with a Ranker search. Dimensionality "
      + "reduction is accomplished by choosing enough eigenvectors to "
      + "account for some percentage of the variance in the original data---"
      + "default 0.95 (95%). Attribute noise can be filtered by transforming "
      + "to the PC space, eliminating some of the worst eigenvectors, and "
      + "then transforming back to the original space.";
  }

  /**
   * Returns an enumeration describing the available options
   * (-D, -R and -O). <p>
   *
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    Vector optionList = new Vector(3);

    optionList.addElement(new Option("\tDon't normalize input data.",
                                     "D", 0, "-D"));

    optionList.addElement(new Option("\tRetain enough PC attributes to account "
                                     + "\n\tfor this proportion of variance in "
                                     + "the original data. (default = 0.95)",
                                     "R", 1, "-R"));

    optionList.addElement(new Option("\tTransform through the PC space and "
                                     + "\n\tback to the original space.",
                                     "O", 0, "-O"));

    return optionList.elements();
  }

  /**
   * Parses a given list of options. All settings are first reset to
   * their defaults, then overridden by whatever the list contains.
   *
   * Valid options are:<p>
   *
   * -D <br>
   * Don't normalize the input data. <p>
   *
   * -R &lt;variance&gt; <br>
   * Retain enough pcs to account for this proportion of the variance. <p>
   *
   * -O <br>
   * Transform through the PC space and back to the original space. <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported; a -R value that
   * is not a valid number surfaces as a NumberFormatException
   */
  public void setOptions (String[] options) throws Exception {
    resetOptions();

    String varianceText = Utils.getOption('R', options);
    if (varianceText.length() != 0) {
      setVarianceCovered(Double.parseDouble(varianceText));
    }

    // Normalization is on unless -D is supplied.
    setNormalize(!Utils.getFlag('D', options));

    setTransformBackToOriginal(Utils.getFlag('O', options));
  }

  /**
   * Reset to defaults
   */
  private void resetOptions() {
    m_coverVariance = 0.95;
    // NOTE(review): this default disables normalization, whereas
    // setOptions() called without -D enables it. Confirm which default
    // is intended before changing either side.
    m_normalize = false;
    m_sumOfEigenValues = 0.0;
    m_transBackToOriginal = false;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String normalizeTipText() {
    return "Normalize input data.";
  }

  /**
   * Set whether input data will be normalized.
   * @param n true if input data is to be normalized
   */
  public void setNormalize(boolean n) {
    m_normalize = n;
  }

  /**
   * Gets whether or not input data is to be normalized
   * @return true if input data is to be normalized
   */
  public boolean getNormalize() {
    return m_normalize;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String varianceCoveredTipText() {
    return "Retain enough PC attributes to account for this proportion of "
      + "variance.";
  }

  /**
   * Sets the amount of variance to account for when retaining
   * principal components. The value is stored as given; no range
   * check is performed here.
   * @param vc the proportion of total variance to account for
   */
  public void setVarianceCovered(double vc) {
    m_coverVariance = vc;
  }

  /**
   * Gets the proportion of total variance to account for when
   * retaining principal components
   * @return the proportion of variance to account for
   */
  public double getVarianceCovered() {
    return m_coverVariance;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String transformBackToOriginalTipText() {
    return "Transform through the PC space and back to the original space. "
      + "If only the best n PCs are retained (by setting varianceCovered < 1) "
      + "then this option will give a dataset in the original space but with "
      + "less attribute noise.";
  }

  /**
   * Sets whether the data should be transformed back to the original
   * space
   * @param b true if the data should be transformed back to the
   * original space
   */
  public void setTransformBackToOriginal(boolean b) {
    m_transBackToOriginal = b;
  }

  /**
   * Gets whether the data is to be transformed back to the original
   * space.
   * @return true if the data is to be transformed back to the original space
   */
  public boolean getTransformBackToOriginal() {
    return m_transBackToOriginal;
  }

  /**
   * Gets the current settings of MatlabPCA. The returned array always
   * has four slots; slots not needed for "-D", "-R", the variance value
   * and "-O" are filled with empty strings.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    String[] options = new String[4];
    int current = 0;

    if (!getNormalize()) {
      options[current++] = "-D";
    }

    options[current++] = "-R";
    options[current++] = String.valueOf(getVarianceCovered());

    if (getTransformBackToOriginal()) {
      options[current++] = "-O";
    }

    // Pad the unused tail so callers never see null entries.
    while (current < options.length) {
      options[current++] = "";
    }

    return options;
  }

  /**
   * Initializes principal components and performs the analysis
   * @param data the instances to analyse/transform
   * @exception Exception if analysis fails
   */
  public void buildEvaluator(Instances data) throws Exception {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -