⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 principalcomponents.java

📁 这是关于数据挖掘的一些算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    PrincipalComponents.java *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand * */package weka.attributeSelection;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Matrix;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SparseInstance;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.unsupervised.attribute.NominalToBinary;import weka.filters.unsupervised.attribute.Normalize;import weka.filters.unsupervised.attribute.Remove;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Performs a principal components analysis and transformation of the data. Use in conjunction with a Ranker search. Dimensionality reduction is accomplished by choosing enough eigenvectors to account for some percentage of the variance in the original data---default 0.95 (95%). Attribute noise can be filtered by transforming to the PC space, eliminating some of the worst eigenvectors, and then transforming back to the original space. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Don't normalize input data.</pre> *  * <pre> -R *  Retain enough PC attributes to account  *  for this proportion of variance in the original data. *  (default = 0.95)</pre> *  * <pre> -O *  Transform through the PC space and  *  back to the original space.</pre> *  * <pre> -A *  Maximum number of attributes to include in  *  transformed attribute names. (-1 = include all)</pre> *  <!-- options-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @version $Revision: 1.35 $ */public class PrincipalComponents   extends UnsupervisedAttributeEvaluator   implements AttributeTransformer, OptionHandler {    /** for serialization */  static final long serialVersionUID = 3310137541055815078L;    /** The data to transform analyse/transform */  private Instances m_trainInstances;  /** Keep a copy for the class attribute (if set) */  private Instances m_trainCopy;  /** The header for the transformed data format */  private Instances m_transformedFormat;  /** The header for data transformed back to the original space */  private Instances m_originalSpaceFormat;  /** Data has a class set */  private boolean m_hasClass;  /** Class index */  private int m_classIndex;  /** Number of attributes */  private int m_numAttribs;  /** Number of instances */  private int m_numInstances;  /** Correlation matrix for the original data */  private double [][] m_correlation;  /** Will hold the unordered linear transformations of the (normalized)      original data */  private double [][] m_eigenvectors;    /** Eigenvalues for the corresponding eigenvectors */  private double [] m_eigenvalues = null;  /** Sorted eigenvalues */  private int [] m_sortedEigens;  /** sum of the eigenvalues */  private double m_sumOfEigenValues = 0.0;    /** Filters for original data */  private ReplaceMissingValues m_replaceMissingFilter;  private Normalize m_normalizeFilter;  private NominalToBinary m_nominalToBinFilter;  private Remove m_attributeFilter;    /** used to remove the class column if a class column is set */  private Remove m_attribFilter;  /** The number of attributes in the pc transformed data */  private int m_outputNumAtts = -1;    /** normalize the input data? */  private boolean m_normalize = true;  /** the amount of varaince to cover in the original data when      retaining the best n PC's */  private double m_coverVariance = 0.95;  /** transform the data through the pc space and back to the original      space ? */  private boolean m_transBackToOriginal = false;    /** maximum number of attributes in the transformed attribute name */  private int m_maxAttrsInName = 5;  /** holds the transposed eigenvectors for converting back to the      original space */  private double [][] m_eTranspose;  /**   * Returns a string describing this attribute transformer   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Performs a principal components analysis and transformation of "      +"the data. Use in conjunction with a Ranker search. Dimensionality "      +"reduction is accomplished by choosing enough eigenvectors to "      +"account for some percentage of the variance in the original data---"      +"default 0.95 (95%). Attribute noise can be filtered by transforming "      +"to the PC space, eliminating some of the worst eigenvectors, and "      +"then transforming back to the original space.";  }  /**   * Returns an enumeration describing the available options. <p>   *   * @return an enumeration of all the available options.   **/  public Enumeration listOptions () {    Vector newVector = new Vector(3);    newVector.addElement(new Option("\tDon't normalize input data."                                     , "D", 0, "-D"));    newVector.addElement(new Option("\tRetain enough PC attributes to account "                                    +"\n\tfor this proportion of variance in "                                    +"the original data.\n"                                    + "\t(default = 0.95)",                                    "R",1,"-R"));        newVector.addElement(new Option("\tTransform through the PC space and "                                    +"\n\tback to the original space."                                    , "O", 0, "-O"));                                        newVector.addElement(new Option("\tMaximum number of attributes to include in "                                    + "\n\ttransformed attribute names. (-1 = include all)"                                    , "A", 1, "-A"));    return  newVector.elements();  }  /**   * Parses a given list of options. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -D   *  Don't normalize input data.</pre>   *    * <pre> -R   *  Retain enough PC attributes to account    *  for this proportion of variance in the original data.   *  (default = 0.95)</pre>   *    * <pre> -O   *  Transform through the PC space and    *  back to the original space.</pre>   *    * <pre> -A   *  Maximum number of attributes to include in    *  transformed attribute names. (-1 = include all)</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions (String[] options)    throws Exception {    resetOptions();    String optionString;    optionString = Utils.getOption('R', options);    if (optionString.length() != 0) {      Double temp;      temp = Double.valueOf(optionString);      setVarianceCovered(temp.doubleValue());    }    optionString = Utils.getOption('A', options);    if (optionString.length() != 0) {      setMaximumAttributeNames(Integer.parseInt(optionString));    }    setNormalize(!Utils.getFlag('D', options));    setTransformBackToOriginal(Utils.getFlag('O', options));  }  /**   * Reset to defaults   */  private void resetOptions() {    m_coverVariance = 0.95;    m_normalize = true;    m_sumOfEigenValues = 0.0;    m_transBackToOriginal = false;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String normalizeTipText() {    return "Normalize input data.";  }  /**   * Set whether input data will be normalized.   * @param n true if input data is to be normalized   */  public void setNormalize(boolean n) {    m_normalize = n;  }  /**   * Gets whether or not input data is to be normalized   * @return true if input data is to be normalized   */  public boolean getNormalize() {    return m_normalize;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String varianceCoveredTipText() {    return "Retain enough PC attributes to account for this proportion of "      +"variance.";  }  /**   * Sets the amount of variance to account for when retaining   * principal components   * @param vc the proportion of total variance to account for   */  public void setVarianceCovered(double vc) {    m_coverVariance = vc;  }  /**   * Gets the proportion of total variance to account for when   * retaining principal components   * @return the proportion of variance to account for   */  public double getVarianceCovered() {    return m_coverVariance;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String maximumAttributeNamesTipText() {    return "The maximum number of attributes to include in transformed attribute names.";  }  /**   * Sets maximum number of attributes to include in   * transformed attribute names.   * @param m the maximum number of attributes   */  public void setMaximumAttributeNames(int m) {    m_maxAttrsInName = m;  }  /**   * Gets maximum number of attributes to include in   * transformed attribute names.   * @return the maximum number of attributes   */  public int getMaximumAttributeNames() {    return m_maxAttrsInName;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String transformBackToOriginalTipText() {    return "Transform through the PC space and back to the original space. "      +"If only the best n PCs are retained (by setting varianceCovered < 1) "      +"then this option will give a dataset in the original space but with "      +"less attribute noise.";  }  /**   * Sets whether the data should be transformed back to the original   * space   * @param b true if the data should be transformed back to the   * original space   */  public void setTransformBackToOriginal(boolean b) {    m_transBackToOriginal = b;  }    /**   * Gets whether the data is to be transformed back to the original   * space.   * @return true if the data is to be transformed back to the original space   */  public boolean getTransformBackToOriginal() {    return m_transBackToOriginal;  }  /**   * Gets the current settings of PrincipalComponents   *   * @return an array of strings suitable for passing to setOptions()   */  public String[] getOptions () {    String[] options = new String[6];    int current = 0;    if (!getNormalize()) {      options[current++] = "-D";    }    options[current++] = "-R";    options[current++] = ""+getVarianceCovered();    options[current++] = "-A";    options[current++] = ""+getMaximumAttributeNames();    if (getTransformBackToOriginal()) {      options[current++] = "-O";    }        while (current < options.length) {      options[current++] = "";    }        return  options;  }  /**   * Returns the capabilities of this evaluator.   *   * @return            the capabilities of this evaluator   * @see               Capabilities   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();        // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.NUMERIC_ATTRIBUTES);    result.enable(Capability.DATE_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);        // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.NUMERIC_CLASS);    result.enable(Capability.DATE_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    result.enable(Capability.NO_CLASS);        return result;  }  /**   * Initializes principal components and performs the analysis   * @param data the instances to analyse/transform   * @throws Exception if analysis fails   */  public void buildEvaluator(Instances data) throws Exception {    // can evaluator handle data?    getCapabilities().testWithFail(data);    buildAttributeConstructor(data);  }  private void buildAttributeConstructor (Instances data) throws Exception {    m_eigenvalues = null;    m_outputNumAtts = -1;    m_attributeFilter = null;    m_nominalToBinFilter = null;    m_sumOfEigenValues = 0.0;    m_trainInstances = new Instances(data);    // make a copy of the training data so that we can get the class    // column to append to the transformed data (if necessary)    m_trainCopy = new Instances(m_trainInstances);        m_replaceMissingFilter = new ReplaceMissingValues();    m_replaceMissingFilter.setInputFormat(m_trainInstances);    m_trainInstances = Filter.useFilter(m_trainInstances,                                         m_replaceMissingFilter);    if (m_normalize) {      m_normalizeFilter = new Normalize();      m_normalizeFilter.setInputFormat(m_trainInstances);      m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);    }    m_nominalToBinFilter = new NominalToBinary();    m_nominalToBinFilter.setInputFormat(m_trainInstances);    m_trainInstances = Filter.useFilter(m_trainInstances,                                         m_nominalToBinFilter);        // delete any attributes with only one distinct value or are all missing    Vector deleteCols = new Vector();    for (int i=0;i<m_trainInstances.numAttributes();i++) {      if (m_trainInstances.numDistinctValues(i) <=1) {        deleteCols.addElement(new Integer(i));      }    }    if (m_trainInstances.classIndex() >=0) {      // get rid of the class column      m_hasClass = true;      m_classIndex = m_trainInstances.classIndex();      deleteCols.addElement(new Integer(m_classIndex));    }    // remove columns from the data if necessary    if (deleteCols.size() > 0) {      m_attributeFilter = new Remove();      int [] todelete = new int [deleteCols.size()];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -