⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 principalcomponents.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    PrincipalComponents.java
 *    Copyright (C) 2000 Mark Hall
 *
 */

package weka.attributeSelection;

import  java.io.*;
import  java.util.*;
import  weka.core.*;
import  weka.filters.unsupervised.attribute.ReplaceMissingValues;
import  weka.filters.unsupervised.attribute.Normalize;
import  weka.filters.unsupervised.attribute.NominalToBinary;
import  weka.filters.unsupervised.attribute.Remove;
import  weka.filters.Filter;

/**
 * Class for performing principal components analysis/transformation. <p>
 *
 * Valid options are (these are the flags actually parsed by
 * setOptions()):<p>
 *
 * -D <br>
 * Don't normalize the input data. <p>
 *
 * -R &lt;variance&gt; <br>
 * Retain enough pcs to account for this proportion of the variance
 * (default 0.95). <p>
 *
 * -O <br>
 * Transform through the PC space and back to the original space. <p>
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @version $Revision: 1.1.1.1 $
 */
public class PrincipalComponents extends UnsupervisedAttributeEvaluator 
  implements AttributeTransformer, OptionHandler {
  
  /** The data to analyse/transform (replaced by the filtered data while
      the evaluator is being built) */
  private Instances m_trainInstances;

  /** Keep a copy for the class attribute (if set) */
  private Instances m_trainCopy;

  /** The header for the transformed data format */
  private Instances m_transformedFormat;

  /** The header for data transformed back to the original space */
  private Instances m_originalSpaceFormat;

  /** Data has a class set */
  private boolean m_hasClass;

  /** Class index */
  private int m_classIndex;

  /** Number of attributes (taken from the data after filtering) */
  private int m_numAttribs;

  /** Number of instances (taken from the data after filtering) */
  private int m_numInstances;

  /** Correlation matrix for the original data */
  private double [][] m_correlation;

  /** Will hold the unordered linear transformations of the (normalized)
      original data */
  private double [][] m_eigenvectors;
  
  /** Eigenvalues for the corresponding eigenvectors */
  private double [] m_eigenvalues = null;

  /** Result of Utils.sort(m_eigenvalues). This is an int array, so it
      presumably holds indices into m_eigenvalues in sorted order rather
      than the eigenvalues themselves -- TODO confirm against Utils.sort */
  private int [] m_sortedEigens;

  /** sum of the eigenvalues */
  private double m_sumOfEigenValues = 0.0;
  
  /** Filters for original data */
  private ReplaceMissingValues m_replaceMissingFilter;
  private Normalize m_normalizeFilter;
  private NominalToBinary m_nominalToBinFilter;
  private Remove m_attributeFilter;
  
  /** used to remove the class column if a class column is set.
      NOTE(review): buildAttributeConstructor removes the class column
      through m_attributeFilter, not through this field -- confirm whether
      this field is still used anywhere */
  private Remove m_attribFilter;

  /** The number of attributes in the pc transformed data */
  private int m_outputNumAtts = -1;
  
  /** normalize the input data? */
  private boolean m_normalize = true;

  /** the amount of variance to cover in the original data when
      retaining the best n PC's */
  private double m_coverVariance = 0.95;

  /** transform the data through the pc space and back to the original
      space ? 
*/
  private boolean m_transBackToOriginal = false;

  /** holds the transposed eigenvectors for converting back to the
      original space */
  private double [][] m_eTranspose;

  /**
   * Returns a string describing this attribute transformer
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Performs a principal components analysis and transformation of "
      +"the data. Use in conjunction with a Ranker search. Dimensionality "
      +"reduction is accomplished by choosing enough eigenvectors to "
      +"account for some percentage of the variance in the original data---"
      +"default 0.95 (95%). Attribute noise can be filtered by transforming "
      +"to the PC space, eliminating some of the worst eigenvectors, and "
      +"then transforming back to the original space.";
  }

  /**
   * Returns an enumeration describing the available options. <p>
   *
   * Three options are listed: the flag -D, the single-argument option -R,
   * and the flag -O. The synopsis of -R names its argument so that the
   * generated help text shows that a value is expected.
   *
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    Vector newVector = new Vector(3);

    newVector.addElement(new Option("\tDon't normalize input data.",
                                    "D", 0, "-D"));

    // -R is declared with one argument, so the synopsis spells it out.
    newVector.addElement(new Option("\tRetain enough PC attributes to account "
                                    +"\n\tfor this proportion of variance in "
                                    +"the original data. (default = 0.95)",
                                    "R", 1, "-R <variance>"));

    newVector.addElement(new Option("\tTransform through the PC space and "
                                    +"\n\tback to the original space.",
                                    "O", 0, "-O"));

    return  newVector.elements();
  }

  /**
   * Parses a given list of options.
   *
   * Valid options are:<p>
   * -D <br>
   * Don't normalize the input data. <p>
   *
   * -R &lt;variance&gt; <br>
   * Retain enough pcs to account for this proportion of the variance. <p>
   *
   * -O <br>
   * Transform through the PC space and back to the original space. 
<p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions (String[] options) throws Exception {
    // Start from the defaults so options absent from the array do not
    // keep values left over from an earlier call.
    resetOptions();

    String varianceArg = Utils.getOption('R', options);
    if (varianceArg.length() > 0) {
      setVarianceCovered(Double.parseDouble(varianceArg));
    }

    // -D switches normalization off, so the flag is inverted.
    boolean skipNormalization = Utils.getFlag('D', options);
    setNormalize(!skipNormalization);

    boolean backToOriginal = Utils.getFlag('O', options);
    setTransformBackToOriginal(backToOriginal);
  }

  /**
   * Puts the option fields (and the eigenvalue sum) back to their
   * default values.
   */
  private void resetOptions() {
    m_normalize = true;
    m_transBackToOriginal = false;
    m_coverVariance = 0.95;
    m_sumOfEigenValues = 0.0;
  }

  /**
   * Tip text for the normalize property.
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String normalizeTipText() {
    final String tip = "Normalize input data.";
    return tip;
  }

  /**
   * Set whether input data will be normalized.
* @param n true if input data is to be normalized   */  public void setNormalize(boolean n) {    m_normalize = n;  }  /**   * Gets whether or not input data is to be normalized   * @return true if input data is to be normalized   */  public boolean getNormalize() {    return m_normalize;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String varianceCoveredTipText() {    return "Retain enough PC attributes to account for this proportion of "      +"variance.";  }  /**   * Sets the amount of variance to account for when retaining   * principal components   * @param vc the proportion of total variance to account for   */  public void setVarianceCovered(double vc) {    m_coverVariance = vc;  }  /**   * Gets the proportion of total variance to account for when   * retaining principal components   * @return the proportion of variance to account for   */  public double getVarianceCovered() {    return m_coverVariance;  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String transformBackToOriginalTipText() {    return "Transform through the PC space and back to the original space. "      +"If only the best n PCs are retained (by setting varianceCovered < 1) "      +"then this option will give a dataset in the original space but with "      +"less attribute noise.";  }  /**   * Sets whether the data should be transformed back to the original   * space   * @param b true if the data should be transformed back to the   * original space   */  public void setTransformBackToOriginal(boolean b) {    m_transBackToOriginal = b;  }    /**   * Gets whether the data is to be transformed back to the original   * space.   
* @return true if the data is to be transformed back to the original space
   */
  public boolean getTransformBackToOriginal() {
    return m_transBackToOriginal;
  }

  /**
   * Gets the current settings of PrincipalComponents
   *
   * @return an array of strings suitable for passing to setOptions();
   * always of length 4, with unused trailing slots set to ""
   */
  public String[] getOptions () {
    // Four slots cover the longest possible result: "-D", "-R", value, "-O".
    String[] options = new String[4];
    int current = 0;

    if (!getNormalize()) {
      options[current++] = "-D";
    }

    // -R and its value are always emitted, even when the value is the default.
    options[current++] = "-R"; options[current++] = ""+getVarianceCovered();

    if (getTransformBackToOriginal()) {
      options[current++] = "-O";
    }
    
    // Pad the unused slots with empty strings so the array holds no nulls.
    while (current < options.length) {
      options[current++] = "";
    }
    
    return  options;
  }

  /**
   * Initializes principal components and performs the analysis
   * (delegates to buildAttributeConstructor).
   * @param data the instances to analyse/transform
   * @exception Exception if analysis fails
   */
  public void buildEvaluator(Instances data) throws Exception {
    buildAttributeConstructor(data);
  }

  /**
   * Runs the preprocessing filters over the data, fills the correlation
   * matrix, computes its eigen decomposition and sets up the output
   * format(s).
   *
   * @param data the instances to analyse/transform
   * @exception Exception if the data contains string attributes or if
   * one of the called filters/helpers throws
   */
  private void buildAttributeConstructor (Instances data) throws Exception {
    // Clear state left over from a previous build.
    // NOTE(review): m_hasClass and m_classIndex are not reset here, and
    // m_hasClass is only ever set to true below -- confirm this is
    // intended when the evaluator is rebuilt on data without a class.
    m_eigenvalues = null;
    m_outputNumAtts = -1;
    m_attributeFilter = null;
    m_nominalToBinFilter = null;
    m_sumOfEigenValues = 0.0;

    if (data.checkForStringAttributes()) {
      throw  new UnsupportedAttributeTypeException("Can't handle string attributes!");
    }
    m_trainInstances = data;

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    // (the copy is taken before any of the filters below are applied)
    m_trainCopy = new Instances(m_trainInstances);
    
    m_replaceMissingFilter = new ReplaceMissingValues();
    m_replaceMissingFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, 
					m_replaceMissingFilter);

    // Normalization is optional (switched off with the -D option).
    if (m_normalize) {
      m_normalizeFilter = new Normalize();
      m_normalizeFilter.setInputFormat(m_trainInstances);
      m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);
    }

    m_nominalToBinFilter = new NominalToBinary();
    m_nominalToBinFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, 
					m_nominalToBinFilter);
    
    // delete any attributes with only one distinct value or are all missing
    Vector deleteCols = new Vector();
    for (int i=0;i<m_trainInstances.numAttributes();i++) {
      if (m_trainInstances.numDistinctValues(i) <=1) {
	deleteCols.addElement(new Integer(i));
      }
    }

    if (m_trainInstances.classIndex() >=0) {
      // get rid of the class column
      // NOTE(review): if the class attribute itself has <= 1 distinct
      // value, its index was already added by the loop above and is added
      // a second time here -- confirm Remove tolerates duplicate indices.
      m_hasClass = true;
      m_classIndex = m_trainInstances.classIndex();
      deleteCols.addElement(new Integer(m_classIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
      m_attributeFilter = new Remove();
      int [] todelete = new int [deleteCols.size()];
      for (int i=0;i<deleteCols.size();i++) {
	todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue();
      }
      m_attributeFilter.setAttributeIndicesArray(todelete);
      m_attributeFilter.setInvertSelection(false);
      m_attributeFilter.setInputFormat(m_trainInstances);
      m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
    }

    // Dimensions are taken from the filtered data, i.e. after the
    // columns collected above have been removed.
    m_numInstances = m_trainInstances.numInstances();
    m_numAttribs = m_trainInstances.numAttributes();

    // fillCorrelation() is defined elsewhere; presumably it populates
    // m_correlation from m_trainInstances -- TODO confirm.
    fillCorrelation();

    double [] d = new double[m_numAttribs]; 
    double [][] v = new double[m_numAttribs][m_numAttribs];

    Matrix corr = new Matrix(m_correlation);
    // presumably writes the eigenvectors into v and the eigenvalues
    // into d -- verify against Matrix.eigenvalueDecomposition
    corr.eigenvalueDecomposition(v, d);
    //if (debug) {
    //  Matrix V = new Matrix(v);
    //  boolean b = corr.testEigen(V, d, true);
    //  if (!b)
    //	System.out.println("Problem with eigenvektors!!!");
    //  else
    //	System.out.println("***** everything's fine !!!");
    //  }
    
    // clone() on a double[][] copies only the outer array; the row
    // arrays remain shared with the local v.
    m_eigenvectors = (double [][])v.clone();
    m_eigenvalues = (double [])d.clone();

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (int i = 0; i < m_eigenvalues.length; i++) {
      if (m_eigenvalues[i] < 0) {
	m_eigenvalues[i] = 0.0;
      }
    }
    // Sort order and sum are both computed after the clamping above.
    m_sortedEigens = Utils.sort(m_eigenvalues);
    m_sumOfEigenValues = Utils.sum(m_eigenvalues);

    m_transformedFormat = setOutputFormat();
    if (m_transBackToOriginal) {
      m_originalSpaceFormat = setOutputFormatOriginal();
      
      // new ordered eigenvector matrix
      // numVectors = attributes in the transformed format, not counting
      // the class attribute when one is set there
      int numVectors = (m_transformedFormat.classIndex() < 0) 
	? m_transformedFormat.numAttributes()
	: m_transformedFormat.numAttributes() - 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -