📄 plsfilter.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * PLSFilter.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand * */package weka.filters.supervised.attribute;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.core.matrix.EigenvalueDecomposition;import weka.core.matrix.Matrix;import weka.filters.Filter;import weka.filters.SimpleBatchFilter;import weka.filters.SupervisedFilter;import weka.filters.unsupervised.attribute.Center;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import weka.filters.unsupervised.attribute.Standardize;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Runs Partial Least Square Regression over the given instances and computes the resulting beta matrix for prediction.<br/> * By default it replaces missing values and centers the data.<br/> * <br/> * For more information see:<br/> * <br/> * Tormod Naes, Tomas Isaksson, Tom Fearn, Tony Davies (2002). A User Friendly Guide to Multivariate Calibration and Classification. NIR Publications.<br/> * <br/> * StatSoft, Inc.. Partial Least Squares (PLS).<br/> * <br/> * Bent Jorgensen, Yuri Goegebeur. Module 7: Partial least squares regression I.<br/> * <br/> * S. de Jong (1993). SIMPLS: an alternative approach to partial least squares regression. Chemometrics and Intelligent Laboratory Systems. 18:251-263. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @book{Naes2002, * author = {Tormod Naes and Tomas Isaksson and Tom Fearn and Tony Davies}, * publisher = {NIR Publications}, * title = {A User Friendly Guide to Multivariate Calibration and Classification}, * year = {2002}, * ISBN = {0-9528666-2-5} * } * * @misc{missing_id, * author = {StatSoft, Inc.}, * booktitle = {Electronic Textbook StatSoft}, * title = {Partial Least Squares (PLS)}, * HTTP = {http://www.statsoft.com/textbook/stpls.html} * } * * @misc{missing_id, * author = {Bent Jorgensen and Yuri Goegebeur}, * booktitle = {ST02: Multivariate Data Analysis and Chemometrics}, * title = {Module 7: Partial least squares regression I}, * HTTP = {http://statmaster.sdu.dk/courses/ST02/module07/} * } * * @article{Jong1993, * author = {S. de Jong}, * journal = {Chemometrics and Intelligent Laboratory Systems}, * pages = {251-263}, * title = {SIMPLS: an alternative approach to partial least squares regression}, * volume = {18}, * year = {1993} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Turns on output of debugging information.</pre> * * <pre> -C <num> * The number of components to compute. * (default: 20)</pre> * * <pre> -U * Updates the class attribute as well. * (default: off)</pre> * * <pre> -M * Turns replacing of missing values on. * (default: off)</pre> * * <pre> -A <SIMPLS|PLS1> * The algorithm to use. * (default: PLS1)</pre> * * <pre> -P <none|center|standardize> * The type of preprocessing that is applied to the data. * (default: center)</pre> * <!-- options-end --> * * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.3 $ */public class PLSFilter extends SimpleBatchFilter implements SupervisedFilter, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -3335106965521265631L; /** the type of algorithm: SIMPLS */ public static final int ALGORITHM_SIMPLS = 1; /** the type of algorithm: PLS1 */ public static final int ALGORITHM_PLS1 = 2; /** the types of algorithm */ public static final Tag[] TAGS_ALGORITHM = { new Tag(ALGORITHM_SIMPLS, "SIMPLS"), new Tag(ALGORITHM_PLS1, "PLS1") }; /** the type of preprocessing: None */ public static final int PREPROCESSING_NONE = 0; /** the type of preprocessing: Center */ public static final int PREPROCESSING_CENTER = 1; /** the type of preprocessing: Standardize */ public static final int PREPROCESSING_STANDARDIZE = 2; /** the types of preprocessing */ public static final Tag[] TAGS_PREPROCESSING = { new Tag(PREPROCESSING_NONE, "none"), new Tag(PREPROCESSING_CENTER, "center"), new Tag(PREPROCESSING_STANDARDIZE, "standardize") }; /** the maximum number of components to generate */ protected int m_NumComponents = 20; /** the type of algorithm */ protected int m_Algorithm = ALGORITHM_PLS1; /** the regression vector "r-hat" for PLS1 */ protected Matrix m_PLS1_RegVector = null; /** the P matrix for PLS1 */ protected Matrix m_PLS1_P = null; /** the W matrix for PLS1 */ protected Matrix m_PLS1_W = null; /** the b-hat vector for PLS1 */ protected Matrix m_PLS1_b_hat = null; /** the W matrix for SIMPLS */ protected Matrix m_SIMPLS_W = null; /** the B matrix for SIMPLS (used for prediction) */ protected Matrix m_SIMPLS_B = null; /** whether to include the prediction, i.e., modifying the class attribute */ protected boolean m_PerformPrediction = false; /** for replacing missing values */ protected Filter m_Missing = null; /** whether to replace missing values */ protected boolean m_ReplaceMissing = true; /** for centering the data */ protected Filter m_Filter = null; /** the type of preprocessing */ protected int m_Preprocessing = PREPROCESSING_CENTER; /** the mean of the class */ protected double m_ClassMean = 0; /** the standard deviation of the class */ protected double m_ClassStdDev = 0; /** * default constructor */ public PLSFilter() { super(); // setup pre-processing m_Missing = new ReplaceMissingValues(); m_Filter = new Center(); } /** * Returns a string describing this classifier. * * @return a description of the classifier suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Runs Partial Least Square Regression over the given instances " + "and computes the resulting beta matrix for prediction.\n" + "By default it replaces missing values and centers the data.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.BOOK); result.setValue(Field.AUTHOR, "Tormod Naes and Tomas Isaksson and Tom Fearn and Tony Davies"); result.setValue(Field.YEAR, "2002"); result.setValue(Field.TITLE, "A User Friendly Guide to Multivariate Calibration and Classification"); result.setValue(Field.PUBLISHER, "NIR Publications"); result.setValue(Field.ISBN, "0-9528666-2-5"); additional = result.add(Type.MISC); additional.setValue(Field.AUTHOR, "StatSoft, Inc."); additional.setValue(Field.TITLE, "Partial Least Squares (PLS)"); additional.setValue(Field.BOOKTITLE, "Electronic Textbook StatSoft"); additional.setValue(Field.HTTP, "http://www.statsoft.com/textbook/stpls.html"); additional = result.add(Type.MISC); additional.setValue(Field.AUTHOR, "Bent Jorgensen and Yuri Goegebeur"); additional.setValue(Field.TITLE, "Module 7: Partial least squares regression I"); additional.setValue(Field.BOOKTITLE, "ST02: Multivariate Data Analysis and Chemometrics"); additional.setValue(Field.HTTP, "http://statmaster.sdu.dk/courses/ST02/module07/"); additional = result.add(Type.ARTICLE); additional.setValue(Field.AUTHOR, "S. de Jong"); additional.setValue(Field.YEAR, "1993"); additional.setValue(Field.TITLE, "SIMPLS: an alternative approach to partial least squares regression"); additional.setValue(Field.JOURNAL, "Chemometrics and Intelligent Laboratory Systems"); additional.setValue(Field.VOLUME, "18"); additional.setValue(Field.PAGES, "251-263"); return result; } /** * Gets an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result; Enumeration enm; String param; SelectedTag tag; int i; result = new Vector(); enm = super.listOptions(); while (enm.hasMoreElements()) result.addElement(enm.nextElement()); result.addElement(new Option( "\tThe number of components to compute.\n" + "\t(default: 20)", "C", 1, "-C <num>")); result.addElement(new Option( "\tUpdates the class attribute as well.\n" + "\t(default: off)", "U", 0, "-U")); result.addElement(new Option( "\tTurns replacing of missing values on.\n" + "\t(default: off)", "M", 0, "-M")); param = ""; for (i = 0; i < TAGS_ALGORITHM.length; i++) { if (i > 0) param += "|"; tag = new SelectedTag(TAGS_ALGORITHM[i].getID(), TAGS_ALGORITHM); param += tag.getSelectedTag().getReadable(); } result.addElement(new Option( "\tThe algorithm to use.\n" + "\t(default: PLS1)", "A", 1, "-A <" + param + ">")); param = ""; for (i = 0; i < TAGS_PREPROCESSING.length; i++) { if (i > 0) param += "|"; tag = new SelectedTag(TAGS_PREPROCESSING[i].getID(), TAGS_PREPROCESSING); param += tag.getSelectedTag().getReadable(); } result.addElement(new Option( "\tThe type of preprocessing that is applied to the data.\n" + "\t(default: center)", "P", 1, "-P <" + param + ">")); return result.elements(); } /** * returns the options of the current setup * * @return the current options */ public String[] getOptions() { int i; Vector result; String[] options; result = new Vector(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); result.add("-C"); result.add("" + getNumComponents()); if (getPerformPrediction()) result.add("-U"); if (getReplaceMissing()) result.add("-M"); result.add("-A"); result.add("" + getAlgorithm().getSelectedTag().getReadable()); result.add("-P"); result.add("" + getPreprocessing().getSelectedTag().getReadable()); return (String[]) result.toArray(new String[result.size()]); } /** * Parses the options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Turns on output of debugging information.</pre> * * <pre> -C <num> * The number of components to compute. * (default: 20)</pre> * * <pre> -U
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -