📄 randomprojection.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * RandomProjection.java * Copyright (C) 2003 Ashraf M. Kibriya * */package weka.filters.unsupervised.attribute;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SelectedTag;import weka.core.SparseInstance;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Reduces the dimensionality of the data by projecting it onto a lower dimensional subspace using a random matrix with columns of unit length (i.e. It will reduce the number of attributes in the data while preserving much of its variation like PCA, but at a much less computational cost).<br/> * It first applies the NominalToBinary filter to convert all attributes to numeric before reducing the dimension. 
It preserves the class attribute.<br/> * <br/> * For more information, see:<br/> * <br/> * Dmitriy Fradkin, David Madigan: Experiments with random projections for machine learning. In: KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining, New York, NY, USA, 517-522, 2003. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Fradkin2003, * address = {New York, NY, USA}, * author = {Dmitriy Fradkin and David Madigan}, * booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining}, * pages = {517-522}, * publisher = {ACM Press}, * title = {Experiments with random projections for machine learning}, * year = {2003} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -N <number> * The number of dimensions (attributes) the data should be reduced to * (default 10; exclusive of the class attribute, if it is set).</pre> * * <pre> -D [SPARSE1|SPARSE2|GAUSSIAN] * The distribution to use for calculating the random matrix. * Sparse1 is: * sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)} * Sparse2 is: * {-1 with prob(1/2), +1 with prob(1/2)} * </pre> * * <pre> -P <percent> * The percentage of dimensions (attributes) the data should * be reduced to (exclusive of the class attribute, if it is set). This -N * option is ignored if this option is present or is greater * than zero.</pre> * * <pre> -M * Replace missing values using the ReplaceMissingValues filter</pre> * * <pre> -R <num> * The random seed for the random number generator used for * calculating the random matrix (default 42).</pre> * <!-- options-end --> * * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) * @version $Revision: 1.7 $ [1.0 - 22 July 2003 - Initial version (Ashraf M. 
Kibriya)] */public class RandomProjection extends Filter implements UnsupervisedFilter, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 4428905532728645880L; /** Stores the number of dimensions to reduce the data to */ private int m_k = 10; /** Stores the dimensionality the data should be reduced to as percentage of the original dimension */ private double m_percent = 0.0; /** Is the random matrix will be computed using Gaussian distribution or not */ private boolean m_useGaussian = false; /** distribution type: sparse 1 */ public static final int SPARSE1 = 1; /** distribution type: sparse 2 */ public static final int SPARSE2 = 2; /** distribution type: gaussian */ public static final int GAUSSIAN = 3; /** The types of distributions that can be used for calculating the random matrix */ public static final Tag [] TAGS_DSTRS_TYPE = { new Tag(SPARSE1, "Sparse 1"), new Tag(SPARSE2, "Sparse 2"), new Tag(GAUSSIAN, "Gaussian"), }; /** Stores the distribution to use for calculating the random matrix */ private int m_distribution = SPARSE1; /** Should the missing values be replaced using unsupervised.ReplaceMissingValues filter */ private boolean m_useReplaceMissing = false; /** Keeps track of output format if it is defined or not */ private boolean m_OutputFormatDefined = false; /** The NominalToBinary filter applied to the data before this filter */ private Filter m_ntob; // = new weka.filters.unsupervised.attribute.NominalToBinary(); /** The ReplaceMissingValues filter */ private Filter m_replaceMissing; /** Stores the random seed used to generate the random matrix */ private long m_rndmSeed = 42; /** The random matrix */ private double m_rmatrix[][]; /** The random number generator used for generating the random matrix */ private Random m_random; /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. 
*/ public Enumeration listOptions() { Vector newVector = new Vector(2); newVector.addElement(new Option( "\tThe number of dimensions (attributes) the data should be reduced to\n" +"\t(default 10; exclusive of the class attribute, if it is set).", "N", 1, "-N <number>")); newVector.addElement(new Option( "\tThe distribution to use for calculating the random matrix.\n" +"\tSparse1 is:\n" +"\t sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}\n" +"\tSparse2 is:\n" +"\t {-1 with prob(1/2), +1 with prob(1/2)}\n", "D", 1, "-D [SPARSE1|SPARSE2|GAUSSIAN]")); //newVector.addElement(new Option( // "\tUse Gaussian distribution for calculating the random matrix.", // "G", 0, "-G")); newVector.addElement(new Option( "\tThe percentage of dimensions (attributes) the data should\n" +"\tbe reduced to (exclusive of the class attribute, if it is set). This -N\n" +"\toption is ignored if this option is present or is greater\n" +"\tthan zero.", "P", 1, "-P <percent>")); newVector.addElement(new Option( "\tReplace missing values using the ReplaceMissingValues filter", "M", 0, "-M")); newVector.addElement(new Option( "\tThe random seed for the random number generator used for\n" +"\tcalculating the random matrix (default 42).", "R", 0, "-R <num>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -N <number> * The number of dimensions (attributes) the data should be reduced to * (default 10; exclusive of the class attribute, if it is set).</pre> * * <pre> -D [SPARSE1|SPARSE2|GAUSSIAN] * The distribution to use for calculating the random matrix. * Sparse1 is: * sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)} * Sparse2 is: * {-1 with prob(1/2), +1 with prob(1/2)} * </pre> * * <pre> -P <percent> * The percentage of dimensions (attributes) the data should * be reduced to (exclusive of the class attribute, if it is set). 
This -N * option is ignored if this option is present or is greater * than zero.</pre> * * <pre> -M * Replace missing values using the ReplaceMissingValues filter</pre> * * <pre> -R <num> * The random seed for the random number generator used for * calculating the random matrix (default 42).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String mString = Utils.getOption('P', options); if (mString.length() != 0) { setPercent((double) Double.parseDouble(mString)); //setNumberOfAttributes((int) Integer.parseInt(mString)); } else { setPercent(0); mString = Utils.getOption('N', options); if (mString.length() != 0) setNumberOfAttributes(Integer.parseInt(mString)); else setNumberOfAttributes(10); } mString = Utils.getOption('R', options); if(mString.length()!=0) { setRandomSeed( Long.parseLong(mString) ); } mString = Utils.getOption('D', options); if(mString.length()!=0) { if(mString.equalsIgnoreCase("sparse1")) setDistribution( new SelectedTag(SPARSE1, TAGS_DSTRS_TYPE) ); else if(mString.equalsIgnoreCase("sparse2")) setDistribution( new SelectedTag(SPARSE2, TAGS_DSTRS_TYPE) ); else if(mString.equalsIgnoreCase("gaussian")) setDistribution( new SelectedTag(GAUSSIAN, TAGS_DSTRS_TYPE) ); } if(Utils.getFlag('M', options)) setReplaceMissingValues(true); else setReplaceMissingValues(false); //if(Utils.getFlag('G', options)) // setUseGaussian(true); //else // setUseGaussian(false); } /** * Gets the current settings of the filter. 
* * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [10]; int current = 0; //if (getUseGaussian()) { // options[current++] = "-G"; //} if (getReplaceMissingValues()) { options[current++] = "-M"; } if (getPercent() == 0) { options[current++] = "-N"; options[current++] = "" + getNumberOfAttributes(); } else { options[current++] = "-P"; options[current++] = "" + getPercent(); } options[current++] = "-R"; options[current++] = "" + getRandomSeed(); SelectedTag t = getDistribution(); options[current++] = "-D"; options[current++] = ""+t.getSelectedTag().getReadable(); while (current < options.length) { options[current++] = ""; } return options; } /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Reduces the dimensionality of the data by projecting" + " it onto a lower dimensional subspace using a random" + " matrix with columns of unit length (i.e. It will reduce" + " the number of attributes in the data while preserving" + " much of its variation like PCA, but at a much less" + " computational cost).\n" + "It first applies the NominalToBinary filter to" + " convert all attributes to numeric before reducing the" + " dimension. It preserves the class attribute.\n\n" + "For more information, see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. 
*
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);

    result.setValue(Field.AUTHOR, "Dmitriy Fradkin and David Madigan");
    result.setValue(Field.TITLE, "Experiments with random projections for machine learning");
    result.setValue(Field.BOOKTITLE, "KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining");
    // The proceedings are KDD '03; the year was previously truncated to "003".
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.PAGES, "517-522");
    result.setValue(Field.PUBLISHER, "ACM Press");
    result.setValue(Field.ADDRESS, "New York, NY, USA");

    return result;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numberOfAttributesTipText() {
    return "The number of dimensions (attributes) the data should"
        + " be reduced to.";
  }

  /**
   * Sets the number of attributes (dimensions) the data should be reduced to.
   * The value is stored as given; no range check is applied here.
   *
   * @param newAttNum the goal for the dimensions
   */
  public void setNumberOfAttributes(int newAttNum) {
    m_k = newAttNum;
  }

  /**
   * Gets the current number of attributes (dimensionality) to which the data
   * will be reduced to.
   *
   * @return the number of dimensions
   */
  public int getNumberOfAttributes() {
    return m_k;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String percentTipText() {
    // NOTE(review): this text says "inclusive of the class attribute", while
    // the -P description in listOptions() says "exclusive" -- confirm which
    // one matches the actual computation before changing either string.
    return " The percentage of dimensions (attributes) the data should"
        + " be reduced to (inclusive of the class attribute). This "
        + " NumberOfAttributes option is ignored if this option is"
        + " present or is greater than zero.";
  }

  /**
   * Sets the percent the attributes (dimensions) of the data should be reduced to.
   * A positive value is divided by 100 and stored as a fraction; a value that
   * is zero or negative is stored unchanged.
   *
   * @param newPercent the percentage of attributes
   */
  public void setPercent(double newPercent) {
    if (newPercent > 0) {
      newPercent /= 100;
    }
    m_percent = newPercent;
  }

  /**
   * Gets the percent the attributes (dimensions) of the data will be reduced to.
   * This is the stored fraction multiplied by 100.
   *
   * @return the percentage of attributes
   */
  public double getPercent() {
    return m_percent * 100;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -