📄 randomprojection.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* RandomProjection.java
* Copyright (C) 2003 Ashraf M. Kibriya
*
*/
package weka.filters.unsupervised.attribute;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.SparseInstance;
import weka.core.Tag;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;
/**
* Reduces the dimensionality of the data by projecting
* it onto a lower dimensional subspace using a random
* matrix with columns of unit length (It will reduce
* the number of attributes in the data while preserving
* much of its variation, like PCA, but at a much lower
* computational cost). <br>
* It first applies the NominalToBinary filter to
* convert all attributes to numeric before reducing the
* dimension. It preserves the class attribute.
*
* <p> Valid filter-specific options are: <p>
*
* -N <num> <br>
* The number of dimensions (attributes) the data should
* be reduced to (exclusive of the class attribute). <p>
*
* -P <percent> <br>
* The percentage of dimensions (attributes) the data should
* be reduced to (exclusive of the class attribute). The
* -N option is ignored if this option is present or is greater
* than zero.<p>
*
* -D <distribution num> <br>
* The distribution to use for calculating the random
* matrix.<br>
* <li> 1 - Sparse distribution of: (default) <br>
* sqrt(3)*{+1 with prob(1/6), 0 with prob(2/3), -1 with prob(1/6)}</li>
* <li> 2 - Sparse distribution of: <br>
* {+1 with prob(1/2), -1 with prob(1/2)}</li>
* <li> 3 - Gaussian distribution </li>
*
* -M <br>
* Replace missing values using the ReplaceMissingValues filter
*
* -R <num> <br>
* Specify the random seed for the random number generator for
* calculating the random matrix.
*
* @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
* @version 1.0 - 22 July 2003 - Initial version (Ashraf M. Kibriya)
*/
public class RandomProjection extends Filter implements UnsupervisedFilter, OptionHandler {
/** Number of dimensions (attributes) the data is reduced to; defaults to 10. */
private int m_k=10;
/**
 * Target dimensionality expressed relative to the original dimensionality.
 * setPercent divides any value greater than 1 by 100 before storing it here,
 * so such values end up stored as a fraction. Defaults to 0.0.
 * NOTE(review): presumably 0 means "use m_k instead" - confirm against the
 * code that computes the output format.
 */
private double m_percent=0.0;
/** Whether the random matrix is computed using a Gaussian distribution. */
private boolean m_useGaussian=false;
/** Identifiers of the distributions that can be used for
 calculating the random matrix. */
private static final int SPARSE1=1, SPARSE2=2, GAUSSIAN=3;
/** Tags pairing each distribution identifier with its display label. */
public static final Tag [] TAGS_DSTRS_TYPE = {
new Tag(SPARSE1, "Sparse 1"),
new Tag(SPARSE2, "Sparse 2"),
new Tag(GAUSSIAN, "Gaussian"),
};
/** The distribution used for calculating the random matrix;
 defaults to SPARSE1. */
private int m_distribution=SPARSE1;
/** Whether missing values should be replaced using the
 unsupervised.ReplaceMissingValues filter; off by default. */
private boolean m_replaceMissing=false;
/** Tracks whether the output format has been defined yet. */
private boolean m_OutputFormatDefined=false;
/** The NominalToBinary filter applied to the data before this filter.
 Not initialised at declaration. */
private Filter ntob; // = new weka.filters.unsupervised.attribute.NominalToBinary();
/** The ReplaceMissingValues filter. Not initialised at declaration. */
private Filter replaceMissing;
/** The random seed used to generate the random matrix; defaults to 42. */
private long m_rndmSeed=42;
/** The random matrix. */
private double rmatrix[][];
/** The random number generator used for generating the random matrix. */
private Random r;
/**
 * Returns an enumeration describing the available options.
 *
 * <p>Five options are described: -N, -D, -P, -M and -R. Every option except
 * the -M flag is declared as taking exactly one argument.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  // Sized for the five options added below.
  Vector newVector = new Vector(5);

  newVector.addElement(new Option(
      "\tThe number of dimensions (attributes) the data should be reduced to\n"
      + "\t(exclusive of the class attribute).",
      "N", 1, "-N <number>"));

  newVector.addElement(new Option(
      "\tThe distribution to use for calculating the random matrix.\n"
      + "\tSparse1 is:\n"
      + "\t sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}\n"
      + "\tSparse2 is:\n"
      + "\t {-1 with prob(1/2), +1 with prob(1/2)}\n",
      "D", 1, "-D [SPARSE1|SPARSE2|GAUSSIAN]"));

  // NOTE(review): this description says "inclusive of the class attribute"
  // while the -N description above says "exclusive" - confirm which one is
  // intended before changing either text.
  newVector.addElement(new Option(
      "\tThe percentage of dimensions (attributes) the data should\n"
      + "\tbe reduced to (inclusive of the class attribute). This -N\n"
      + "\toption is ignored if this option is present or is greater\n"
      + "\tthan zero.",
      "P", 1, "-P <percent>"));

  newVector.addElement(new Option(
      "\tReplace missing values using the ReplaceMissingValues filter",
      "M", 0, "-M"));

  // -R carries a value (the seed), so it is declared with one argument.
  newVector.addElement(new Option(
      "\tThe random seed for the random number generator used for\n"
      + "\tcalculating the random matrix.",
      "R", 1, "-R <num>"));

  return newVector.elements();
}
/**
 * Parses the options for this object. Valid options are: <p>
 *
 * -N &lt;num&gt; <br>
 * The number of dimensions (attributes) the data should
 * be reduced to (exclusive of the class attribute). Used only when no
 * percentage greater than zero is in effect; defaults to 10 when it is
 * needed but not given. <p>
 *
 * -P &lt;percent&gt; <br>
 * The percentage of dimensions (attributes) the data should
 * be reduced to (exclusive of the class attribute). When this option is
 * given with a value greater than zero the -N option is ignored; when it
 * is absent the percentage is reset to 0. <p>
 *
 * -D &lt;distribution&gt; <br>
 * The distribution to use for calculating the random matrix, given either
 * by name (case-insensitive, whitespace ignored) or by number:
 * <ul>
 * <li> SPARSE1 or 1 - Sparse distribution of: (default) <br>
 * sqrt(3)*{+1 with prob(1/6), 0 with prob(2/3), -1 with prob(1/6)}</li>
 * <li> SPARSE2 or 2 - Sparse distribution of: <br>
 * {+1 with prob(1/2), -1 with prob(1/2)}</li>
 * <li> GAUSSIAN or 3 - Gaussian distribution </li>
 * </ul>
 * The current distribution is kept when this option is absent. <p>
 *
 * -M <br>
 * Replace missing values using the ReplaceMissingValues filter. <p>
 *
 * -R &lt;num&gt; <br>
 * Specify the random seed for the random number generator for
 * calculating the random matrix. The current seed is kept when this
 * option is absent.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option value cannot be parsed or names an
 *            unknown distribution
 */
public void setOptions(String[] options) throws Exception {
  // Consume both size options up front so that neither is left behind in
  // the array, whichever of the two ends up being used.
  String percentText = Utils.getOption('P', options);
  String countText = Utils.getOption('N', options);

  // A missing -P clears any percentage left over from an earlier
  // configuration, so that -N (or its default) takes effect again.
  setPercent(percentText.length() != 0 ? Double.parseDouble(percentText) : 0.0);

  if (!(getPercent() > 0)) {
    // No percentage in effect, so the absolute attribute count applies.
    setNumberOfAttributes(countText.length() != 0 ? parseAttributeCount(countText) : 10);
  }

  String seedText = Utils.getOption('R', options);
  if (seedText.length() != 0) {
    setRandomSeed(Long.parseLong(seedText));
  }

  String distributionText = Utils.getOption('D', options);
  if (distributionText.length() != 0) {
    setDistribution(new SelectedTag(parseDistribution(distributionText), TAGS_DSTRS_TYPE));
  }

  setReplaceMissingValues(Utils.getFlag('M', options));
}

/**
 * Parses the value of the -N option. Besides plain integers, an integral
 * value written in decimal form (for example "10.0") is accepted, so that
 * option strings spelling the count as a floating-point number still parse.
 */
private static int parseAttributeCount(String text) {
  try {
    return Integer.parseInt(text);
  } catch (NumberFormatException notAnInteger) {
    double value = Double.parseDouble(text);
    boolean integral = value == Math.rint(value);
    boolean inRange = value >= Integer.MIN_VALUE && value <= Integer.MAX_VALUE;
    if (!integral || !inRange) {
      throw notAnInteger;
    }
    return (int) value;
  }
}

/**
 * Maps the value of the -D option to a distribution identifier. Whitespace
 * is dropped before matching, so the tag label "Sparse 1" is recognised in
 * the same way as "SPARSE1".
 */
private static int parseDistribution(String text) {
  String key = text.replaceAll("\\s+", "");
  if (key.equalsIgnoreCase("sparse1") || key.equals(String.valueOf(SPARSE1))) {
    return SPARSE1;
  }
  if (key.equalsIgnoreCase("sparse2") || key.equals(String.valueOf(SPARSE2))) {
    return SPARSE2;
  }
  if (key.equalsIgnoreCase("gaussian") || key.equals(String.valueOf(GAUSSIAN))) {
    return GAUSSIAN;
  }
  throw new IllegalArgumentException(
      "Unknown distribution for -D: '" + text + "' (expected SPARSE1, SPARSE2 or GAUSSIAN)");
}
/**
 * Gets the current settings of the filter.
 *
 * <p>The returned array always has ten elements; slots that are not needed
 * hold empty strings. It contains -M when missing values are replaced,
 * either -P (when the percentage is greater than zero) or -N (otherwise),
 * then -R with the random seed and -D with the distribution name.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  String[] options = new String[10];
  int current = 0;

  if (getReplaceMissingValues()) {
    options[current++] = "-M";
  }

  // Emit only the size option that is in effect: setOptions ignores -N
  // whenever a -P value is supplied, so writing both would make the
  // attribute count impossible to restore.
  double percent = getPercent();
  if (percent > 0) {
    options[current++] = "-P";
    options[current++] = "" + percent;
  } else {
    options[current++] = "-N";
    // Kept as an int so the value is written as "10" rather than "10.0",
    // which Integer.parseInt in setOptions would reject.
    options[current++] = "" + getNumberOfAttributes();
  }

  options[current++] = "-R";
  options[current++] = "" + getRandomSeed();

  // Write the distribution using the names setOptions recognises, not the
  // display labels, which contain a space ("Sparse 1").
  Tag selected = getDistribution().getSelectedTag();
  String distributionName;
  switch (selected.getID()) {
    case SPARSE1:
      distributionName = "SPARSE1";
      break;
    case SPARSE2:
      distributionName = "SPARSE2";
      break;
    case GAUSSIAN:
      distributionName = "GAUSSIAN";
      break;
    default:
      // Unknown identifier: fall back to the tag's own label.
      distributionName = "" + selected.getReadable();
      break;
  }
  options[current++] = "-D";
  options[current++] = distributionName;

  // Pad the unused tail with empty strings.
  while (current < options.length) {
    options[current++] = "";
  }
  return options;
}
/**
 * Describes what this filter does.
 *
 * @return a two-paragraph description of the filter, suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // First paragraph: what the projection achieves.
  final String purpose =
      "Reduces the dimensionality of the data by projecting it onto a lower"
      + " dimensional subspace using a random matrix with columns of unit length"
      + " (i.e. It will reduce the number of attributes in the data while"
      + " preserving much of its variation like PCA, but at a much less"
      + " computational cost).";
  // Second paragraph: the preprocessing applied and the class attribute.
  final String preprocessing =
      "It first applies the NominalToBinary filter to convert all attributes"
      + " to numeric before reducing the dimension."
      + " It preserves the class attribute.";
  return purpose + "\n" + preprocessing;
}
/**
 * Supplies the tip text for the numberOfAttributes property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String numberOfAttributesTipText() {
  final String tip =
      "The number of dimensions (attributes) the data should be reduced to.";
  return tip;
}
/**
 * Sets the number of attributes (dimensions) the data should be reduced to.
 * The value is stored as given.
 *
 * @param newAttNum the target number of attributes
 */
public void setNumberOfAttributes(int newAttNum) {
  this.m_k = newAttNum;
}
/**
 * Gets the number of attributes (dimensions) to which the data will be
 * reduced.
 *
 * @return the currently stored target number of attributes
 */
public int getNumberOfAttributes() {
  return this.m_k;
}
/**
 * Supplies the tip text for the percent property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String percentTipText() {
  // The leading space and the double space after "This" are part of the
  // text returned today and are kept as they are.
  final String meaning =
      " The percentage of dimensions (attributes) the data should be reduced to"
      + " (inclusive of the class attribute).";
  final String precedence =
      " This  NumberOfAttributes option is ignored if this option is present"
      + " or is greater than zero.";
  return meaning + precedence;
}
/**
 * Sets the percent the attributes (dimensions) of the data should be
 * reduced to. A value greater than 1 is divided by 100 before being stored;
 * any other value is stored unchanged.
 *
 * @param newPercent the percentage to store
 */
public void setPercent(double newPercent) {
  m_percent = (newPercent > 1) ? newPercent / 100 : newPercent;
}
/**
 * Gets the percent the attributes (dimensions) of the data will be
 * reduced to.
 *
 * @return the value stored by setPercent
 */
public double getPercent() {
  return this.m_percent;
}
/**
 * Supplies the tip text for the randomSeed property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String randomSeedTipText() {
  // The trailing space is part of the text returned today and is kept.
  final String tip =
      "The random seed used by the random number generator"
      + " used for generating the random matrix ";
  return tip;
}
/**
 * Sets the random seed of the random number generator.
 *
 * @param seed the seed to store
 */
public void setRandomSeed(long seed) {
  this.m_rndmSeed = seed;
}
/**
 * Gets the random seed of the random number generator.
 *
 * @return the currently stored seed
 */
public long getRandomSeed() {
  return this.m_rndmSeed;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -