randomprojection.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclip」· Java 代码 · 共 895 行 · 第 1/2 页
JAVA
895 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    RandomProjection.java *    Copyright (C) 2003 Ashraf M. Kibriya * */package weka.filters.unsupervised.attribute;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SelectedTag;import weka.core.SparseInstance;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/**  <!-- globalinfo-start --> * Reduces the dimensionality of the data by projecting it onto a lower dimensional subspace using a random matrix with columns of unit length (i.e. It will reduce the number of attributes in the data while preserving much of its variation like PCA, but at a much less computational cost).<br/> * It first applies the  NominalToBinary filter to convert all attributes to numeric before reducing the dimension. 
It preserves the class attribute.<br/>
 * <br/>
 * For more information, see:<br/>
 * <br/>
 * Dmitriy Fradkin, David Madigan: Experiments with random projections for machine learning. In: KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining, New York, NY, USA, 517-522, 2003.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;inproceedings{Fradkin2003,
 *    address = {New York, NY, USA},
 *    author = {Dmitriy Fradkin and David Madigan},
 *    booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining},
 *    pages = {517-522},
 *    publisher = {ACM Press},
 *    title = {Experiments with random projections for machine learning},
 *    year = {2003}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -N &lt;number&gt;
 *  The number of dimensions (attributes) the data should be reduced to
 *  (default 10; exclusive of the class attribute, if it is set).</pre>
 *
 * <pre> -D [SPARSE1|SPARSE2|GAUSSIAN]
 *  The distribution to use for calculating the random matrix.
 *  Sparse1 is:
 *    sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}
 *  Sparse2 is:
 *    {-1 with prob(1/2), +1 with prob(1/2)}
 * </pre>
 *
 * <pre> -P &lt;percent&gt;
 *  The percentage of dimensions (attributes) the data should
 *  be reduced to (exclusive of the class attribute, if it is set). This -N
 *  option is ignored if this option is present or is greater
 *  than zero.</pre>
 *
 * <pre> -M
 *  Replace missing values using the ReplaceMissingValues filter</pre>
 *
 * <pre> -R &lt;num&gt;
 *  The random seed for the random number generator used for
 *  calculating the random matrix (default 42).</pre>
 *
 <!-- options-end -->
 *
 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
 * @version $Revision: 1.7 $ [1.0 - 22 July 2003 - Initial version (Ashraf M.
Kibriya)] */public class RandomProjection   extends Filter   implements UnsupervisedFilter, OptionHandler, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 4428905532728645880L;  /** Stores the number of dimensions to reduce the data to */  private int m_k = 10;  /** Stores the dimensionality the data should be reduced to as percentage of the original dimension */  private double m_percent = 0.0;  /** Is the random matrix will be computed using       Gaussian distribution or not */  private boolean m_useGaussian = false;  /** distribution type: sparse 1 */  public static final int SPARSE1 = 1;  /** distribution type: sparse 2 */  public static final int SPARSE2 = 2;  /** distribution type: gaussian */  public static final int GAUSSIAN = 3;  /** The types of distributions that can be used for   calculating the random matrix */  public static final Tag [] TAGS_DSTRS_TYPE = {    new Tag(SPARSE1, "Sparse 1"),    new Tag(SPARSE2, "Sparse 2"),    new Tag(GAUSSIAN, "Gaussian"),  };  /** Stores the distribution to use for calculating the      random matrix */  private int m_distribution = SPARSE1;   /** Should the missing values be replaced using       unsupervised.ReplaceMissingValues filter */  private boolean m_useReplaceMissing = false;  /** Keeps track of output format if it is defined or not */  private boolean m_OutputFormatDefined = false;  /** The NominalToBinary filter applied to the data before this filter */  private Filter m_ntob; // = new weka.filters.unsupervised.attribute.NominalToBinary();  /** The ReplaceMissingValues filter */  private Filter m_replaceMissing;      /** Stores the random seed used to generate the random matrix */  private long m_rndmSeed = 42;  /** The random matrix */  private double m_rmatrix[][];  /** The random number generator used for generating the random matrix */  private Random m_random;  /**   * Returns an enumeration describing the available options.   
*   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector newVector = new Vector(2);    newVector.addElement(new Option(	      "\tThe number of dimensions (attributes) the data should be reduced to\n"             +"\t(default 10; exclusive of the class attribute, if it is set).",	      "N", 1, "-N <number>"));    newVector.addElement(new Option(	      "\tThe distribution to use for calculating the random matrix.\n"	     +"\tSparse1 is:\n"	     +"\t  sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}\n"	     +"\tSparse2 is:\n"	     +"\t  {-1 with prob(1/2), +1 with prob(1/2)}\n",	      "D", 1, "-D [SPARSE1|SPARSE2|GAUSSIAN]"));    //newVector.addElement(new Option(    //	      "\tUse Gaussian distribution for calculating the random matrix.",    //	      "G", 0, "-G"));    newVector.addElement(new Option(	      "\tThe percentage of dimensions (attributes) the data should\n"	     +"\tbe reduced to (exclusive of the class attribute, if it is set). This -N\n"	     +"\toption is ignored if this option is present or is greater\n"	     +"\tthan zero.",	      "P", 1, "-P <percent>"));    newVector.addElement(new Option(	      "\tReplace missing values using the ReplaceMissingValues filter",	      "M", 0, "-M"));    newVector.addElement(new Option(	      "\tThe random seed for the random number generator used for\n"	     +"\tcalculating the random matrix (default 42).",	      "R", 0, "-R <num>"));     return newVector.elements();  }  /**   * Parses a given list of options. <p/>   *    <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -N &lt;number&gt;   *  The number of dimensions (attributes) the data should be reduced to   *  (default 10; exclusive of the class attribute, if it is set).</pre>   *    * <pre> -D [SPARSE1|SPARSE2|GAUSSIAN]   *  The distribution to use for calculating the random matrix.   
*  Sparse1 is:   *    sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}   *  Sparse2 is:   *    {-1 with prob(1/2), +1 with prob(1/2)}   * </pre>   *    * <pre> -P &lt;percent&gt;   *  The percentage of dimensions (attributes) the data should   *  be reduced to (exclusive of the class attribute, if it is set). This -N   *  option is ignored if this option is present or is greater   *  than zero.</pre>   *    * <pre> -M   *  Replace missing values using the ReplaceMissingValues filter</pre>   *    * <pre> -R &lt;num&gt;   *  The random seed for the random number generator used for   *  calculating the random matrix (default 42).</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String mString = Utils.getOption('P', options);    if (mString.length() != 0) {	setPercent((double) Double.parseDouble(mString)); //setNumberOfAttributes((int) Integer.parseInt(mString));    } else {        setPercent(0);	mString = Utils.getOption('N', options);	if (mString.length() != 0) 	    setNumberOfAttributes(Integer.parseInt(mString));	    	else	    	    setNumberOfAttributes(10);    }            mString = Utils.getOption('R', options);    if(mString.length()!=0) {	setRandomSeed( Long.parseLong(mString) );    }    mString = Utils.getOption('D', options);    if(mString.length()!=0) {	if(mString.equalsIgnoreCase("sparse1"))	   setDistribution( new SelectedTag(SPARSE1, TAGS_DSTRS_TYPE) );	else if(mString.equalsIgnoreCase("sparse2"))	   setDistribution( new SelectedTag(SPARSE2, TAGS_DSTRS_TYPE) );	else if(mString.equalsIgnoreCase("gaussian"))	   setDistribution( new SelectedTag(GAUSSIAN, TAGS_DSTRS_TYPE) );	       }    if(Utils.getFlag('M', options))	setReplaceMissingValues(true);    else	setReplaceMissingValues(false);   //if(Utils.getFlag('G', options))   //    setUseGaussian(true);   //else   //    
setUseGaussian(false);      }  /**   * Gets the current settings of the filter.   *   * @return an array of strings suitable for passing to setOptions   */  public String [] getOptions() {    String [] options = new String [10];    int current = 0;    //if (getUseGaussian()) {    //  options[current++] = "-G";    //}    if (getReplaceMissingValues()) {      options[current++] = "-M";    }    if (getPercent() == 0) {      options[current++] = "-N";      options[current++] = "" + getNumberOfAttributes();    }    else {      options[current++] = "-P";      options[current++] = "" + getPercent();    }        options[current++] = "-R";    options[current++] = "" + getRandomSeed();        SelectedTag t = getDistribution();    options[current++] = "-D";    options[current++] = ""+t.getSelectedTag().getReadable();    while (current < options.length) {      options[current++] = "";    }    return options;  }         /**   * Returns a string describing this filter   *   * @return a description of the filter suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Reduces the dimensionality of the data by projecting"	 + " it onto a lower dimensional subspace using a random"	 + " matrix with columns of unit length (i.e. It will reduce"	 + " the number of attributes in the data while preserving"	 + " much of its variation like PCA, but at a much less"	 + " computational cost).\n"	 + "It first applies the  NominalToBinary filter to" 	 + " convert all attributes to numeric before reducing the"	 + " dimension. It preserves the class attribute.\n\n"	 + "For more information, see:\n\n"	 + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);

    result.setValue(Field.AUTHOR, "Dmitriy Fradkin and David Madigan");
    result.setValue(Field.TITLE, "Experiments with random projections for machine learning");
    result.setValue(Field.BOOKTITLE, "KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining");
    // The proceedings are KDD '03; the year was previously truncated to "003".
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.PAGES, "517-522");
    result.setValue(Field.PUBLISHER, "ACM Press");
    result.setValue(Field.ADDRESS, "New York, NY, USA");

    return result;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numberOfAttributesTipText() {
    return "The number of dimensions (attributes) the data should"
        + " be reduced to.";
  }

  /**
   * Sets the number of attributes (dimensions) the data should be reduced to
   *
   * @param newAttNum the goal for the dimensions
   */
  public void setNumberOfAttributes(int newAttNum) {
    m_k = newAttNum;
  }

  /**
   * Gets the current number of attributes (dimensionality) to which the data
   * will be reduced to.
   *
   * @return the number of dimensions
   */
  public int getNumberOfAttributes() {
    return m_k;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String percentTipText() {
    // NOTE(review): the -P option help says "exclusive of the class
    // attribute" while this tip says "inclusive" - confirm which is correct
    // against the code that derives the output dimensionality.
    return " The percentage of dimensions (attributes) the data should"
        + " be reduced to  (inclusive of the class attribute). This "
        + " NumberOfAttributes option is ignored if this option is"
        + " present or is greater than zero.";
  }

  /**
   * Sets the percent the attributes (dimensions) of the data should be reduced to
   *
   * Positive values are given on a 0-100 scale and stored divided by 100;
   * zero and negative values are stored unchanged.
   *
   * @param newPercent the percentage of attributes
   */
  public void setPercent(double newPercent) {
    m_percent = (newPercent > 0) ? newPercent / 100 : newPercent;
  }

  /**
   * Gets the percent the attributes (dimensions) of the data will be reduced to
   *
   * @return the percentage of attributes (the stored value multiplied by 100)
   */
  public double getPercent() {
    return m_percent * 100;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
randomprojection.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 randomprojection.java 源码文件，采用 Java 编程语言编写，共 895 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?