randomprojection.java

来自「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」· Java 代码 · 共 777 行 · 第 1/2 页
JAVA
777 行
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RandomProjection.java
 *    Copyright (C) 2003 Ashraf M. Kibriya
 *
 */

package weka.filters.unsupervised.attribute;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.SparseInstance;
import weka.core.Tag;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/** 
 * Reduces the dimensionality of the data by projecting 
 * it onto a lower dimensional subspace using a random 
 * matrix with columns of unit length (It will reduce 
 * the number of attributes in the data while preserving 
 * much of its variation like PCA, but at a much less
 * computational cost). <br>
 * It first applies the  NominalToBinary filter to 
 * convert all attributes to numeric before reducing the
 * dimension. It preserves the class attribute.
 *
 * <p> Valid filter-specific options are: <p>
 *
 * -N <num> <br>
 * The number of dimensions (attributes) the data should
 * be reduced to (exclusive of the class attribute). <p>
 *
 * -P <percent> <br>
 * The percentage of dimensions (attributes) the data should
 * be reduced to (exclusive of the class attribute). The
 * -N option is ignored if this option is present or is greater
 * than zero.<p>
 *
 * -D <distribution num> <br>
 * The distribution to use for calculating the random
 * matrix.<br>
 * <li> 1 - Sparse distribution of: (default) <br>
 *      sqrt(3)*{+1 with prob(1/6), 0 with prob(2/3), -1 with prob(1/6)}</li>
 * <li> 2 - Sparse distribution of: <br>
 *      {+1 with prob(1/2), -1 with prob(1/2)}</li>
 * <li> 3 - Gaussian distribution </li>
 *
 * -M <br>
 * Replace missing values using the ReplaceMissingValues filter
 *
 * -R <num> <br>
 * Specify the random seed for the random number generator for
 * calculating the random matrix.
 *
 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) 
 * @version 1.0 - 22 July 2003 - Initial version (Ashraf M. Kibriya)
 */
public class RandomProjection extends Filter implements UnsupervisedFilter, OptionHandler {


  /** Number of dimensions (attributes) to reduce the data to; defaults to 10. */
  private int m_k=10;

  /**
   * Target dimensionality expressed relative to the original dimension.
   * Defaults to 0.0. setPercent() stores values greater than 1 divided by 100,
   * so this normally holds a fraction rather than a percentage.
   */
  private double m_percent=0.0;

  /**
   * Whether the random matrix is computed using a Gaussian distribution.
   * NOTE(review): in the visible part of this file it is only mentioned in
   * commented-out code - confirm whether it is still used.
   */
  private boolean m_useGaussian=false;
 
  /** Identifiers of the distributions that can be used for
      calculating the random matrix */
  private static final int SPARSE1=1, SPARSE2=2, GAUSSIAN=3;

  /** Tags pairing each distribution identifier with its readable name */
  public static final Tag [] TAGS_DSTRS_TYPE = {
    new Tag(SPARSE1, "Sparse 1"),
    new Tag(SPARSE2, "Sparse 2"),
    new Tag(GAUSSIAN, "Gaussian"),
  };

  /** The distribution used for calculating the random matrix;
      defaults to SPARSE1 */
  private int m_distribution=SPARSE1;

 
  /** Whether missing values should be replaced using the
      unsupervised.ReplaceMissingValues filter; defaults to false */
  private boolean m_replaceMissing=false;

  /** Tracks whether the output format has been defined yet */
  private boolean m_OutputFormatDefined=false;

  /** The NominalToBinary filter applied to the data before this filter
      (not instantiated here; the initializer is commented out) */
  private Filter ntob; // = new weka.filters.unsupervised.attribute.NominalToBinary();

  /** The ReplaceMissingValues filter */
  private Filter replaceMissing;
    
  /** The random seed used to generate the random matrix; defaults to 42 */
  private long m_rndmSeed=42;


  /** The random matrix */
  private double rmatrix[][];

  /** The random number generator used for generating the random matrix */
  private Random r;


  /**
   * Returns an enumeration describing the available options.
   * The options are listed in the order -N, -D, -P, -M, -R.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    // Five options are registered below, so size the vector for all of them.
    Vector newVector = new Vector(5);

    newVector.addElement(new Option(
        "\tThe number of dimensions (attributes) the data should be reduced to\n"
        + "\t(exclusive of the class attribute).",
        "N", 1, "-N <number>"));

    newVector.addElement(new Option(
        "\tThe distribution to use for calculating the random matrix.\n"
        + "\tSparse1 is:\n"
        + "\t  sqrt(3)*{-1 with prob(1/6), 0 with prob(2/3), +1 with prob(1/6)}\n"
        + "\tSparse2 is:\n"
        + "\t  {-1 with prob(1/2), +1 with prob(1/2)}\n",
        "D", 1, "-D [SPARSE1|SPARSE2|GAUSSIAN]"));

    newVector.addElement(new Option(
        "\tThe percentage of dimensions (attributes) the data should\n"
        + "\tbe reduced to  (inclusive of the class attribute). This -N\n"
        + "\toption is ignored if this option is present or is greater\n"
        + "\tthan zero.",
        "P", 1, "-P <percent>"));

    newVector.addElement(new Option(
        "\tReplace missing values using the ReplaceMissingValues filter",
        "M", 0, "-M"));

    // -R carries a value (the seed), which setOptions reads with
    // Utils.getOption('R', ...), so it must be declared with one argument.
    newVector.addElement(new Option(
        "\tThe random seed for the random number generator used for\n"
        + "\tcalculating the random matrix.",
        "R", 1, "-R <num>"));

    return newVector.elements();
  }

  /**
   * Parses the options for this object. Valid options are: <p>
   *
   * {@code -P <percent>} <br>
   * The percentage of dimensions (attributes) the data should be reduced to.
   * When this option is given, -N is not read. When it is absent, the stored
   * percentage is reset to zero so that -N (or its default) takes effect. <p>
   *
   * {@code -N <num>} <br>
   * The number of dimensions (attributes) the data should be reduced to.
   * Only read when -P is absent; defaults to 10. <p>
   *
   * {@code -R <num>} <br>
   * The random seed for the random number generator used for calculating
   * the random matrix. Left unchanged when absent. <p>
   *
   * {@code -D <distribution>} <br>
   * The distribution to use for calculating the random matrix, given either
   * by number or by name (case-insensitive, whitespace ignored):<br>
   * <li> 1 or sparse1 - sqrt(3)*{+1 with prob(1/6), 0 with prob(2/3), -1 with prob(1/6)}</li>
   * <li> 2 or sparse2 - {+1 with prob(1/2), -1 with prob(1/2)}</li>
   * <li> 3 or gaussian - Gaussian distribution </li>
   * Any other value leaves the current distribution unchanged. <p>
   *
   * {@code -M} <br>
   * Replace missing values using the ReplaceMissingValues filter.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported; a
   *            NumberFormatException is thrown when a numeric option value
   *            cannot be parsed
   */
  public void setOptions(String[] options) throws Exception {

    String mString = Utils.getOption('P', options);
    if (mString.length() != 0) {
      setPercent(Double.parseDouble(mString));
    } else {
      // Without -P, clear any percentage left over from an earlier
      // configuration; otherwise it would keep overriding -N.
      setPercent(0);
      mString = Utils.getOption('N', options);
      if (mString.length() != 0) {
        setNumberOfAttributes(Integer.parseInt(mString));
      } else {
        setNumberOfAttributes(10);
      }
    }

    mString = Utils.getOption('R', options);
    if (mString.length() != 0) {
      setRandomSeed(Long.parseLong(mString));
    }

    mString = Utils.getOption('D', options);
    if (mString.length() != 0) {
      // Strip whitespace so the readable tag names ("Sparse 1", "Sparse 2")
      // are accepted as well as the compact forms.
      String distName = mString.replaceAll("\\s+", "");
      if (distName.equalsIgnoreCase("sparse1")
          || distName.equals(String.valueOf(SPARSE1))) {
        setDistribution(new SelectedTag(SPARSE1, TAGS_DSTRS_TYPE));
      } else if (distName.equalsIgnoreCase("sparse2")
          || distName.equals(String.valueOf(SPARSE2))) {
        setDistribution(new SelectedTag(SPARSE2, TAGS_DSTRS_TYPE));
      } else if (distName.equalsIgnoreCase("gaussian")
          || distName.equals(String.valueOf(GAUSSIAN))) {
        setDistribution(new SelectedTag(GAUSSIAN, TAGS_DSTRS_TYPE));
      }
    }

    setReplaceMissingValues(Utils.getFlag('M', options));
  }

  /**
   * Gets the current settings of the filter.
   *
   * Either -P (when the stored percentage is greater than zero) or -N is
   * emitted, never both, because setOptions does not read -N once -P is
   * present. The returned array always has length 10; unused trailing
   * slots hold empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] options = new String [10];
    int current = 0;

    if (getReplaceMissingValues()) {
      options[current++] = "-M";
    }

    double percent = getPercent();
    if (percent > 0) {
      options[current++] = "-P";
      options[current++] = "" + percent;
    } else {
      // Emit the count as an int: a value such as "10.0" would be rejected
      // by Integer.parseInt in setOptions.
      options[current++] = "-N";
      options[current++] = "" + getNumberOfAttributes();
    }

    options[current++] = "-R";
    options[current++] = "" + getRandomSeed();

    // The readable tag names contain a space ("Sparse 1"); setOptions matches
    // "sparse1"/"sparse2"/"gaussian", so emit the name without whitespace.
    SelectedTag t = getDistribution();
    options[current++] = "-D";
    options[current++] = t.getSelectedTag().getReadable().replaceAll("\\s+", "");

    while (current < options.length) {
      options[current++] = "";
    }

    return options;
  }
    
   
  /**
   * Describes this filter in a form suitable for display in the
   * explorer/experimenter gui.
   *
   * @return the description text of the filter
   */
  public String globalInfo() {

    StringBuffer info = new StringBuffer();

    // First paragraph: what the projection does.
    info.append("Reduces the dimensionality of the data by projecting");
    info.append(" it onto a lower dimensional subspace using a random");
    info.append(" matrix with columns of unit length (i.e. It will reduce");
    info.append(" the number of attributes in the data while preserving");
    info.append(" much of its variation like PCA, but at a much less");
    info.append(" computational cost).\n");

    // Second paragraph: preprocessing and class attribute handling.
    info.append("It first applies the  NominalToBinary filter to");
    info.append(" convert all attributes to numeric before reducing the");
    info.append(" dimension. It preserves the class attribute.");

    return info.toString();
  }


  /**
   * Tip text for the numberOfAttributes property.
   *
   * @return the tip text, suitable for displaying in the
   * explorer/experimenter gui
   */
  public String numberOfAttributesTipText() {
    final String tip =
        "The number of dimensions (attributes) the data should"
        + " be reduced to.";
    return tip;
  }

  /**
   * Stores the number of attributes (dimensions) the data should be reduced to.
   *
   * @param newAttNum the new target number of attributes
   */
  public void setNumberOfAttributes(int  newAttNum) {
    this.m_k = newAttNum;
  }
  
  /**
   * Returns the currently stored number of attributes (dimensionality)
   * the data will be reduced to.
   *
   * @return the stored target number of attributes
   */
  public int getNumberOfAttributes() {
    return this.m_k;
  }

  /**
   * Tip text for the percent property.
   *
   * @return the tip text, suitable for displaying in the
   * explorer/experimenter gui
   */
  public String percentTipText() {
    final String tip =
        " The percentage of dimensions (attributes) the data should"
        + " be reduced to  (inclusive of the class attribute). This "
        + " NumberOfAttributes option is ignored if this option is"
        + " present or is greater than zero.";
    return tip;
  }

  /**
   * Stores the percentage the attributes (dimensions) of the data should be
   * reduced to. A value greater than 1 is divided by 100 before it is stored;
   * any other value is stored as given.
   *
   * @param newPercent the new percentage
   */
  public void setPercent(double newPercent) {
    m_percent = (newPercent > 1) ? newPercent / 100 : newPercent;
  }

  /**
   * Returns the stored percentage the attributes (dimensions) of the data
   * will be reduced to.
   *
   * @return the value stored by setPercent
   */
  public double getPercent() {
    return this.m_percent;
  }


  /**
   * Tip text for the randomSeed property.
   *
   * @return the tip text, suitable for displaying in the
   * explorer/experimenter gui
   */
  public String randomSeedTipText() {
    final String tip =
        "The random seed used by the random"
        + " number generator used for generating"
        + " the random matrix ";
    return tip;
  }

  /**
   * Stores the random seed of the random number generator.
   *
   * @param seed the new seed
   */
  public void setRandomSeed(long seed) {
    this.m_rndmSeed = seed;
  }

  /**
   * Returns the stored random seed of the random number generator.
   *
   * @return the current seed
   */
  public long getRandomSeed() {
    return this.m_rndmSeed;
  }
randomprojection.java - 源码说明

本页面展示了「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」中的 randomprojection.java 源码文件，采用 Java 编程语言编写，共 777 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ALPHAMINERR相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?