⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kddatagenerator.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *   KDDataGenerator.java
 *   Copyright (C) 2002 Mark Hall
 *
 */

package weka.gui.boundaryvisualizer;

import weka.core.*;
import java.util.Random;
import java.io.*;

/**
 * KDDataGenerator. Class that uses kernels to generate new random
 * instances based on a supplied set of instances.
 *
 * <p>There is one kernel per supplied instance. Each call to
 * {@link #generateInstance()} or {@link #generateInstanceFast()} draws from
 * the next kernel in turn, wrapping around after the last one. Attributes
 * flagged as weighting dimensions are not sampled: they are set to the
 * supplied weighting values and contribute a factor to the weight of the
 * generated instance instead.</p>
 *
 * @author <a href="mailto:mhall@cs.waikato.ac.nz">Mark Hall</a>
 * @version $Revision: 1.1.1.1 $
 * @since 1.0
 * @see DataGenerator
 * @see Serializable
 */
public class KDDataGenerator implements DataGenerator, Serializable {

  /** Normalising constant sqrt(2*pi) of the normal density. */
  private static final double NORM_CONST = Math.sqrt(2 * Math.PI);

  /** The instances to use; each one is the centre of a kernel. */
  private Instances m_instances;

  /**
   * Standard deviations of the normal distributions for numeric attributes
   * in each KD estimator (one shared value per attribute).
   */
  private double [] m_standardDeviations;

  /** Global means or modes to use in place of missing kernel values. */
  private double [] m_globalMeansOrModes;

  /** Standard deviation used for numeric attributes with zero variance. */
  private double m_minStdDev = 1e-5;

  /** Laplace correction for discrete distributions. */
  private double m_laplaceConst = 1.0;

  /** Random number seed. */
  private int m_seed = 1;

  /** Random number generator. */
  private Random m_random;

  /** Index of the kernel estimator to generate the next instance from. */
  private int m_KDToGenerateFrom;

  /** Which dimensions to use for computing a weight for each generated instance. */
  private boolean [] m_weightingDimensions;

  /**
   * The values for the weighting dimensions to use for computing the weight
   * for the next instance to be generated.
   */
  private double [] m_weightingValues;

  /** Created once only - reused by the fast generation path. */
  private Instance m_instance;

  /** Value buffer handed to {@link #m_instance} when it was constructed. */
  private double [] m_instanceVals;

  /**
   * Initialize the generator using the supplied instances.
   *
   * <p>Resets the random number generator and the kernel index. Previously
   * set weighting dimensions are kept; if none were set, no attribute is
   * treated as a weighting dimension.</p>
   *
   * @param inputInstances the instances to use as the basis of the kernels
   * @exception Exception if an error occurs
   */
  public void buildGenerator(Instances inputInstances) throws Exception {
    if (inputInstances == null) {
      throw new IllegalArgumentException("inputInstances must not be null");
    }

    m_random = new Random(m_seed);
    m_KDToGenerateFrom = 0;
    m_instances = inputInstances;

    int numAtts = m_instances.numAttributes();
    m_standardDeviations = new double [numAtts];
    m_globalMeansOrModes = new double [numAtts];
    if (m_weightingDimensions == null) {
      m_weightingDimensions = new boolean[numAtts];
    }

    for (int i = 0; i < numAtts; i++) {
      // Computed for every attribute, not only the non-numeric ones:
      // generateInstance falls back to this value as the kernel centre
      // whenever the kernel's own value is missing, numeric attributes
      // included. Leaving it unset made missing numeric values centre on 0.
      m_globalMeansOrModes[i] = m_instances.meanOrMode(i);

      if (m_instances.attribute(i).isNumeric()) {
        // global standard deviation
        double stdDev = m_instances.variance(i);
        if (stdDev == 0) {
          stdDev = m_minStdDev;
        } else {
          stdDev = Math.sqrt(stdDev);
          // heuristic to take into account # instances and dimensions
          double adjust = Math.pow((double) m_instances.numInstances(),
                                   1.0 / numAtts);
          stdDev /= adjust;
        }
        m_standardDeviations[i] = stdDev;
      }
    }

    m_instanceVals = new double [numAtts];
    m_instance = new Instance(1.0, m_instanceVals);
  }

  /**
   * Return a cumulative distribution from a discrete distribution.
   *
   * @param dist the distribution to use
   * @return the cumulative distribution (running sums of <code>dist</code>)
   */
  private double [] computeCumulativeDistribution(double [] dist) {
    double [] cumDist = new double[dist.length];
    double sum = 0;
    for (int i = 0; i < dist.length; i++) {
      sum += dist[i];
      cumDist[i] = sum;
    }
    return cumDist;
  }

  /**
   * Generate a new instance. Returns the instance in a brand new
   * Instance object.
   *
   * @return an <code>Instance</code> value
   * @exception Exception if an error occurs
   */
  public Instance generateInstance() throws Exception {
    return generateInstance(false);
  }

  /**
   * Generate a new instance. Reuses an existing instance object to
   * speed up the process, so the returned object is overwritten by the
   * next call to this method.
   *
   * @return an <code>Instance</code> value
   * @exception Exception if an error occurs
   */
  public Instance generateInstanceFast() throws Exception {
    return generateInstance(true);
  }

  /**
   * Generates a new instance using one kernel estimator. Each successive
   * call to this method increments the index of the kernel to use.
   *
   * @param fast generate the instance quickly (reuse the shared instance)
   * @return the new random instance
   * @exception Exception if the generator has not been built, has no
   * instances, or the weighting configuration does not match the data
   */
  private Instance generateInstance(boolean fast) throws Exception {
    if (m_instances == null || m_weightingDimensions == null) {
      throw new IllegalStateException(
        "buildGenerator() must be called before generating instances");
    }
    int numAtts = m_instances.numAttributes();
    if (m_weightingDimensions.length != numAtts) {
      throw new Exception("Weighting dimension array != num attributes!");
    }
    if (m_instances.numInstances() == 0) {
      throw new IllegalStateException("No instances to generate from");
    }

    // Use a local buffer. The non-fast path must not replace m_instanceVals:
    // m_instance was built around that array, and overwriting the field
    // would make later fast calls fill a different array than the one
    // m_instance exposes. As in the original code, values are written into
    // the array after it has been handed to the Instance constructor.
    double [] vals;
    Instance newInst;
    if (fast) {
      vals = m_instanceVals;
      newInst = m_instance;
    } else {
      vals = new double [numAtts];
      newInst = new Instance(1.0, vals);
    }

    Instance kernel = m_instances.instance(m_KDToGenerateFrom);
    double weight = 1;
    for (int i = 0; i < numAtts; i++) {
      // centre of the kernel in this dimension; global statistic if missing
      double centre = kernel.isMissing(i)
        ? m_globalMeansOrModes[i]
        : kernel.value(i);
      boolean numeric = m_instances.attribute(i).isNumeric();

      if (m_weightingDimensions[i]) {
        if (m_weightingValues == null || i >= m_weightingValues.length) {
          throw new IllegalStateException(
            "No weighting value set for weighting dimension " + i);
        }
        double wm;
        if (numeric) {
          wm = normalDens(m_weightingValues[i], centre,
                          m_standardDeviations[i]);
        } else {
          wm = (1.0 + m_laplaceConst)
            / (m_instances.attribute(i).numValues() * m_laplaceConst);
        }
        // non-positive factors are skipped rather than zeroing the weight
        if (wm > 0) {
          weight *= wm;
        }
        vals[i] = m_weightingValues[i];
      } else if (numeric) {
        vals[i] = m_random.nextGaussian() * m_standardDeviations[i] + centre;
      } else {
        vals[i] = (double) sampleNominal(i, centre);
      }
    }
    newInst.setWeight(weight);

    // next kernel to generate from
    m_KDToGenerateFrom++;
    m_KDToGenerateFrom %= m_instances.numInstances();
    return newInst;
  }

  /**
   * Draws a value index for a nominal attribute from a Laplace-smoothed
   * distribution that puts one extra count on the kernel's own value.
   *
   * @param attIndex index of the nominal attribute
   * @param centre the kernel's value index for this attribute (or the
   * global mode when the kernel's value is missing)
   * @return the sampled value index
   */
  private int sampleNominal(int attIndex, double centre) {
    double [] dist = new double[m_instances.attribute(attIndex).numValues()];
    for (int j = 0; j < dist.length; j++) {
      dist[j] = m_laplaceConst;
    }
    dist[(int) centre]++;
    Utils.normalize(dist);

    double [] cumDist = computeCumulativeDistribution(dist);
    double randomVal = m_random.nextDouble();
    for (int j = 0; j < cumDist.length; j++) {
      if (randomVal <= cumDist[j]) {
        return j;
      }
    }
    // Only reachable when rounding leaves the final cumulative value just
    // below the draw; the draw then belongs to the last value, not the first.
    return cumDist.length - 1;
  }

  /**
   * Density function of normal distribution.
   *
   * @param x input value
   * @param mean mean of distribution
   * @param stdDev standard deviation of distribution
   * @return the density of the normal distribution at <code>x</code>
   */
  private double normalDens (double x, double mean, double stdDev) {
    double diff = x - mean;
    return (1 / (NORM_CONST * stdDev))
      * Math.exp(-(diff * diff / (2 * stdDev * stdDev)));
  }

  /**
   * Set which dimensions to use when computing a weight for the next
   * instance to generate.
   *
   * @param dims an array of booleans indicating which dimensions to use
   */
  public void setWeightingDimensions(boolean [] dims) {
    m_weightingDimensions = dims;
  }

  /**
   * Set the values for the weighting dimensions to be used when computing
   * the weight for the next instance to be generated.
   *
   * @param vals an array of doubles containing the values of the
   * weighting dimensions (corresponding to the entries that are set to
   * true through setWeightingDimensions)
   */
  public void setWeightingValues(double [] vals) {
    m_weightingValues = vals;
  }

  /**
   * Return the number of kernels (there is one per training instance).
   *
   * @return the number of kernels, or 0 if the generator has not been built
   */
  public int getNumGeneratingModels() {
    if (m_instances != null) {
      return m_instances.numInstances();
    }
    return 0;
  }

  /**
   * Main method for testing this class. Reads a dataset from the named
   * file and prints its header followed by one generated instance per
   * input instance.
   *
   * @param args a <code>String[]</code> value; expects exactly one
   * element, the name of the file to read
   */
  public static void main(String [] args) {
    try {
      if (args.length != 1) {
        throw new Exception("Usage: KDDataGenerator <filename>");
      }

      Instances insts;
      Reader r = new BufferedReader(new FileReader(args[0]));
      try {
        insts = new Instances(r);
      } finally {
        // release the file handle even if parsing fails
        r.close();
      }

      KDDataGenerator dg = new KDDataGenerator();
      dg.buildGenerator(insts);
      Instances header = new Instances(insts, 0);
      System.out.println(header);
      for (int i = 0; i < insts.numInstances(); i++) {
        Instance newInst = dg.generateInstance();
        newInst.setDataset(header);
        System.out.println(newInst);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -