📄 makedensitybasedclusterer.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    MakeDensityBasedClusterer.java *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand * */package weka.clusterers;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.estimators.DiscreteEstimator;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for wrapping a Clusterer to make it return a distribution and density. Fits normal distributions and discrete distributions within each cluster produced by the wrapped clusterer. Supports the NumberOfClustersRequestable interface only if the wrapped Clusterer does. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -M &lt;num&gt; *  minimum allowable standard deviation for normal density computation  *  (default 1e-6)</pre> *  * <pre> -W &lt;clusterer name&gt; *  Clusterer to wrap. *  (default weka.clusterers.SimpleKMeans)</pre> *  * <pre>  * Options specific to clusterer weka.clusterers.SimpleKMeans: * </pre> *  * <pre> -N &lt;num&gt; *  number of clusters. (default = 2).</pre> *  * <pre> -S &lt;num&gt; *  random number seed. *  (default 10)</pre> *  <!-- options-end --> *  * Options after "--" are passed on to the base clusterer. * * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * @author Mark Hall (mhall@cs.waikato.ac.nz) * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.13 $ */public class MakeDensityBasedClusterer   extends DensityBasedClusterer  implements NumberOfClustersRequestable, 	     OptionHandler, 	     WeightedInstancesHandler {  /** for serialization */  static final long serialVersionUID = -5643302427972186631L;    /** holds training instances header information */  private Instances m_theInstances;  /** prior probabilities for the fitted clusters */  private double [] m_priors;  /** normal distributions fitted to each numeric attribute in each cluster */  private double [][][] m_modelNormal;  /** discrete distributions fitted to each discrete attribute in each cluster */  private DiscreteEstimator [][] m_model;  /** default minimum standard deviation */  private double m_minStdDev = 1e-6;  /** The clusterer being wrapped */  private Clusterer m_wrappedClusterer = new weka.clusterers.SimpleKMeans();  /** globally replace missing values */  private ReplaceMissingValues m_replaceMissing;  /**   * Default constructor.   *    */    public MakeDensityBasedClusterer() {    super();  }     /**   * Contructs a MakeDensityBasedClusterer wrapping a given Clusterer.   *    * @param toWrap the clusterer to wrap around   */      public MakeDensityBasedClusterer(Clusterer toWrap) {    setClusterer(toWrap);  }    /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "Class for wrapping a Clusterer to make it return a distribution "      + "and density. Fits normal distributions and discrete distributions "      + "within each cluster produced by the wrapped clusterer. Supports the "      + "NumberOfClustersRequestable interface only if the wrapped Clusterer "      + "does.";  }  /**   * String describing default clusterer.   *    * @return 		the default clusterer classname   */  protected String defaultClustererString() {    return SimpleKMeans.class.getName();  }  /**   * Set the number of clusters to generate.   *   * @param n the number of clusters to generate   * @throws Exception if the wrapped clusterer has not been set, or if   * the wrapped clusterer does not implement this facility.   */  public void setNumClusters(int n) throws Exception {    if (m_wrappedClusterer == null) {      throw new Exception("Can't set the number of clusters to generate - "			  +"no clusterer has been set yet.");    }    if (!(m_wrappedClusterer instanceof NumberOfClustersRequestable)) {      throw new Exception("Can't set the number of clusters to generate - "			  +"wrapped clusterer does not support this facility.");    }    ((NumberOfClustersRequestable)m_wrappedClusterer).setNumClusters(n);  }  /**   * Returns default capabilities of the clusterer (i.e., of the wrapper   * clusterer).   *   * @return      the capabilities of this clusterer   */  public Capabilities getCapabilities() {    if (m_wrappedClusterer != null)      return m_wrappedClusterer.getCapabilities();    else      return super.getCapabilities();  }    /**   * Builds a clusterer for a set of instances.   *   * @param data the instances to train the clusterer with   * @throws Exception if the clusterer hasn't been set or something goes wrong   */    public void buildClusterer(Instances data) throws Exception {    // can clusterer handle the data?    getCapabilities().testWithFail(data);    m_replaceMissing = new ReplaceMissingValues();    m_replaceMissing.setInputFormat(data);    data = weka.filters.Filter.useFilter(data, m_replaceMissing);    m_theInstances = new Instances(data, 0);    if (m_wrappedClusterer == null) {      throw new Exception("No clusterer has been set");    }    m_wrappedClusterer.buildClusterer(data);    m_model =        new DiscreteEstimator[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];    m_modelNormal =       new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()][2];    double[][] weights =  new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];    m_priors = new double[m_wrappedClusterer.numberOfClusters()];      for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {       for (int j = 0; j < data.numAttributes(); j++) {	 if (data.attribute(j).isNominal()) {	   m_model[i][j] = new DiscreteEstimator(data.attribute(j).numValues(),						 true);	 }       }     }          Instance inst = null;     // Compute mean, etc.     int[] clusterIndex = new int[data.numInstances()];     for (int i = 0; i < data.numInstances(); i++) {       inst = data.instance(i);       int cluster = m_wrappedClusterer.clusterInstance(inst);       m_priors[cluster] += inst.weight();       for (int j = 0; j < data.numAttributes(); j++) {	 if (!inst.isMissing(j)) {	   if (data.attribute(j).isNominal()) {	     m_model[cluster][j].addValue(inst.value(j),inst.weight());	   } else {	     m_modelNormal[cluster][j][0] += inst.weight() * inst.value(j);	     weights[cluster][j] += inst.weight();	   }	 }       }       clusterIndex[i] = cluster;     }     for (int j = 0; j < data.numAttributes(); j++) {       if (data.attribute(j).isNumeric()) {	 for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {	   	   if (weights[i][j] > 0) {	     m_modelNormal[i][j][0] /= weights[i][j];	   }	 }       }     }     // Compute standard deviations     for (int i = 0; i < data.numInstances(); i++) {       inst = data.instance(i);       for (int j = 0; j < data.numAttributes(); j++) {	 if (!inst.isMissing(j)) {	   if (data.attribute(j).isNumeric()) {	     double diff = m_modelNormal[clusterIndex[i]][j][0] - inst.value(j);	     m_modelNormal[clusterIndex[i]][j][1] += inst.weight() * diff * diff;	   }	 }       }     }     for (int j = 0; j < data.numAttributes(); j++) {       if (data.attribute(j).isNumeric()) {	 for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {	   	   if (weights[i][j] > 0) {	     m_modelNormal[i][j][1] = 	       Math.sqrt(m_modelNormal[i][j][1] / weights[i][j]);	   } else if (weights[i][j] <= 0) {	     m_modelNormal[i][j][1] = Double.MAX_VALUE;	   }	   if (m_modelNormal[i][j][1] <= m_minStdDev) {	     m_modelNormal[i][j][1] = data.attributeStats(j).numericStats.stdDev;	     if (m_modelNormal[i][j][1] <= m_minStdDev) {	       m_modelNormal[i][j][1] = m_minStdDev;	     }	   }	 }       }     }          Utils.normalize(m_priors);  }  /**   * Returns the cluster priors.   *    * @return the cluster priors   */  public double[] clusterPriors() {    double[] n = new double[m_priors.length];      System.arraycopy(m_priors, 0, n, 0, n.length);
12 下一页
💿 文件大小 124 K
👤 上传用户 wuseyue
📂 所属分类数学计算
🏷️ 相关标签

#数据挖掘 #聚类 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -