📄 makedensitybasedclusterer.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * MakeDensityBasedClusterer.java * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand * */package weka.clusterers;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.estimators.DiscreteEstimator;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for wrapping a Clusterer to make it return a distribution and density. Fits normal distributions and discrete distributions within each cluster produced by the wrapped clusterer. Supports the NumberOfClustersRequestable interface only if the wrapped Clusterer does. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -M <num> * minimum allowable standard deviation for normal density computation * (default 1e-6)</pre> * * <pre> -W <clusterer name> * Clusterer to wrap. * (default weka.clusterers.SimpleKMeans)</pre> * * <pre> * Options specific to clusterer weka.clusterers.SimpleKMeans: * </pre> * * <pre> -N <num> * number of clusters. (default = 2).</pre> * * <pre> -S <num> * random number seed. * (default 10)</pre> * <!-- options-end --> * * Options after "--" are passed on to the base clusterer. * * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * @author Mark Hall (mhall@cs.waikato.ac.nz) * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.13 $ */public class MakeDensityBasedClusterer extends DensityBasedClusterer implements NumberOfClustersRequestable, OptionHandler, WeightedInstancesHandler { /** for serialization */ static final long serialVersionUID = -5643302427972186631L; /** holds training instances header information */ private Instances m_theInstances; /** prior probabilities for the fitted clusters */ private double [] m_priors; /** normal distributions fitted to each numeric attribute in each cluster */ private double [][][] m_modelNormal; /** discrete distributions fitted to each discrete attribute in each cluster */ private DiscreteEstimator [][] m_model; /** default minimum standard deviation */ private double m_minStdDev = 1e-6; /** The clusterer being wrapped */ private Clusterer m_wrappedClusterer = new weka.clusterers.SimpleKMeans(); /** globally replace missing values */ private ReplaceMissingValues m_replaceMissing; /** * Default constructor. * */ public MakeDensityBasedClusterer() { super(); } /** * Contructs a MakeDensityBasedClusterer wrapping a given Clusterer. * * @param toWrap the clusterer to wrap around */ public MakeDensityBasedClusterer(Clusterer toWrap) { setClusterer(toWrap); } /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class for wrapping a Clusterer to make it return a distribution " + "and density. Fits normal distributions and discrete distributions " + "within each cluster produced by the wrapped clusterer. Supports the " + "NumberOfClustersRequestable interface only if the wrapped Clusterer " + "does."; } /** * String describing default clusterer. * * @return the default clusterer classname */ protected String defaultClustererString() { return SimpleKMeans.class.getName(); } /** * Set the number of clusters to generate. * * @param n the number of clusters to generate * @throws Exception if the wrapped clusterer has not been set, or if * the wrapped clusterer does not implement this facility. */ public void setNumClusters(int n) throws Exception { if (m_wrappedClusterer == null) { throw new Exception("Can't set the number of clusters to generate - " +"no clusterer has been set yet."); } if (!(m_wrappedClusterer instanceof NumberOfClustersRequestable)) { throw new Exception("Can't set the number of clusters to generate - " +"wrapped clusterer does not support this facility."); } ((NumberOfClustersRequestable)m_wrappedClusterer).setNumClusters(n); } /** * Returns default capabilities of the clusterer (i.e., of the wrapper * clusterer). * * @return the capabilities of this clusterer */ public Capabilities getCapabilities() { if (m_wrappedClusterer != null) return m_wrappedClusterer.getCapabilities(); else return super.getCapabilities(); } /** * Builds a clusterer for a set of instances. * * @param data the instances to train the clusterer with * @throws Exception if the clusterer hasn't been set or something goes wrong */ public void buildClusterer(Instances data) throws Exception { // can clusterer handle the data? getCapabilities().testWithFail(data); m_replaceMissing = new ReplaceMissingValues(); m_replaceMissing.setInputFormat(data); data = weka.filters.Filter.useFilter(data, m_replaceMissing); m_theInstances = new Instances(data, 0); if (m_wrappedClusterer == null) { throw new Exception("No clusterer has been set"); } m_wrappedClusterer.buildClusterer(data); m_model = new DiscreteEstimator[m_wrappedClusterer.numberOfClusters()][data.numAttributes()]; m_modelNormal = new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()][2]; double[][] weights = new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()]; m_priors = new double[m_wrappedClusterer.numberOfClusters()]; for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) { for (int j = 0; j < data.numAttributes(); j++) { if (data.attribute(j).isNominal()) { m_model[i][j] = new DiscreteEstimator(data.attribute(j).numValues(), true); } } } Instance inst = null; // Compute mean, etc. int[] clusterIndex = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { inst = data.instance(i); int cluster = m_wrappedClusterer.clusterInstance(inst); m_priors[cluster] += inst.weight(); for (int j = 0; j < data.numAttributes(); j++) { if (!inst.isMissing(j)) { if (data.attribute(j).isNominal()) { m_model[cluster][j].addValue(inst.value(j),inst.weight()); } else { m_modelNormal[cluster][j][0] += inst.weight() * inst.value(j); weights[cluster][j] += inst.weight(); } } } clusterIndex[i] = cluster; } for (int j = 0; j < data.numAttributes(); j++) { if (data.attribute(j).isNumeric()) { for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) { if (weights[i][j] > 0) { m_modelNormal[i][j][0] /= weights[i][j]; } } } } // Compute standard deviations for (int i = 0; i < data.numInstances(); i++) { inst = data.instance(i); for (int j = 0; j < data.numAttributes(); j++) { if (!inst.isMissing(j)) { if (data.attribute(j).isNumeric()) { double diff = m_modelNormal[clusterIndex[i]][j][0] - inst.value(j); m_modelNormal[clusterIndex[i]][j][1] += inst.weight() * diff * diff; } } } } for (int j = 0; j < data.numAttributes(); j++) { if (data.attribute(j).isNumeric()) { for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) { if (weights[i][j] > 0) { m_modelNormal[i][j][1] = Math.sqrt(m_modelNormal[i][j][1] / weights[i][j]); } else if (weights[i][j] <= 0) { m_modelNormal[i][j][1] = Double.MAX_VALUE; } if (m_modelNormal[i][j][1] <= m_minStdDev) { m_modelNormal[i][j][1] = data.attributeStats(j).numericStats.stdDev; if (m_modelNormal[i][j][1] <= m_minStdDev) { m_modelNormal[i][j][1] = m_minStdDev; } } } } } Utils.normalize(m_priors); } /** * Returns the cluster priors. * * @return the cluster priors */ public double[] clusterPriors() { double[] n = new double[m_priors.length]; System.arraycopy(m_priors, 0, n, 0, n.length);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -