clustermembership.java

来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 502 行 · 第 1/2 页

JAVA
502
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    ClusterMembership.java *    Copyright (C) 2004 Mark Hall * */package weka.filters.unsupervised.attribute;import weka.clusterers.DensityBasedClusterer;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Range;import weka.core.Utils;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.util.Enumeration;import java.util.Vector;/**  <!-- globalinfo-start --> * A filter that uses a density-based clusterer to generate cluster membership values; filtered instances are composed of these values plus the class attribute (if set in the input data). If a (nominal) class attribute is set, the clusterer is run separately for each class. The class attribute (if set) and any user-specified attributes are ignored during the clustering operation * <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -W &lt;clusterer name&gt; *  Full name of clusterer to use (required). *  eg: weka.clusterers.EM *  Additional options after the '--'.</pre> *  * <pre> -I &lt;att1,att2-att4,...&gt; *  The range of attributes the clusterer should ignore. 
*  (the class attribute is automatically ignored)</pre> *  <!-- options-end --> * * Options after the -- are passed on to the clusterer. * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @author Eibe Frank * @version $Revision: 1.8 $ */public class ClusterMembership   extends Filter   implements UnsupervisedFilter, OptionHandler {    /** for serialization */  static final long serialVersionUID = 6675702504667714026L;  /** The clusterer */  protected DensityBasedClusterer m_clusterer = new weka.clusterers.EM();  /** Array for storing the clusterers */  protected DensityBasedClusterer[] m_clusterers;  /** Range of attributes to ignore */  protected Range m_ignoreAttributesRange;  /** Filter for removing attributes */  protected Filter m_removeAttributes;  /** The prior probability for each class */  protected double[] m_priors;    /**   * Sets the format of the input instances.   *   * @param instanceInfo an Instances object containing the input instance   * structure (any instances contained in the object are ignored - only the   * structure is required).   * @return true if the outputFormat may be collected immediately   * @throws Exception if the inputFormat can't be set successfully    */   public boolean setInputFormat(Instances instanceInfo) throws Exception {        super.setInputFormat(instanceInfo);    m_removeAttributes = null;    m_priors = null;    return false;  }  /**   * Signify that this batch of input to the filter is finished.   
*   * @return true if there are instances pending output   * @throws IllegalStateException if no input structure has been defined    */    public boolean batchFinished() throws Exception {    if (getInputFormat() == null) {      throw new IllegalStateException("No input instance format defined");    }    if (outputFormatPeek() == null) {      Instances toFilter = getInputFormat();      Instances[] toFilterIgnoringAttributes;      // Make subsets if class is nominal      if ((toFilter.classIndex() >= 0) && toFilter.classAttribute().isNominal()) {	toFilterIgnoringAttributes = new Instances[toFilter.numClasses()];	for (int i = 0; i < toFilter.numClasses(); i++) {	  toFilterIgnoringAttributes[i] = new Instances(toFilter, toFilter.numInstances());	}	for (int i = 0; i < toFilter.numInstances(); i++) {	  toFilterIgnoringAttributes[(int)toFilter.instance(i).classValue()].add(toFilter.instance(i));	}	m_priors = new double[toFilter.numClasses()];	for (int i = 0; i < toFilter.numClasses(); i++) {	  toFilterIgnoringAttributes[i].compactify();	  m_priors[i] = toFilterIgnoringAttributes[i].sumOfWeights();	}	Utils.normalize(m_priors);      } else {	toFilterIgnoringAttributes = new Instances[1];	toFilterIgnoringAttributes[0] = toFilter;	m_priors = new double[1];	m_priors[0] = 1;      }      // filter out attributes if necessary      if (m_ignoreAttributesRange != null || toFilter.classIndex() >= 0) {	m_removeAttributes = new Remove();	String rangeString = "";	if (m_ignoreAttributesRange != null) {	  rangeString += m_ignoreAttributesRange.getRanges();	}	if (toFilter.classIndex() >= 0) {	  if (rangeString.length() > 0) {	    rangeString += (","+(toFilter.classIndex()+1));	  	  } else {	    rangeString = ""+(toFilter.classIndex()+1);	  }	}	((Remove)m_removeAttributes).setAttributeIndices(rangeString);	((Remove)m_removeAttributes).setInvertSelection(false);	((Remove)m_removeAttributes).setInputFormat(toFilter);	for (int i = 0; i < toFilterIgnoringAttributes.length; i++) {	  
toFilterIgnoringAttributes[i] = Filter.useFilter(toFilterIgnoringAttributes[i],							   m_removeAttributes);	}      }           // build the clusterers      if ((toFilter.classIndex() <= 0) || !toFilter.classAttribute().isNominal()) {	m_clusterers = DensityBasedClusterer.makeCopies(m_clusterer, 1);	m_clusterers[0].buildClusterer(toFilterIgnoringAttributes[0]);      } else {	m_clusterers = DensityBasedClusterer.makeCopies(m_clusterer, toFilter.numClasses());	for (int i = 0; i < m_clusterers.length; i++) {	  if (toFilterIgnoringAttributes[i].numInstances() == 0) {	    m_clusterers[i] = null;	  } else {	    m_clusterers[i].buildClusterer(toFilterIgnoringAttributes[i]);	  }	}      }            // create output dataset      FastVector attInfo = new FastVector();      for (int j = 0; j < m_clusterers.length; j++) {	if (m_clusterers[j] != null) {	  for (int i = 0; i < m_clusterers[j].numberOfClusters(); i++) {	    attInfo.addElement(new Attribute("pCluster_" + j + "_" + i));	  }	}      }      if (toFilter.classIndex() >= 0) {	attInfo.addElement(toFilter.classAttribute().copy());      }      attInfo.trimToSize();      Instances filtered = new Instances(toFilter.relationName()+"_clusterMembership",					 attInfo, 0);      if (toFilter.classIndex() >= 0) {	filtered.setClassIndex(filtered.numAttributes() - 1);      }      setOutputFormat(filtered);      // build new dataset      for (int i = 0; i < toFilter.numInstances(); i++) {	convertInstance(toFilter.instance(i));      }    }    flushInput();    m_NewBatch = true;    return (numPendingOutput() != 0);  }  /**   * Input an instance for filtering. Ordinarily the instance is processed   * and made available for output immediately. Some filters require all   * instances be read before producing output.   *   * @param instance the input instance   * @return true if the filtered instance may now be   * collected with output().   * @throws IllegalStateException if no input format has been defined.   
*/
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    // a new batch is starting: reset the output queue once, then clear the flag
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // With an output format in place the instance is converted right away;
    // otherwise it is buffered and nothing can be collected yet.
    boolean formatKnown = (outputFormatPeek() != null);
    if (formatKnown) {
      convertInstance(instance);
    } else {
      bufferInput(instance);
    }
    return formatKnown;
  }

  /**
   * Converts logs back to density values.
   *
   * @param j the index of the clusterer
   * @param in the instance to convert the logs back
   * @return the densities
   * @throws Exception if something goes wrong
   */
  protected double[] logs2densities(int j, Instance in) throws Exception {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?