📄 clustermembership.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * ClusterMembership.java * Copyright (C) 2004 Mark Hall * */package weka.filters.unsupervised.attribute;import weka.clusterers.DensityBasedClusterer;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Range;import weka.core.Utils;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * A filter that uses a density-based clusterer to generate cluster membership values; filtered instances are composed of these values plus the class attribute (if set in the input data). If a (nominal) class attribute is set, the clusterer is run separately for each class. The class attribute (if set) and any user-specified attributes are ignored during the clustering operation * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -W <clusterer name> * Full name of clusterer to use. eg: * weka.clusterers.EM * Additional options after the '--'. * (default: weka.clusterers.EM)</pre> * * <pre> -I <att1,att2-att4,...> * The range of attributes the clusterer should ignore. * (the class attribute is automatically ignored)</pre> * <!-- options-end --> * * Options after the -- are passed on to the clusterer. * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @author Eibe Frank * @version $Revision: 1.11 $ */public class ClusterMembership extends Filter implements UnsupervisedFilter, OptionHandler { /** for serialization */ static final long serialVersionUID = 6675702504667714026L; /** The clusterer */ protected DensityBasedClusterer m_clusterer = new weka.clusterers.EM(); /** Array for storing the clusterers */ protected DensityBasedClusterer[] m_clusterers; /** Range of attributes to ignore */ protected Range m_ignoreAttributesRange; /** Filter for removing attributes */ protected Filter m_removeAttributes; /** The prior probability for each class */ protected double[] m_priors; /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = m_clusterer.getCapabilities(); result.setMinimumNumberInstances(0); return result; } /** * tests the data whether the filter can actually handle it * * @param instanceInfo the data to test * @throws Exception if the test fails */ protected void testInputFormat(Instances instanceInfo) throws Exception { getCapabilities().testWithFail(removeIgnored(instanceInfo)); } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - only the * structure is required). * @return true if the outputFormat may be collected immediately * @throws Exception if the inputFormat can't be set successfully */ public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_removeAttributes = null; m_priors = null; return false; } /** * filters all attributes that should be ignored * * @param data the data to filter * @return the filtered data * @throws Exception if filtering fails */ protected Instances removeIgnored(Instances data) throws Exception { Instances result = data; if (m_ignoreAttributesRange != null || data.classIndex() >= 0) { result = new Instances(data); m_removeAttributes = new Remove(); String rangeString = ""; if (m_ignoreAttributesRange != null) { rangeString += m_ignoreAttributesRange.getRanges(); } if (data.classIndex() >= 0) { if (rangeString.length() > 0) { rangeString += "," + (data.classIndex() + 1); } else { rangeString = "" + (data.classIndex() + 1); } } ((Remove) m_removeAttributes).setAttributeIndices(rangeString); ((Remove) m_removeAttributes).setInvertSelection(false); m_removeAttributes.setInputFormat(data); result = Filter.useFilter(data, m_removeAttributes); } return result; } /** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (outputFormatPeek() == null) { Instances toFilter = getInputFormat(); Instances[] toFilterIgnoringAttributes; // Make subsets if class is nominal if ((toFilter.classIndex() >= 0) && toFilter.classAttribute().isNominal()) { toFilterIgnoringAttributes = new Instances[toFilter.numClasses()]; for (int i = 0; i < toFilter.numClasses(); i++) { toFilterIgnoringAttributes[i] = new Instances(toFilter, toFilter.numInstances()); } for (int i = 0; i < toFilter.numInstances(); i++) { toFilterIgnoringAttributes[(int)toFilter.instance(i).classValue()].add(toFilter.instance(i)); } m_priors = new double[toFilter.numClasses()]; for (int i = 0; i < toFilter.numClasses(); i++) { toFilterIgnoringAttributes[i].compactify(); m_priors[i] = toFilterIgnoringAttributes[i].sumOfWeights(); } Utils.normalize(m_priors); } else { toFilterIgnoringAttributes = new Instances[1]; toFilterIgnoringAttributes[0] = toFilter; m_priors = new double[1]; m_priors[0] = 1; } // filter out attributes if necessary for (int i = 0; i < toFilterIgnoringAttributes.length; i++) toFilterIgnoringAttributes[i] = removeIgnored(toFilterIgnoringAttributes[i]); // build the clusterers if ((toFilter.classIndex() <= 0) || !toFilter.classAttribute().isNominal()) { m_clusterers = DensityBasedClusterer.makeCopies(m_clusterer, 1); m_clusterers[0].buildClusterer(toFilterIgnoringAttributes[0]); } else { m_clusterers = DensityBasedClusterer.makeCopies(m_clusterer, toFilter.numClasses()); for (int i = 0; i < m_clusterers.length; i++) { if (toFilterIgnoringAttributes[i].numInstances() == 0) { m_clusterers[i] = null; } else { m_clusterers[i].buildClusterer(toFilterIgnoringAttributes[i]); } } } // create output dataset FastVector attInfo = new FastVector(); for (int j = 0; j < m_clusterers.length; j++) { if (m_clusterers[j] != null) { for (int i = 0; i < m_clusterers[j].numberOfClusters(); i++) { attInfo.addElement(new Attribute("pCluster_" + j + "_" + i)); } } } if (toFilter.classIndex() >= 0) { attInfo.addElement(toFilter.classAttribute().copy()); } attInfo.trimToSize(); Instances filtered = new Instances(toFilter.relationName()+"_clusterMembership", attInfo, 0); if (toFilter.classIndex() >= 0) { filtered.setClassIndex(filtered.numAttributes() - 1); } setOutputFormat(filtered); // build new dataset for (int i = 0; i < toFilter.numInstances(); i++) { convertInstance(toFilter.instance(i)); } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); } /** * Input an instance for filtering. Ordinarily the instance is processed * and made available for output immediately. Some filters require all * instances be read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be * collected with output(). * @throws IllegalStateException if no input format has been defined. */ public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -