📄 simplechameleon.java

📁 简单变色龙JAVA程序
💻 JAVA
字号:
/* 
 * 	  This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *    
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 */

/*
 *    SimpleChameleon.java
 *    Copyright (C) 2008 Athrun Zala
 */
package weka.clusterers;


import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.core.matrix.Matrix;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Cluster data using the spectral clustering algorithm
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -N &lt;num&gt;
 *  number of clusters.
 *  (default 2).</pre>
 * 
 * <pre> -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)</pre>
 * 
 <!-- options-end -->
 *
 * @author Athrun Zala
 * @version 1.00
 * @see RandomizableClusterer
 */
public class SimpleChameleon 
	extends RandomizableClusterer
	implements NumberOfClustersRequestable{
	
	/**
	 * for serialization
	 */
	private static final long serialVersionUID = 1L;
	
	/**
	 * replace the missing values in training instances
	 */
	private ReplaceMissingValues m_ReplaceMissingFilter;
	
	/**
	 * number of clusters to generate
	 */
	private int m_NumClusters = 2;
	
	/**
	 * threshold
	 */
	private int processed = 1000000;
	
	/**
	 * holds the index of the instance 
	 */
	private int m_Index;
	
	/**
	 * Holds the standard deviations of the numeric attributes in each cluster
	 */
	private Instances m_ClusterStdDevs;
	
	/**
	 * Holds the number of instances in each cluster
	 */
	private int [] m_ClusterSizes;
	
	/**
	 * the vector to hold the cluster
	 */
	private int[][] category;
	
	/**
	 * the default constructor
	 */
	public SimpleChameleon() {
		super();
		
		m_SeedDefault = 10;
		setSeed(m_SeedDefault);
	}
	
	/**
	 * Returns a string describing this clusterer
	 * @return a description of the evaluator suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String globalInfo() {
		return "Cluster data using the Chameleon Clustering Algorithm";
	}
	
	/**
	 * Returns default capabilities of the clusterer.
	 * @return      the capabilities of this clusterer
	 */
	public Capabilities getCapabilities() {
		Capabilities result = super.getCapabilities();
		
		result.enable(Capability.NUMERIC_ATTRIBUTES);
		result.enable(Capability.MISSING_VALUES);
		
		return result;
	}
	
	 /**
	   * Generates a clusterer. Has to initialize all fields of the clusterer
	   * that are not being set via options.
	   *
	   * @param data set of instances serving as training data 
	   * @throws Exception if the clusterer has not been 
	   * generated successfully
	   */
	  public void buildClusterer(Instances data) throws Exception {
		  
		  // can clusterer handle the data?
		  getCapabilities().testWithFail(data);
		  
		  m_ReplaceMissingFilter = new ReplaceMissingValues();
		  Instances instances = new Instances(data);
		  instances.setClassIndex(-1);
		  m_ReplaceMissingFilter.setInputFormat(instances);
		  instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
		  
		  int n = instances.numInstances();

		  int []clusterAssignments = new int [n];
		  Instances [] tempI = new Instances[m_NumClusters];
		  
		  int i;
		  merge(instances);
		  labelCluster(instances);
			  
		  for (i = 0; i < instances.numInstances(); i++) {
			  Instance toCluster = instances.instance(i);
			  setIndex(i);
			  int newC = clusterProcessedInstance(toCluster, true);

			  clusterAssignments[i] = newC;
	      }
		  
		  for (i = 0; i < m_NumClusters; i++) {
			  tempI[i] = new Instances(instances, 0);
		  }
		  for (i = 0; i < instances.numInstances(); i++) {
			  tempI[clusterAssignments[i]].add(instances.instance(i));
		  }
		  
		  setIndex(0);
		  			  		  
		  m_ClusterStdDevs = new Instances(instances, m_NumClusters);			  
		  m_ClusterSizes = new int [m_NumClusters];			  
		  
		  for (i = 0; i < m_NumClusters; i++) {
			  double [] vals2 = new double[instances.numAttributes()];
			  for (int j = 0; j < instances.numAttributes(); j++) {
				  if (instances.attribute(j).isNumeric()) {
					  vals2[j] = Math.sqrt(tempI[i].variance(j));
				  } else {
					  vals2[j] = Instance.missingValue();
				  }	
			  }
			  m_ClusterStdDevs.add(new Instance(1.0, vals2));
		  }
	  }
		
	/**
	 * clusters an instance that has been through the filters
	 *
	 * @param instance the instance to assign a cluster to
	 * @param updateErrors if true, update the within clusters sum of errors
	 * @return a cluster number
	 */
	private int clusterProcessedInstance(Instance instance, boolean updateErrors) {
	
		int bestCluster = 0;
		for (int i = 0; i < m_NumClusters; i++) {
			if (category[0][m_Index] == i) {
				bestCluster = i;
			}
		}		
		m_Index++;
		return bestCluster;
	}
	
	/**
	 * Classifies a given instance.
	 *
	 * @param instance the instance to be assigned to a cluster
	 * @return the number of the assigned cluster as an interger
	 * if the class is enumerated, otherwise the predicted value
	 * @throws Exception if instance could not be classified
	 * successfully
	 */
	public int clusterInstance(Instance instance) throws Exception {
		m_ReplaceMissingFilter.input(instance);
		m_ReplaceMissingFilter.batchFinished();
		Instance inst = m_ReplaceMissingFilter.output();

		return clusterProcessedInstance(inst, false);
	}
	
	/**
	 * Calculates distance, the distance between two instances
	 * 
	 * @param first the first instance
	 * @param second the second instance
	 * @return the distance between the two given instances
	 */
	private double distance(Instance first, Instance second) {
		
		double val1;
		double val2;
		double dist = 0.0;
		
		for (int i = 0; i <first.numAttributes(); i++) {		
			val1 = first.value(i);			
			val2 = second.value(i);
						
			dist += (val1 - val2) * (val1 - val2);
		}
	    dist = Math.sqrt(dist);
		return dist;
	}

	/**
	 * construct the similar matrix
	 * 
	 * @param instances the data set
	 * @return W the weighted similar matrix
	 */
	private double[][] solveW(Instances instances) {
		int i, j;
		int n = instances.numInstances();
		double[][] W = new double[n][n];
		
		for (i = 0; i < n; i++) {
			for (j = i + 1; j < n; j++) {
				W[i][j] = distance(instances.instance(i), instances.instance(j));
				W[j][i] = processed;
			}

			W[i][i] = processed;
		}
		return W;
	}
	
	/**
	 * Initialize the category vector
	 * @param instances the data set
	 */
	private void initCategory(Instances instances) {
		int n = instances.numInstances();
		category = new int[1][n];
		for (int i = 0; i < n; i++) {
			category[0][i] = i;
		}
	}
	
	/**
	 * merge the two closer cluster
	 * @param instances
	 */
	private void merge(Instances instances) {	
		int i, j;
		int idx_of_clusterA = 0, idx_of_clusterB = 0;
		int n     = instances.numInstances();	
		int num_instance = instances.numInstances();
		Matrix W = new Matrix(solveW(instances));
		
		initCategory(instances);
		
		while (num_instance > m_NumClusters) {
			double mindist = 1000000;
			
			for (i = 0; i < n; i++) {
				for (j = i + 1; j < n; j++) {
					if (W.get(i, j) < mindist) {
						mindist = W.get(i, j);
						idx_of_clusterA = i;
						idx_of_clusterB = j;
					}
				}
			}
			
			W.set(idx_of_clusterA, idx_of_clusterB, processed);
			W.set(idx_of_clusterB, idx_of_clusterA, processed);
			
			for (i = 0; i < n; i++) {
				if (category[0][i] == idx_of_clusterB) {
					category[0][i] = idx_of_clusterA;
				}
			}
			
			for (i = 0; i < n; i++) {
				if (W.get(idx_of_clusterB, i) < W.get(idx_of_clusterA, i)) {
					W.set(idx_of_clusterA, i, W.get(idx_of_clusterB, i));
				}
				
				W.set(idx_of_clusterB, i, processed);
				W.set(i, idx_of_clusterB, processed);
			}
			
			num_instance -= 1;
		}
			
	}
	
	/**
	 * label the clusters
	 * @param instances
	 */
	private void labelCluster(Instances instances) {
		int i, j;
		int n           = instances.numInstances();
		int categories  = -1;
		int[][] labeled = new int[1][n];
		
		for (i = 0; i < n; i++) {
			int temp = category[0][i];
			if (labeled[0][temp] == 0) {
				categories += 1;
				
				for (j = i; j < n; j++) {
					if (category[0][j] == temp) {
						category[0][j] = categories;
					}
				}
				
				labeled[0][categories] = 1;
			}
		}
		
	}

	/**
	 * 
	 * set the index of the instance
	 */
	public void setIndex(int position) {
		m_Index = position;
	}	

	/**
	 * get the number of clusters
	 * 
	 * @return m_NumClusters
	 */
	public int getNumClusters() {
		return m_NumClusters;
	}
	  
	/**
	 * 
	 * @return m_ClusterSizes
	 */
	public int [] getClusterSizes() {
		  return m_ClusterSizes;
	}
	
	/**
	 * Returns the number of clusters.
	 *
	 * @return the number of clusters generated for a training dataset.
	 * @throws Exception if number of clusters could not be returned
	 * successfully
	 */
	public int numberOfClusters() throws Exception {
		return m_NumClusters;
	}

	/**
	 * set the number of clusters to generate
	 *
	 * @param n the number of clusters to generate
	 * @throws Exception if number of clusters is negative
	 */
	public void setNumClusters(int n) throws Exception {
		if (n <= 0) {
			throw new Exception("Number of clusters must be > 0");
		}
		m_NumClusters = n;			
	}
	  
	/**
	 * Returns an enumeration describing the available options.
	 *
	 * @return an enumeration of all the available options.
	 */
	public Enumeration listOptions () {
		Vector result = new Vector();
		  
		result.addElement(new Option(
				"\tnumber of clusters.\n"
				+ "\t(default 2).", 
				"N", 1, "-N <num>"));
		Enumeration en = super.listOptions();
		while (en.hasMoreElements())
			result.addElement(en.nextElement());
		  
		return  result.elements();
	}
	  
	/**
	 * Parses a given list of options. <p/>
	 * 
	 <!-- options-start -->
	 * Valid options are: <p/>
	 * 
	 * <pre> -N &lt;num&gt;
	 *  number of clusters.
	 *  (default 2).</pre>
	 * 
	 * <pre> -S &lt;num&gt;
	 *  Random number seed.
	 *  (default 10)</pre>
	 * 
	 <!-- options-end -->
	 *
	 * @param options the list of options as an array of strings
	 * @throws Exception if an option is not supported
	 */
	public void setOptions (String[] options) throws Exception {
		String optionString = Utils.getOption('N', options);
		if (optionString.length() != 0) {
			setNumClusters(Integer.parseInt(optionString));
		}
		  
		super.setOptions(options);		  
	}
	  
	/**
	 * Gets the current settings of SimpleKMeans
	 *
	 * @return an array of strings suitable for passing to setOptions()
	 */
	public String[] getOptions () {
		int       	i;
		Vector    	result;
		String[]  	options;
		  
		result = new Vector();
		  
		result.add("-N");
		result.add("" + getNumClusters());
		  
		options = super.getOptions();
		for (i = 0; i < options.length; i++)
			result.add(options[i]);
		return (String[]) result.toArray(new String[result.size()]);
	}
	
	/**
	 * return a string describing this clusterer
	 *
	 * @return a description of the clusterer as a string
	 */
	public String toString() {
		StringBuffer temp = new StringBuffer();
		
		temp.append("\nSimple Chameleon\n======\n");
		
		temp.append("sum of StdDevs: ");
		double sum = 0;
		for (int i = 0; i < m_NumClusters; i++) {
			for (int j = 0; j < m_ClusterStdDevs.numAttributes(); j++) {
				sum += m_ClusterStdDevs.instance(i).value(j);
			}
		}
		temp.append(sum + "\n\n\n");
		
		for (int i = 0; i < m_NumClusters; i++) {
			temp.append("\nCluster "+i+"\t");
			temp.append("\n\tStd Devs:  ");
			for (int j = 0; j < m_ClusterStdDevs.numAttributes(); j++) {
				temp.append(" " + Utils.doubleToString(m_ClusterStdDevs.instance(i).value(j), 5, 4));
			}
		}
		temp.append("\n\n");
		
		return temp.toString();
	}
	
	/**
	 * Main method for testing this class.
	 *
	 * @param argv should contain the following arguments: <p>
	 * -t training file [-N number of clusters]
	 */
	public static void main (String[] argv) {
		runClusterer(new SimpleChameleon(), argv);
	}
	  
}
💿 文件大小 5 K
👤 上传用户 jiangleip531
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#JAVA #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -