⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.java

📁 聚类算法kmeans
💻 JAVA
字号:
/*
 * Implements the k-means algorithm
 *
 * Manas Somaiya
 * Computer and Information Science and Engineering
 * University of Florida
 *
 * Created: October 29, 2003
 * Last updated: October 30, 2003
 *
 */

import java.io.*;
import java.util.*;
 
 
public class kMeans {


	/** Number of clusters */
	private int k;


	/** Array of clusters; every entry stays null until runKMeans() seeds it */
	private cluster[] clusters;


	/** Number of assignment passes performed so far by runKMeans() */
	private int nIterations;


	/** Vector of data points */
	private Vector<kMeansPoint> kMeansPoints;


	/** Name of the input file; null when the points were handed in as a list */
	private String inputFileName;


	/**
	 * Returns a new instance of kMeans algorithm that will load its points from a file.
	 * No data is read here; call readData() before runKMeans().
	 *
	 * @param	k		number of clusters
	 * @param	inputFileName	name of the file containing input data
	 */
	public kMeans(int k, String inputFileName) {

		this.k = k;
		this.inputFileName = inputFileName;
		this.clusters = new cluster[this.k];
		this.nIterations = 0;
		this.kMeansPoints = new Vector<kMeansPoint>(1000);

	} // end of kMeans()


	/**
	 * Returns a new instance of kMeans algorithm working on the given points.
	 * The list itself is copied, the point objects are shared with the caller.
	 *
	 * @param	k		number of clusters
	 * @param	kMeansPoints	List containing objects of type kMeansPoint
	 */
	public kMeans(int k, List<kMeansPoint> kMeansPoints) {

		this.k = k;
		this.clusters = new cluster[this.k];
		this.nIterations = 0;
		this.kMeansPoints = new Vector<kMeansPoint>(kMeansPoints);

	} // end of kMeans()


	/**
	 * Reads the input data from the file and stores the data points in the vector.
	 *
	 * The file is expected to hold integers separated by any amount of
	 * whitespace (spaces, tabs, line breaks). Consecutive integers are paired
	 * up as (x, y); every point read is initially assigned to cluster 0.
	 * Nothing is added to the vector unless the whole file is well formed.
	 *
	 * @throws	IOException		if the file cannot be read, or holds an odd number of values
	 * @throws	NumberFormatException	if a token is not a valid integer
	 * @throws	IllegalStateException	if this instance was created without an input file name
	 */
	public void readData() throws IOException {

		if (this.inputFileName == null) {
			throw new IllegalStateException("no input file name was given to this kMeans instance");
		}

		List<Integer> values = new ArrayList<Integer>(3000);
		BufferedReader in = new BufferedReader(new FileReader(this.inputFileName));

		// try/finally so the reader is released even when parsing fails
		try {
			String line;
			while ((line = in.readLine()) != null) {
				// the default delimiters of StringTokenizer are space, tab, newline, CR and form feed
				StringTokenizer tokens = new StringTokenizer(line);
				while (tokens.hasMoreTokens()) {
					values.add(Integer.valueOf(tokens.nextToken()));
				}
			}
		} finally {
			in.close();
		}

		if (values.size() % 2 != 0) {
			throw new IOException("input file " + this.inputFileName + " holds " + values.size()
					+ " values; an even number is needed to form (x, y) pairs");
		}

		for (int idx = 0; idx < values.size(); idx += 2) {
			kMeansPoint dp = new kMeansPoint(values.get(idx).intValue(), values.get(idx + 1).intValue());
			dp.assignToCluster(0);
			this.kMeansPoints.add(dp);
		}

	} // end of readData()


	/**
	 * Runs the k-means algorithm over the data set.
	 *
	 * Each cluster is seeded with a randomly picked data point, then the
	 * assign / update-means cycle is repeated until no mean moves any more.
	 *
	 * @throws	IllegalStateException	if clusters are requested but there are no data points
	 */
	public void runKMeans() {

		int nPoints = this.kMeansPoints.size();

		// Seeding picks existing points, so it cannot work on an empty data set
		if (this.k > 0 && nPoints == 0) {
			throw new IllegalStateException("cannot run k-means: no data points have been loaded");
		}

		// Select k points as initial means (picked independently, so two clusters may start on the same point)
		for (int i = 0; i < this.k; i++) {

			this.clusters[i] = new cluster(i);
			this.clusters[i].setMean(this.kMeansPoints.get((int) (Math.random() * nPoints)));

		}

		do {
			// Form k clusters
			for (kMeansPoint dp : this.kMeansPoints) {
				this.assignToCluster(dp);
			}

			this.nIterations++;

		}
		// Repeat for as long as at least one centroid moved
		while (this.updateMeans());

	} // end of runKMeans()


	/**
	 * Assigns a data point to one of the k clusters based on its distance from the means of the clusters.
	 * The point keeps its current cluster unless another mean is strictly closer.
	 *
	 * @param	dp	data point to be assigned
	 */
	private void assignToCluster(kMeansPoint dp) {

		int currentCluster = dp.getClusterNumber();

		// A point that arrives with a cluster number outside 0..k-1 (possible for
		// points supplied through the List constructor) is measured from cluster 0
		if (currentCluster < 0 || currentCluster >= this.k) {
			currentCluster = 0;
		}

		double minDistance = kMeansPoint.distance(dp, this.clusters[currentCluster].getMean());

		for (int i = 0; i < this.k; i++) {

			// measure once per candidate instead of once for the test and once for the update
			double candidate = kMeansPoint.distance(dp, this.clusters[i].getMean());
			if (candidate < minDistance) {

				minDistance = candidate;
				currentCluster = i;

			}

		}

		dp.assignToCluster(currentCluster);

	} // end of assignToCluster


	/**
	 * Updates the means of all k clusters, and returns if they have changed or not.
	 *
	 * The new mean of a cluster is the integer-truncated average of the
	 * coordinates of its points; a cluster without points keeps its old mean.
	 *
	 * @return	have the updated means of the clusters changed or not
	 */
	private boolean updateMeans() {

		boolean reply = false;

		// long accumulators: summing many int coordinates must not wrap around
		long[] sumX = new long[this.k];
		long[] sumY = new long[this.k];
		int[] size = new int[this.k];
		kMeansPoint[] pastMeans = new kMeansPoint[this.k];

		for (int i = 0; i < this.k; i++) {
			pastMeans[i] = this.clusters[i].getMean();
		}

		for (kMeansPoint dp : this.kMeansPoints) {

			int currentCluster = dp.getClusterNumber();

			sumX[currentCluster] += dp.getX();
			sumY[currentCluster] += dp.getY();
			size[currentCluster]++;

		}

		for (int j = 0; j < this.k; j++) {

			if (size[j] != 0) {

				kMeansPoint temp = new kMeansPoint((int) (sumX[j] / size[j]), (int) (sumY[j] / size[j]));
				temp.assignToCluster(j);
				this.clusters[j].setMean(temp);
				if (kMeansPoint.distance(pastMeans[j], this.clusters[j].getMean()) != 0) {
					reply = true;
				}

			}

		}

		return reply;

	} // end of updateMeans()


	/**
	 * Returns the value of k
	 *
	 * @return	the value of k
	 */
	public int getK() {

		return this.k;

	} // end of getK()


	/**
	 * Returns the specified cluster by index
	 *
	 * @param	index	index of the cluster to be returned
	 * @return	the specified cluster; null if runKMeans() has not created it yet
	 */
	public cluster getCluster(int index) {

		return this.clusters[index];

	} // end of getCluster()


	/**
	 * Returns the string output of the data points
	 *
	 * @return  the string output of the data points
	 */
	@Override
	public String toString() {

		return this.kMeansPoints.toString();

	} // end of toString()


	/**
	 * Returns the data points
	 *
	 * @return  the internal vector of data points (not a copy)
	 */
	public Vector<kMeansPoint> getDataPoints() {

		return this.kMeansPoints;

	} // end of getDataPoints()


	/**
	 * Main method -- to test the kMeans class
	 *
	 * Clusters the points of the file "outDATA" into 2 clusters, prints the
	 * cluster number of every point to standard output, and writes one
	 * bracketed line per cluster ("[x y x y ... ]") to the file "out".
	 *
	 * @param   args    command line arguments
	 */
	public static void main(String[] args) {

		kMeans km = new kMeans(2, "outDATA");

		try {
			km.readData();
		} catch (Exception e) {
			System.err.println(e);
			System.exit(-1);
		}

		km.runKMeans();

		// One bucket per cluster, sized from k so it can never be too small
		int nClusters = km.getK();
		List<List<kMeansPoint>> groups = new ArrayList<List<kMeansPoint>>(nClusters);
		for (int c = 0; c < nClusters; c++) {
			groups.add(new ArrayList<kMeansPoint>());
		}

		for (kMeansPoint p : km.getDataPoints()) {
			groups.get(p.getClusterNumber()).add(p);
			System.out.println(p.getClusterNumber());
		}

		PrintWriter out = null;
		try {

			out = new PrintWriter(new FileOutputStream("out"), true);
			for (List<kMeansPoint> group : groups) {
				out.print('[');
				for (kMeansPoint point : group) {
					out.print(point.getX());
					out.print(' ');
					out.print(point.getY());
					out.print(' ');
				}
				out.print(']');
				out.println();
			}

			// PrintWriter never throws on write failures; it has to be asked
			if (out.checkError()) {
				System.out.println("io error");
			}

		} catch (IOException error) {
			System.out.println("io error");
			System.err.println(error);
		} finally {
			if (out != null) {
				out.close();
			}
		}

	} // end of main()

} // end of class

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -