📄 kmeans.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Models.Clustering.CDBased.Algorithms.KMeans;
import java.util.Random;
import java.util.Vector;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;
/**
* @author Administrator
*/
public class KMeans extends CDBasedClusteringAlgorithm
{
private int numberOfClusters;
private int maxNumberOfIterations = 100;
private int numberOfIterations;
private int clusterAssignments[];
/**
* Empty constructor.
*/
public KMeans() {
}
/**
* Checks mining algorithm for completeness by calling verify method
* of superclass. Adiitionally, it checks whether numberOfClusters and
* maxNumberOfIterations are admissible.
*
* @throws IllegalArgumentException if some algorithm attributes are incorrect
*/
public void verify() throws IllegalArgumentException
{
super.verify();
if (numberOfClusters < 0)
throw new IllegalArgumentException("numberOfClusters can't be negative");
if (maxNumberOfIterations < 0)
throw new IllegalArgumentException("maxNumberOfIterations can't be negative");
}
/**
* Runs clustering algorithm.
*
* @exception MiningException cannot run algorithm
*/
protected void runAlgorithm() throws MiningException {
int numbAtt = metaData.getAttributesNumber();
int numbVec = 0;
// Get minimum and maximum of attributes:
double[] minArr = new double[ numbAtt ];
double[] maxArr = new double[ numbAtt ];
for (int i = 0; i < numbAtt; i++) {
minArr[i] = 0.0;
maxArr[i] = 0.0;
};
while (miningInputStream.next()) {
MiningVector vec = miningInputStream.read();
for (int i = 0; i < numbAtt; i++) {
if (vec.getValue(i) < minArr[i])
minArr[i] = vec.getValue(i);
if (vec.getValue(i) > maxArr[i])
maxArr[i] = vec.getValue(i);
};
numbVec = numbVec + 1;
};
distance.setMinAtt( minArr );
distance.setMaxAtt( maxArr );
// Create array of clusters:
clusters = new Cluster[ numberOfClusters ];
for (int i = 0; i < numberOfClusters; i++) {
clusters[i] = new Cluster();
clusters[i].setName("clust" + String.valueOf(i));
};
// Find numbOfClusters random vectors:
clusterAssignments = new int[numbVec];
boolean selected[] = new boolean[numbVec];
Random rand = new Random(10);
for (int i = 0; i < numberOfClusters; i++) {
int index = 0;
do {
index = Math.abs( rand.nextInt() ) % numbVec;
}
while (selected[index]);
// Add center vector to cluster array:
MiningVector vec = miningInputStream.read(index);
clusters[i].setCenterVec( vec );
selected[index] = true;
};
// Iterations:
numberOfIterations = 0;
boolean converged = false;
while (! converged && numberOfIterations < maxNumberOfIterations) {
//System.out.println("iter: " + (numberOfIterations+1));
converged = true;
// Find nearest cluster for all vectors:
for (int i = 0; i < numbVec; i++) {
int nC = clusterVector( miningInputStream.read(i) );
if (nC != clusterAssignments[i]) {
clusterAssignments[i] = nC;
converged = false;
};
};
// Find new center vectors:
for (int i = 0; i < numberOfClusters; i++) {
double[] nullVal = new double[ numbAtt ];
MiningVector nullVec = new MiningVector( nullVal );
nullVec.setMetaData( metaData );
clusters[i].setCenterVec( nullVec );
};
int[] cardClusters = new int[numberOfClusters];
for (int i = 0; i < numbVec; i++) {
MiningVector vec = miningInputStream.read(i);
int index = clusterAssignments[i];
for (int j = 0; j < numbAtt; j++) {
double val = clusters[index].getCenterVec().getValue(j);
val = val + vec.getValue(j);
clusters[index].getCenterVec().setValue(j, val);
};
cardClusters[index] = cardClusters[index] + 1;
};
for (int i = 0; i < numberOfClusters; i++) {
for (int j = 0; j < numbAtt; j++) {
double val = clusters[i].getCenterVec().getValue(j);
int card = cardClusters[i];
if (card == 0)
card = 1;
val = val / card;
clusters[i].getCenterVec().setValue(j, val);
};
};
numberOfIterations = numberOfIterations + 1;
};
/**
* Assign containedVectors variable the appropriate value in the Cluster class.
* This value can be used to transform to WEKA's Istances later for easy data visualization.
* Similar code can be found in KLinkage.runAlgorithm() function.
*
* By TWang. Jan 19, 2005.
*/
// Init the Vectors
Vector[] allContainedVectors = null;
allContainedVectors = new Vector[numberOfClusters];
for (int i = 0; i < numberOfClusters; i++){
allContainedVectors[i] = new Vector();
}
// Find nearest cluster for all vectors:
for (int i = 0; i < numbVec; i++) {
MiningVector mingVec = miningInputStream.read(i);
int nC = clusterVector(mingVec);
allContainedVectors[nC].addElement(mingVec);
};
// Set the containedVectors variable
for (int i = 0; i < numberOfClusters; i++){
clusters[i].setContainedVectors(allContainedVectors[i]);
}
/**
* End. By Twang. Jan 19, 2005.
*/
}
/**
* Assign vector to nearest cluster.
*
* @param vec mining vector to be assigned to nearest cluster
* @return number of the nearest cluster, -1 if no cluster exist
* @exception MiningException cannot cluster vector
*/
private int clusterVector(MiningVector vec) throws MiningException {
if (clusters == null || clusters.length == 0)
return -1;
int nearestClust = 0;
double minDist = distance.distance( vec, clusters[0].getCenterVec() );
for (int i = 1; i < numberOfClusters; i++) {
double dist = distance.distance( vec, clusters[i].getCenterVec() );
if (dist < minDist) {
minDist = dist;
nearestClust = i;
};
};
return nearestClust;
}
/**
* Sets number of clusters.
* @param numberOfClusters new number of clusters
* @uml.property name="numberOfClusters"
*/
public void setNumberOfClusters(int numberOfClusters)
{
this.numberOfClusters = numberOfClusters;
}
/**
* Returns number of clusters.
* @return number of clusters
* @uml.property name="numberOfClusters"
*/
public int getNumberOfClusters()
{
return numberOfClusters;
}
/**
* Sets maximum number of iterations.
* @param maxNumberOfIterations new maximum number of iterations
* @uml.property name="maxNumberOfIterations"
*/
public void setMaxNumberOfIterations(int maxNumberOfIterations)
{
this.maxNumberOfIterations = maxNumberOfIterations;
}
/**
* Returns maximum number of iterations.
* @return maximum number of iteartions
* @uml.property name="maxNumberOfIterations"
*/
public int getMaxNumberOfIterations()
{
return maxNumberOfIterations;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -