📄 fwkmeans.java
字号:
package com.prudsys.pdm.Models.Clustering.CDBased.Algorithms.FWKMeans;
import java.util.ArrayList;
import java.util.Random;
import java.util.Vector;
import com.prudsys.pdm.Core.AttributeType;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;
/**
* Implementation of a fuzzy k-means type clustering algorithm with automated variable weighting.
*
* @author Xiaoguang Xu in HITSZ-ICE
*/
public class FWKMeans extends CDBasedClusteringAlgorithm {
// ------------------------------------------------------------------
// These variables are specified by the user and
// need to be defined in the config file "algorithm.xml".
// ------------------------------------------------------------------
/**
* Number of clusters to be generated by the algorithm (specified by the user).
*/
private int numberOfClusters = 3;
/**
* The maximum number of iterations (specified by the user); not used in this algorithm.
*/
private int maxNumberOfIterations = 100;
/** The number of iterations (specified by the user). */
private int numberOfIterations;
/** The parameter for partition; its value must be greater than 1 and its default is 2. */
private double alpha = 2.0;
/** If false, no weighting process is performed (specified by the user). */
private boolean weight;
// newly added
// ------------------------------------------------------------------
// Global variables of FWKMeans that are used by several methods;
// these variables do not need to be defined in the config file "algorithm.xml".
// ------------------------------------------------------------------
/** The number of all attributes. */
private int numAtt;
/** The number of all objects to be clustered. */
private int numVec;
/** The number of categorical attributes. */
private int numOfCat;
/** The number of numeric attributes. */
private int numOfNumeric;
/** The index array of the categorical attributes in a MiningVector. */
private int[] indexOfCate;
/** The index array of the numeric attributes in a MiningVector. */
private int[] indexOfNum;
/** Stores the mean value of each numeric attribute. */
private double[] meanValue;
/** Object cluster membership identification (one cluster index per object). */
private int[] objClusterID;
// Disabled: object cluster membership weight identification (no field is declared for it here).
// Disabled: summation of values of variables in each cluster.
// private double[][] totalSum;
// Disabled: summation of distances of categorical attributes.
// private long[][] misMatch;
// Disabled: count of the number of vectors (objects) in each cluster.
//private int[] clusterCount;
/** The degree to which each instance belongs to every cluster, indexed as partition[instance][cluster]. */
private double[][] partition;
/**
 * Creates an algorithm instance; every user parameter keeps its field default.
 */
public FWKMeans() {
    super();
}
/**
 * Checks the mining algorithm for completeness by calling the verify method of the superclass.
 * Additionally, it checks whether numberOfClusters, maxNumberOfIterations and alpha are admissible.
 *
 * @throws IllegalArgumentException
 *             if some algorithm attributes are incorrect.
 */
public void verify() throws IllegalArgumentException {
    super.verify();
    if (numberOfClusters < 0) {
        throw new IllegalArgumentException("numberOfClusters can't be negative");
    }
    if (maxNumberOfIterations < 0) {
        throw new IllegalArgumentException("maxNumberOfIterations can't be negative");
    }
    // alpha must be STRICTLY greater than 1: the membership update uses the exponent
    // 1.0 / (alpha - 1), which divides by zero for alpha == 1. Writing the test as
    // !(alpha > 1) also rejects NaN.
    if (!(alpha > 1)) {
        throw new IllegalArgumentException("Parameter fuzzy must be greater than 1");
    }
}
/**
 * Stores the attribute count and sets up the attribute index arrays, their lengths and an
 * empty (all-zero) buffer for the mean values of the numeric attributes.
 *
 * @param numAtt
 *            the number of all attributes.
 */
private void initialization(int numAtt) {
    this.numAtt = numAtt;

    // Categorical attributes first; the call order of the two index helpers is kept.
    int[] categoricalIndices = getIndexOfCate(numAtt);
    indexOfCate = categoricalIndices;
    numOfCat = categoricalIndices.length;

    int[] numericIndices = getIndexOfNum(numAtt);
    indexOfNum = numericIndices;
    numOfNumeric = numericIndices.length;

    // One slot per numeric attribute.
    meanValue = new double[numericIndices.length];
}
/**
 * Initializes the weights of all attributes to the same value, 1 / numAtt, and passes them to
 * the distance object, which uses them when computing the distance between two vectors.
 */
private void weightInit() {
    final double[] uniformWeights = new double[numAtt];
    java.util.Arrays.fill(uniformWeights, 1.0 / numAtt);
    distance.setFieldWeights(uniformWeights);
}
/**
 * Chooses numberOfClusters distinct vectors (objects) pseudo-randomly as the initial prototypes
 * and stores each of them as the center vector of the corresponding cluster.
 * <p>
 * The random generator uses the fixed seed 10, so the selection is reproducible between runs.
 *
 * @throws IllegalArgumentException
 *             if more clusters are requested than there are vectors to choose from.
 * @throws MiningException
 *             declared for failures while reading a vector from the input stream.
 */
private void prototypeInit() throws MiningException {
    // Without this guard the rejection loop below could never find a free vector and would
    // spin forever (or fail with "% 0" when there are no vectors at all).
    if (numberOfClusters > numVec) {
        throw new IllegalArgumentException("numberOfClusters (" + numberOfClusters
                + ") exceeds the number of vectors (" + numVec + ")");
    }
    boolean[] selected = new boolean[numVec];
    Random rand = new Random(10);
    for (int i = 0; i < numberOfClusters; i++) {
        int index;
        do {
            // Take the remainder BEFORE the absolute value: Math.abs(Integer.MIN_VALUE) is still
            // negative and would yield a negative index. For every other draw this produces
            // exactly the same index as Math.abs(r) % numVec.
            index = Math.abs(rand.nextInt() % numVec);
        } while (selected[index]);
        // Add center vector to cluster array:
        MiningVector vec = miningInputStream.read(index);
        clusters[i].setCenterVec(vec);
        selected[index] = true;
    }
}
/**
 * Updates the fuzzy partition for the vectors in [start, end) and then computes the value of
 * the objective function for that range.
 * <p>
 * For every vector i and cluster k the distance is
 * sqrt(sum_j w_j^beta * (x_ij - z_kj)^2) + gamma * sqrt(number of mismatching categorical
 * attributes), and the result is the sum of partition[i][k]^alpha * distance over all i and k.
 *
 * @param start
 *            the first index of vector (object), inclusive.
 * @param end
 *            the last index of vector (object), exclusive.
 * @return the objective function value of P1.
 * @throws MiningException
 *             declared for failures while reading vectors from the input stream.
 */
private double getP1Cost(int start, int end) throws MiningException {
    updatePartition(start, end);
    if (start >= end) {
        return 0.0;
    }

    // Loop invariants: getGamma() scans every vector, so it must not be called per (i, k).
    final double gamma = this.getGamma();
    final double[] numericFactor = new double[numOfNumeric];
    for (int j = 0; j < numOfNumeric; j++) {
        numericFactor[j] = Math.pow(distance.getFieldWeights()[indexOfNum[j]], distance.getBeta());
    }

    double dispersion = 0.0;
    for (int i = start; i < end; i++) {
        MiningVector mingVec = miningInputStream.read(i);
        for (int k = 0; k < numberOfClusters; k++) {
            double numDist = 0.0;
            for (int j = 0; j < numOfNumeric; j++) {
                // Distance to the cluster center, as in updatePartition; the previous code
                // squared the raw attribute value and thereby ignored the center.
                double diff = mingVec.getValue(indexOfNum[j])
                        - clusters[k].getCenterVec().getValue(indexOfNum[j]);
                numDist = numDist + numericFactor[j] * diff * diff;
            }
            double catDist = 0.0;
            for (int j = 0; j < numOfCat; j++) {
                if (mingVec.getValue(indexOfCate[j]) != clusters[k].getCenterVec().getValue(indexOfCate[j])) {
                    catDist = catDist + 1.0;
                }
            }
            // NOTE(review): updatePartition uses numDist + gamma * catDist without the square
            // roots - confirm which form the objective function is meant to have.
            double dist = Math.sqrt(numDist) + gamma * Math.sqrt(catDist);
            dispersion = dispersion + Math.pow(partition[i][k], alpha) * dist;
        }
    }
    return dispersion;
}
/**
 * Computes the value of the objective function for the vectors in [start, end) using the
 * current partition, cluster centers and attribute weights; the partition is not modified.
 * <p>
 * For every vector i and cluster k the distance is
 * sqrt(sum_j w_j^beta * (x_ij - z_kj)^2) + gamma * sqrt(number of mismatching categorical
 * attributes), and the result is the sum of partition[i][k]^alpha * distance over all i and k.
 *
 * @param start
 *            the first index of vector (object), inclusive.
 * @param end
 *            the last index of vector (object), exclusive.
 * @return the objective function value.
 * @throws MiningException
 *             declared for failures while reading vectors from the input stream.
 */
private double getP3Cost(int start, int end) throws MiningException {
    if (start >= end) {
        return 0.0;
    }

    // Loop invariants: getGamma() scans every vector, so it must not be called per (i, k).
    final double gamma = this.getGamma();
    final double[] numericFactor = new double[numOfNumeric];
    for (int j = 0; j < numOfNumeric; j++) {
        numericFactor[j] = Math.pow(distance.getFieldWeights()[indexOfNum[j]], distance.getBeta());
    }

    double dispersion = 0.0;
    for (int i = start; i < end; i++) {
        MiningVector mingVec = miningInputStream.read(i);
        for (int k = 0; k < numberOfClusters; k++) {
            double numDist = 0.0;
            for (int j = 0; j < numOfNumeric; j++) {
                // Distance to the cluster center, as in updatePartition; the previous code
                // squared the raw attribute value and thereby ignored the center.
                double diff = mingVec.getValue(indexOfNum[j])
                        - clusters[k].getCenterVec().getValue(indexOfNum[j]);
                numDist = numDist + numericFactor[j] * diff * diff;
            }
            double catDist = 0.0;
            for (int j = 0; j < numOfCat; j++) {
                if (mingVec.getValue(indexOfCate[j]) != clusters[k].getCenterVec().getValue(indexOfCate[j])) {
                    catDist = catDist + 1.0;
                }
            }
            // NOTE(review): updatePartition uses numDist + gamma * catDist without the square
            // roots - confirm which form the objective function is meant to have.
            double dist = Math.sqrt(numDist) + gamma * Math.sqrt(catDist);
            dispersion = dispersion + Math.pow(partition[i][k], alpha) * dist;
        }
    }
    return dispersion;
}
/**
 * Recomputes the fuzzy membership partition[i][k] of every vector i in [start, end) for every
 * cluster k.
 * <p>
 * The distance of a vector to a cluster is
 * sum_j w_j^beta * (x_ij - z_kj)^2 + gamma * (number of mismatching categorical attributes).
 * If a vector has distance 0 to some cluster, it gets membership 1 for the first such cluster
 * and 0 for all others. Otherwise
 * partition[i][k] = 1 / (D_k^(1/(alpha-1)) * sum_s (1/D_s)^(1/(alpha-1))).
 *
 * @param start
 *            the first index of vector (object), inclusive.
 * @param end
 *            the last index of vector (object), exclusive.
 * @throws MiningException
 *             declared for failures while reading vectors from the input stream.
 */
private void updatePartition(int start, int end) throws MiningException {
    if (start >= end) {
        return;
    }

    // Loop invariants. getGamma() scans every vector, so calling it once per (i, k) pair made
    // this method quadratic in the number of vectors.
    final double gamma = this.getGamma();
    final double exponent = 1.0 / (alpha - 1);
    final double[] numericFactor = new double[numOfNumeric];
    for (int j = 0; j < numOfNumeric; j++) {
        numericFactor[j] = Math.pow(distance.getFieldWeights()[indexOfNum[j]], distance.getBeta());
    }

    // Scratch buffer, fully overwritten for every vector.
    final double[] dist = new double[numberOfClusters];

    for (int i = start; i < end; i++) {
        MiningVector mingVec = miningInputStream.read(i);
        double inverseSum = 0.0; // sum over clusters of (1 / D_k)^exponent, non-zero D_k only
        int zeroCluster = -1; // first cluster whose distance is exactly 0, if any

        for (int k = 0; k < numberOfClusters; k++) {
            double numDist = 0.0;
            for (int j = 0; j < numOfNumeric; j++) {
                // The sign of diff is irrelevant because it is squared.
                double diff = mingVec.getValue(indexOfNum[j])
                        - clusters[k].getCenterVec().getValue(indexOfNum[j]);
                numDist = numDist + numericFactor[j] * diff * diff;
            }
            double catDist = 0.0;
            for (int j = 0; j < numOfCat; j++) {
                if (mingVec.getValue(indexOfCate[j]) != clusters[k].getCenterVec().getValue(indexOfCate[j])) {
                    catDist = catDist + 1.0;
                }
            }
            dist[k] = numDist + gamma * catDist;

            if (dist[k] != 0) {
                inverseSum = inverseSum + Math.pow(1 / dist[k], exponent);
            }
            if (dist[k] == 0 && zeroCluster < 0) {
                zeroCluster = k;
            }
        }

        if (zeroCluster >= 0) {
            // The vector coincides with a prototype: crisp assignment avoids dividing by zero.
            for (int s = 0; s < numberOfClusters; s++) {
                partition[i][s] = 0.0;
            }
            partition[i][zeroCluster] = 1.0;
            continue;
        }

        for (int k = 0; k < numberOfClusters; k++) {
            partition[i][k] = 1.0 / (Math.pow(dist[k], exponent) * inverseSum);
        }
    }
}
/**
 * Turns the fuzzy memberships into a crisp assignment: every instance is assigned to the
 * cluster in which it has the largest membership value. On ties the cluster with the lowest
 * index wins. The result is written to objClusterID.
 *
 * @param partition
 *            membership values, indexed as partition[instance][cluster].
 */
private void confirmClusters(double partition[][]) {
    for (int vec = 0; vec < numVec; vec++) {
        int bestCluster = 0;
        double bestMembership = partition[vec][0];
        // Cluster 0 is the starting candidate, so the scan can begin at cluster 1.
        for (int c = 1; c < numberOfClusters; c++) {
            double candidate = partition[vec][c];
            if (candidate > bestMembership) {
                bestMembership = candidate;
                bestCluster = c;
            }
        }
        objClusterID[vec] = bestCluster;
    }
}
/**
 * Computes the parameter gamma that balances the numeric and the categorical part of the
 * distance.
 * <p>
 * Without numeric attributes gamma is 1.0. Otherwise, for each numeric attribute the value
 * sqrt(sum_i (x_ij - meanValue[j])^2 / (numVec - 1)) is computed, these values are averaged
 * over the numeric attributes, and gamma is 0.3 times the square of that average.
 *
 * @return gamma
 * @throws MiningException
 *             declared for failures while reading vectors from the input stream.
 */
private double getGamma() throws MiningException {
    if (!(numOfNumeric > 0)) {
        return 1.0;
    }

    double deviationTotal = 0.0;
    for (int attr = 0; attr < numOfNumeric; attr++) {
        double squaredSum = 0.0;
        for (int row = 0; row < numVec; row++) {
            MiningVector current = miningInputStream.read(row);
            squaredSum += Math.pow(current.getValue(indexOfNum[attr]) - meanValue[attr], 2);
        }
        // Per-attribute deviation, accumulated directly instead of via a temporary array.
        deviationTotal += Math.sqrt(squaredSum / (numVec - 1));
    }

    double averageDeviation = deviationTotal / numOfNumeric;
    return 0.3 * averageDeviation * averageDeviation;
}
/**
* Update variable weights
*
* @param numArr,the
* number of attributes.
* @param numVec,the
* number of vectors (objects)to be clustered.
* @throws MiningException
*/
private void changeWeights(int numAtt, int numVec) throws MiningException {
double weights[], total, minCatDl;
// Dl = new double[numAtt];
total = 0.0;
weights = new double[numAtt];
minCatDl = 0.5 * numVec / numberOfClusters;
double DJ[] = new double[numAtt];
if (numOfNumeric > 0) {
for (int j = 0; j < numOfNumeric; j++) {
double DD = 0.0;
for (int i = 0; i < numVec; i++) {
MiningVector mingVec = miningInputStream.read(i);
for (int k = 0; k < numberOfClusters; k++) {
double temp0 = mingVec.getValue(indexOfNum[j])
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -