📄 wkmeans.java
字号:
indexOfCate, AttributeType.CATEGORICAL);
else
catDist = 0.0;
dist = numDist + this.getGamma() * catDist;
if (dist < dmin) {
dmin = dist;
objClusterID[vecIndex] = i;
}
}
return dmin;
}
/**
 * Updates the variable (attribute) weights from the current partition.
 * <p>
 * For every attribute the dispersion {@code Dl[j]} is accumulated over all
 * objects against the center of the cluster each object is currently
 * assigned to ({@code objClusterID}): the squared difference for numeric
 * attributes, the number of mismatching values for categorical attributes.
 * Categorical dispersions are clamped from below by
 * {@code 0.5 * numVec / numberOfClusters}. The new weight is
 *
 * <pre>
 *   w[j] = 1 / ( Dl[j]^e * sum_t (1 / Dl[t])^e ),   e = 1 / (beta - 1)
 * </pre>
 *
 * where the sum only runs over attributes whose dispersion is non-zero.
 * An attribute whose dispersion is exactly zero gets weight 0; dividing by
 * it would otherwise turn every weight into NaN. If no attribute has a
 * non-zero dispersion, the weights held by {@code distance} are left
 * unchanged.
 *
 * @param numAtt
 *            the number of attributes.
 * @param numVec
 *            the number of vectors (objects) to be clustered.
 * @throws MiningException
 *             propagated from the calls made here (presumably from reading
 *             the mining input stream)
 */
private void changeWeights(int numAtt, int numVec) throws MiningException {
    double[] Dl = new double[numAtt];
    double minCatDl = 0.5 * numVec / numberOfClusters;

    /*
     * Read every vector once and update all attribute dispersions from it.
     * Each Dl[j] still receives its terms in ascending object order.
     */
    for (int i = 0; i < numVec; i++) {
        MiningVector mingVec = miningInputStream.read(i);
        MiningVector center = clusters[objClusterID[i]].getCenterVec();
        for (int j = 0; j < numOfNumeric; j++) {
            double diff = mingVec.getValue(indexOfNum[j])
                    - center.getValue(indexOfNum[j]);
            Dl[indexOfNum[j]] += diff * diff;
        }
        for (int j = 0; j < numOfCat; j++) {
            if (mingVec.getValue(indexOfCate[j]) != center
                    .getValue(indexOfCate[j])) {
                Dl[indexOfCate[j]] += 1;
            }
        }
    }

    /* lower bound for the dispersion of categorical attributes */
    for (int j = 0; j < numOfCat; j++) {
        if (Dl[indexOfCate[j]] < minCatDl) {
            Dl[indexOfCate[j]] = minCatDl;
        }
    }

    double exponent = 1.0 / (distance.getBeta() - 1);
    double total = 0.0;
    int contributing = 0;
    for (int j = 0; j < numAtt; j++) {
        /* zero dispersion would contribute 1/0 = Infinity to the sum */
        if (Dl[j] != 0.0) {
            total += Math.pow(1.0 / Dl[j], exponent);
            contributing++;
        }
    }
    if (contributing == 0) {
        /* nothing to learn from this partition; keep the current weights */
        return;
    }

    double[] weights = new double[numAtt];
    for (int j = 0; j < numAtt; j++) {
        weights[j] = (Dl[j] == 0.0) ? 0.0
                : 1.0 / (Math.pow(Dl[j], exponent) * total);
    }
    distance.setFieldWeights(weights);
}
/**
 * Recomputes the prototype (center vector) of one cluster.
 * <p>
 * Numeric attributes are set to {@code totalSum[cluster][j] /
 * clusterCount[cluster]}. For each categorical attribute the prototype
 * value is the position of the largest frequency stored in
 * {@code clusterCateVar[cluster][j]}; the first position wins on ties and
 * position 0 is used when no stored frequency is positive.
 * <p>
 * A cluster whose {@code clusterCount} is zero keeps its current
 * prototype: the mean is undefined for it and the division would otherwise
 * produce an invalid center.
 *
 * @param cluster
 *            index of the cluster whose prototype is updated.
 */
private void changePrototype(int cluster) {
    if (clusterCount[cluster] == 0) {
        return;
    }

    double[] value = new double[numAtt];
    MiningVector centerVec = new MiningVector(value);
    centerVec.setMetaData(metaData);

    /* prototypes of numeric attributes */
    for (int j = 0; j < numOfNumeric; j++) {
        centerVec.setValue(indexOfNum[j], totalSum[cluster][j]
                / clusterCount[cluster]);
    }

    /* prototypes of categorical attributes */
    for (int j = 0; j < numOfCat; j++) {
        int maxFreq = 0;
        /*
         * NOTE(review): the list position is stored as the attribute value,
         * so position k is assumed to be the category key - confirm against
         * the code that fills clusterCateVar.
         */
        int index = 0;
        for (int k = 0; k < clusterCateVar[cluster][j].size(); k++) {
            int freq = ((Integer) clusterCateVar[cluster][j].get(k))
                    .intValue();
            if (freq > maxFreq) {
                maxFreq = freq;
                index = k;
            }
        }
        centerVec.setValue(indexOfCate[j], index);
    }
    clusters[cluster].setCenterVec(centerVec);
}
/**
 * Runs the WKMeans algorithm.
 * <p>
 * Steps, in order: initialize the global variables, scan the input stream
 * once to count the objects and compute the mean of every numeric
 * attribute, initialize the attribute weights (only when weighting is
 * enabled and no weights are set), create the clusters and their initial
 * prototypes, then alternate between updating prototypes/weights and
 * re-evaluating the cost until the cost is below 0.000001 or its relative
 * change is below 0.0001. Finally the vectors assigned to each cluster are
 * stored in the cluster.
 * <p>
 * {@code numberOfIterations} is incremented once per half-step (after the
 * update step and after the re-evaluation step). When
 * {@code maxNumberOfIterations} is positive the loop also stops as soon as
 * {@code numberOfIterations} reaches it; without this bound a cost that
 * becomes NaN fails every convergence comparison and the loop never ends.
 * A non-positive {@code maxNumberOfIterations} means "no limit".
 *
 * @exception MiningException
 *                can't run algorithm
 */
protected void runAlgorithm() throws MiningException {
    /* the number of data objects to be clustered */
    int numVec = 0;
    /* get the number of all attributes */
    int numAtt = metaData.getAttributesNumber();
    /* initialize the global variables */
    this.initialization(numAtt);
    /*
     * Count the data objects to be clustered and accumulate the sum of
     * each numeric attribute so that its mean can be computed below.
     */
    while (miningInputStream.next()) {
        MiningVector vec = miningInputStream.read();
        for (int j = 0; j < numOfNumeric; j++) {
            meanValue[j] += vec.getValue(indexOfNum[j]);
        }
        numVec++;
    }
    for (int j = 0; j < numOfNumeric; j++) {
        meanValue[j] /= numVec;
    }
    this.numVec = numVec;
    this.objClusterID = new int[numVec];
    /* initialize the weight of each attribute */
    if (distance.getFieldWeights() == null && weight) {
        this.weightInit();
    }
    /* create the array of clusters */
    clusters = new Cluster[numberOfClusters];
    for (int i = 0; i < numberOfClusters; i++) {
        clusters[i] = new Cluster();
        clusters[i].setName("clust" + String.valueOf(i));
    }
    /* choose the initial prototypes of the numberOfClusters clusters */
    this.prototypeInit();
    System.out.println("initial prototypes:");
    for (int i = 0; i < numberOfClusters; i++) {
        System.out.println(" Cluster[" + i + "]:"
                + clusters[i].getCenterVec());
    }
    if (weight) {
        double[] w = distance.getFieldWeights();
        System.out.print("initial weights:");
        for (int i = 0; i < w.length; i++) {
            System.out.print(" " + w[i]);
        }
        System.out.println();
        System.out.println("---------------------------------");
    }

    /* convergence thresholds: absolute cost and relative cost change */
    final double minCost = 0.000001;
    final double minRelativeChange = 0.0001;
    /* a non-positive maximum keeps the loop unbounded, as before */
    final boolean limited = maxNumberOfIterations > 0;

    double dispersion, dispersion1;
    /*
     * compute the objective function value of P1 (per the original notes
     * this also assigns every object to the nearest prototype)
     */
    dispersion = this.getP1Cost(0, numVec);
    /* the process of clustering */
    while (true) {
        for (int i = 0; i < numberOfClusters; i++) {
            this.changePrototype(i);
        }
        System.out.println("new prototypes:");
        for (int i = 0; i < numberOfClusters; i++) {
            System.out.println(" Cluster[" + i + "]:"
                    + clusters[i].getCenterVec());
        }
        /* update the attribute weights */
        if (weight) {
            this.changeWeights(numAtt, numVec);
        }
        dispersion1 = this.getP2Cost(0, numVec);
        if (weight) {
            double[] w1 = distance.getFieldWeights();
            System.out.print("new weights:");
            for (int i = 0; i < w1.length; i++) {
                System.out.print(" " + w1[i]);
            }
            System.out.println();
            System.out.println("---------------------------------");
        }
        numberOfIterations++;
        if (dispersion1 < minCost
                || Math.abs(dispersion - dispersion1) / dispersion1 < minRelativeChange
                || (limited && numberOfIterations >= maxNumberOfIterations)) {
            break;
        }
        dispersion = this.getP1Cost(0, numVec);
        numberOfIterations++;
        if (dispersion < minCost
                || Math.abs(dispersion - dispersion1) / dispersion1 < minRelativeChange
                || (limited && numberOfIterations >= maxNumberOfIterations)) {
            break;
        }
    }
    System.out.println("numberOfIterations=" + numberOfIterations);
    /*
     * Store in every cluster the vectors assigned to it (containedVectors).
     * According to the original notes this is used later to convert the
     * result to WEKA instances for visualization; similar code exists in
     * KLinkage.runAlgorithm().
     */
    Vector[] allContainedVectors = new Vector[numberOfClusters];
    for (int i = 0; i < numberOfClusters; i++) {
        allContainedVectors[i] = new Vector();
    }
    for (int i = 0; i < numVec; i++) {
        MiningVector mingVec = miningInputStream.read(i);
        allContainedVectors[objClusterID[i]].addElement(mingVec);
    }
    /* set the containedVectors variable */
    for (int i = 0; i < numberOfClusters; i++) {
        clusters[i].setContainedVectors(allContainedVectors[i]);
    }
}
/**
 * Returns the configured maximum number of iterations.
 *
 * @return the current value of {@code maxNumberOfIterations}.
 * @uml.property name="maxNumberOfIterations"
 */
public int getMaxNumberOfIterations() {
    return this.maxNumberOfIterations;
}
/**
 * Sets the maximum number of iterations.
 *
 * @param maxNumberOfIterations
 *            the new maximum number of iterations; stored as given,
 *            without validation.
 * @uml.property name="maxNumberOfIterations"
 */
public void setMaxNumberOfIterations(final int maxNumberOfIterations) {
    this.maxNumberOfIterations = maxNumberOfIterations;
}
/**
 * Returns the number of clusters.
 *
 * @return the current value of {@code numberOfClusters}.
 * @uml.property name="numberOfClusters"
 */
public int getNumberOfClusters() {
    return this.numberOfClusters;
}
/**
 * Sets the number of clusters.
 *
 * @param numberOfClusters
 *            the new number of clusters; stored as given, without
 *            validation.
 * @uml.property name="numberOfClusters"
 */
public void setNumberOfClusters(final int numberOfClusters) {
    this.numberOfClusters = numberOfClusters;
}
/**
 * Returns the attribute-weighting flag. {@code runAlgorithm()} checks this
 * flag before initializing and before updating the attribute weights.
 *
 * @return the current value of {@code weight}.
 * @uml.property name="weight"
 */
public boolean isWeight() {
    return this.weight;
}
/**
 * Sets the attribute-weighting flag. {@code runAlgorithm()} checks this
 * flag before initializing and before updating the attribute weights.
 *
 * @param weight
 *            the new value of the flag.
 * @uml.property name="weight"
 */
public void setWeight(final boolean weight) {
    this.weight = weight;
}
/**
 * Collects the positions of the categorical attributes.
 * <p>
 * Positions {@code 0 .. numAtt - 1} of {@code metaData} are inspected and
 * every position whose attribute is a {@code CategoricalAttribute} is
 * reported, in ascending order.
 *
 * @param numAtt
 *            the number of all attributes.
 * @return the index array of categorical attributes in a MiningVector;
 *         empty when there is none.
 */
public int[] getIndexOfCate(int numAtt) {
    /* at most numAtt positions can match; trimmed to the real count below */
    int[] scratch = new int[Math.max(numAtt, 0)];
    int found = 0;
    for (int pos = 0; pos < numAtt; pos++) {
        if (metaData.getMiningAttribute(pos) instanceof CategoricalAttribute) {
            scratch[found++] = pos;
        }
    }
    int[] indexOfCate = new int[found];
    System.arraycopy(scratch, 0, indexOfCate, 0, found);
    return indexOfCate;
}
/**
 * Collects the positions of the numeric attributes.
 * <p>
 * Positions {@code 0 .. numAtt - 1} of {@code metaData} are inspected and
 * every position whose attribute is a {@code NumericAttribute} is
 * reported, in ascending order.
 *
 * @param numAtt
 *            the number of all attributes.
 * @return the index array of numeric attributes in a MiningVector; empty
 *         when there is none.
 */
public int[] getIndexOfNum(int numAtt) {
    /* at most numAtt positions can match; trimmed to the real count below */
    int[] scratch = new int[Math.max(numAtt, 0)];
    int found = 0;
    for (int pos = 0; pos < numAtt; pos++) {
        if (metaData.getMiningAttribute(pos) instanceof NumericAttribute) {
            scratch[found++] = pos;
        }
    }
    int[] indexOfNumeric = new int[found];
    System.arraycopy(scratch, 0, indexOfNumeric, 0, found);
    return indexOfNumeric;
}
/**
 * Debugging aid: prints the attribute and vector counts, the numbers of
 * categorical and numeric attributes and both index arrays to standard
 * output.
 *
 * @param numAtt
 *            the number of all attributes (printed only).
 * @param numVec
 *            the number of vectors (printed only).
 */
public void println(int numAtt, int numVec) {
    System.out.println("numAtt=" + numAtt);
    System.out.println("numVec=" + numVec);
    System.out.println("numOfCat=" + numOfCat);
    System.out.println("numOfNumeric=" + numOfNumeric);

    /* positions of the numeric attributes, each preceded by one blank */
    System.out.print("indexOfNum=");
    int n = 0;
    while (n < numOfNumeric) {
        System.out.print(" " + indexOfNum[n]);
        n++;
    }
    System.out.println();

    /* positions of the categorical attributes, each preceded by one blank */
    System.out.print("indexOfCate=");
    int c = 0;
    while (c < numOfCat) {
        System.out.print(" " + indexOfCate[c]);
        c++;
    }
    System.out.println();
    System.out.println("------------------------------------");
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -