📄 fwkmeans.java
字号:
- clusters[k].getCenterVec().getValue(indexOfNum[j]);
DD = DD + Math.pow(partition[i][k], alpha) * temp0 * temp0;
}
}
DJ[indexOfNum[j]] = DD;
}
}
if (numOfCat > 0) {
for (int j = 0; j < numOfCat; j++) {
double DD = 0.0;
for (int i = 0; i < numVec; i++) {
MiningVector mingVec = miningInputStream.read(i);
for (int k = 0; k < numberOfClusters; k++) {
if (mingVec.getValue(indexOfCate[j]) != clusters[k].getCenterVec().getValue(indexOfCate[j])) {
DD = DD + Math.pow(partition[i][k],alpha)*1.0;
}
}
}
if (DD < minCatDl) {
DJ[indexOfCate[j]] = minCatDl;
} else
DJ[indexOfCate[j]] = DD;
}
}
for (int j = 0; j < numAtt; j++) {
total += Math.pow(1.0 / DJ[j], 1.0 / (distance.getBeta() - 1));
}
for (int j = 0; j < numAtt; j++) {
weights[j] = 1.0 / (Math.pow(DJ[j], 1.0 / (distance.getBeta() - 1)) * total);
}
distance.setFieldWeights(weights);
}
/**
 * Recomputes the prototype (center vector) of one cluster from the current partition matrix and installs it on
 * {@code clusters[cluster]}.
 * <p>
 * Every object {@code i} contributes with the weight {@code partition[i][cluster]} raised to the power
 * {@code alpha}:
 * <ul>
 * <li>numeric attribute: the new center value is the weighted mean of the attribute over all objects;</li>
 * <li>categorical attribute: each category accumulates the weights of the objects carrying it, and the category
 * with the largest accumulated weight becomes the center value (the lowest category index wins ties).</li>
 * </ul>
 *
 * @param cluster
 *            index of the cluster whose prototype is replaced
 * @throws MiningException
 *             presumably propagated from reading the input stream or the meta data
 */
private void changePrototype(int cluster) throws MiningException {
    // Every attribute value, categorical ones included, is stored as a double.
    double[] centerValues = new double[numAtt];
    MiningVector centerVec = new MiningVector(centerValues);
    centerVec.setMetaData(metaData);

    /* numeric attributes: membership-weighted mean */
    for (int j = 0; j < numOfNumeric; j++) {
        int attr = indexOfNum[j];
        double weightedSum = 0.0;
        double weightTotal = 0.0;
        for (int i = 0; i < numVec; i++) {
            MiningVector vec = miningInputStream.read(i);
            // The same power feeds both sums, so compute it once per object.
            double w = Math.pow(partition[i][cluster], alpha);
            weightTotal += w;
            weightedSum += w * vec.getValue(attr);
        }
        // NOTE(review): when the weights sum to zero (or there are no objects) this quotient is not a finite
        // number; that matches the previous behaviour and is left for the caller to rule out.
        centerVec.setValue(attr, weightedSum / weightTotal);
    }

    /* categorical attributes: category with the largest accumulated weight */
    for (int j = 0; j < numOfCat; j++) {
        int attr = indexOfCate[j];
        CategoricalAttribute cateAtt = (CategoricalAttribute) metaData.getMiningAttribute(attr);
        double[] categoryWeight = new double[cateAtt.getCategoriesNumber()];
        for (int i = 0; i < numVec; i++) {
            MiningVector vec = miningInputStream.read(i);
            // The stored value is used directly as the category index. Values that do not map to a valid
            // index are ignored, exactly as the former scan over all category indices ignored them.
            int category = (int) vec.getValue(attr);
            if (category >= 0 && category < categoryWeight.length) {
                categoryWeight[category] += Math.pow(partition[i][cluster], alpha);
            }
        }
        // Strict '<' keeps the first (lowest-index) category on ties.
        double bestWeight = categoryWeight[0];
        int best = 0;
        for (int k = 1; k < categoryWeight.length; k++) {
            if (bestWeight < categoryWeight[k]) {
                bestWeight = categoryWeight[k];
                best = k;
            }
        }
        centerVec.setValue(attr, best);
    }

    clusters[cluster].setCenterVec(centerVec);
}
/**
 * Runs the weighted k-means style clustering loop over all vectors of {@code miningInputStream}.
 * <p>
 * Steps, in the order they appear below:
 * <ol>
 * <li>initialise the working fields, count the input vectors and compute the mean of every numeric attribute;</li>
 * <li>allocate the partition matrix ({@code numVec x numberOfClusters}) and the per-object cluster id array;</li>
 * <li>initialise attribute weights (only when weighting is enabled and none are set) and the cluster prototypes;</li>
 * <li>iterate: update prototypes, optionally update weights, re-evaluate the cost, until the cost is tiny or its
 * relative change is below 1e-4;</li>
 * <li>hand every cluster the list of vectors assigned to it.</li>
 * </ol>
 * Progress (prototypes, weights, iteration count) is printed to standard output.
 * <p>
 * NOTE(review): the loop has no upper bound in the code visible here; {@code maxNumberOfIterations} is not
 * consulted. Confirm whether that is intended.
 *
 * @exception MiningException
 *                can't run algorithm
 */
@SuppressWarnings("unchecked")
protected void runAlgorithm() throws MiningException {
/* number of data objects to be clustered; counted while scanning the stream below */
int numVec = 0;
/* number of all attributes */
int numAtt = metaData.getAttributesNumber();
// System.out.println(numAtt +" :numAtr");
/* initialise the global (field) variables */
this.initialization(numAtt);
// System.out.println("ini be used!");
/*
 * Count the data objects and accumulate the sum of each numeric attribute in a single pass over the stream.
 * (Original author's remark: miningInputStream.getVectorNumber() could also provide the object count.)
 */
while (miningInputStream.next()) {
MiningVector vec = miningInputStream.read();
for (int j = 0; j < numOfNumeric; j++) {
meanValue[j] += vec.getValue(indexOfNum[j]);
}
numVec++;
}
// Turn the accumulated sums into means.
// NOTE(review): with an empty stream numVec is 0 here and this divides by zero — confirm callers exclude that.
for (int j = 0; j < numOfNumeric; j++) {
meanValue[j] /= numVec;
}
// Publish the object count to the field read by the other methods of this class.
this.numVec = numVec;
// Partition matrix: one row per object, one column per cluster.
this.partition = new double[numVec][numberOfClusters];
// Cluster id per object. (The original author flagged this line as "may be wrong".)
this.objClusterID = new int[numVec];
// -------------test-------------------------
// this.println(numAtt,numVec);
// ------------------------------------------
/* initialise the attribute weights, but only when weighting is on and no weights have been supplied */
if (distance.getFieldWeights() == null && weight) {
this.weightInit();
}
/* create the array of clusters, named clust0, clust1, ... */
clusters = new Cluster[numberOfClusters];
for (int i = 0; i < numberOfClusters; i++) {
clusters[i] = new Cluster();
clusters[i].setName("clust" + String.valueOf(i));
}
/*
 * Choose the initial prototypes. (Per the original comment, numberOfClusters objects are picked at random; the
 * helper is not visible here.)
 */
this.prototypeInit();
// ---- debug output: initial prototypes ----
System.out.println("initial prototypes:");
for (int i = 0; i < numberOfClusters; i++) {
System.out.println(" Cluster[" + i + "]:" + clusters[i].getCenterVec());
}
// ---- debug output: initial weights ----
if (weight) {
double[] w = distance.getFieldWeights();
System.out.print("initial weights:");
for (int i = 0; i < w.length; i++) {
System.out.print(" " + w[i]);
}
System.out.println();
System.out.println("---------------------------------");
}
// dispersion holds the most recent getP1Cost value, dispersion1 the most recent getP3Cost value.
double dispersion, dispersion1;
/*
 * Initial cost. Per the original author's note, getP1Cost evaluates the objective of sub-problem P1 and assigns
 * every object to each prototype with a membership share; this is where the method differs from plain WKMeans.
 * The helper itself is not visible here.
 */
dispersion = this.getP1Cost(0, numVec);
/* the clustering iterations */
while (true) {
// Recompute the prototype of every cluster from the current partition matrix.
for (int i = 0; i < numberOfClusters; i++) {
this.changePrototype(i);
System.out.println("the "+i+" cluster");
}
// ---- debug output: new prototypes ----
System.out.println("new prototypes:");
for (int i = 0; i < numberOfClusters; i++) {
System.out.println(" Cluster[" + i + "]:" + clusters[i].getCenterVec());
}
/* update the attribute weights */
if (weight) {
this.changeWeights(numAtt, numVec);
}
// Cost after the prototype (and optional weight) update.
dispersion1 = this.getP3Cost(0, numVec);
// ---- debug output: new weights ----
if (weight) {
double[] w1 = distance.getFieldWeights();
System.out.print("new weights:");
for (int i = 0; i < w1.length; i++) {
System.out.print(" " + w1[i]);
}
System.out.println();
System.out.println("---------------------------------");
}
// NOTE(review): the counter is advanced twice per full pass (here and after getP1Cost below), so it counts
// half-steps rather than loop passes — confirm that is the intended meaning.
numberOfIterations++;
// Stop when the cost is practically zero or its relative change is below 1e-4.
if (dispersion1 < 0.000001 || Math.abs(dispersion - dispersion1) / dispersion1 < 0.0001) {
confirmClusters(partition);
break;
}
// Re-evaluate the P1 cost with the updated prototypes and weights.
dispersion = this.getP1Cost(0, numVec);
numberOfIterations++;
// Same stopping rule for the second half-step.
// NOTE(review): the relative change is still divided by dispersion1, not dispersion — confirm.
if (dispersion < 0.000001 || Math.abs(dispersion - dispersion1) / dispersion1 < 0.0001) {
confirmClusters(partition);
break;
}
}
// ---- debug output: iteration count ----
System.out.println("numberOfIterations=" + numberOfIterations);
/*
 * Give each Cluster its containedVectors. Per the original comment this is later used to convert to WEKA
 * Instances for visualisation, and similar code exists in KLinkage.runAlgorithm().
 */
Vector[] allContainedVectors = new Vector[numberOfClusters];
for (int i = 0; i < numberOfClusters; i++) {
allContainedVectors[i] = new Vector();
}
// Distribute the vectors by objClusterID (presumably filled in by confirmClusters — not visible here).
for (int i = 0; i < numVec; i++) {
MiningVector mingVec = miningInputStream.read(i);
allContainedVectors[objClusterID[i]].addElement(mingVec);
}
/* set the containedVectors of every cluster */
for (int i = 0; i < numberOfClusters; i++) {
clusters[i].setContainedVectors(allContainedVectors[i]);
}
}
/**
 * Returns the configured maximum number of iterations.
 *
 * @return the current value of the {@code maxNumberOfIterations} field
 */
public int getMaxNumberOfIterations() {
return maxNumberOfIterations;
}
/**
 * Sets the maximum number of iterations.
 * <p>
 * NOTE(review): the iteration loop in {@code runAlgorithm} does not appear to read this value — confirm whether
 * the limit is enforced elsewhere.
 *
 * @param maxNumberOfIterations
 *            new maximum number of iterations
 */
public void setMaxNumberOfIterations(int maxNumberOfIterations) {
this.maxNumberOfIterations = maxNumberOfIterations;
}
/**
 * Returns the number of clusters to build.
 *
 * @return the current value of the {@code numberOfClusters} field
 */
public int getNumberOfClusters() {
return numberOfClusters;
}
/**
 * Sets the number of clusters to build. {@code runAlgorithm} uses this value to size the cluster array and the
 * second dimension of the partition matrix.
 *
 * @param numberOfClusters
 *            new number of clusters
 */
public void setNumberOfClusters(int numberOfClusters) {
this.numberOfClusters = numberOfClusters;
}
/**
 * Tells whether attribute weighting is enabled. When it is, {@code runAlgorithm} initialises the attribute
 * weights (if none are set) and updates them in every iteration.
 *
 * @return {@code true} if attribute weights are used, {@code false} otherwise
 */
public boolean isWeight() {
return weight;
}
/**
 * Enables or disables attribute weighting.
 *
 * @param weight
 *            {@code true} to have {@code runAlgorithm} initialise and update attribute weights, {@code false} to
 *            skip those steps
 */
public void setWeight(boolean weight) {
this.weight = weight;
}
/**
 * Sets the exponent that is applied to partition-matrix entries ({@code Math.pow(partition[i][k], alpha)}) when
 * prototypes and weights are updated.
 *
 * @param alpha
 *            new exponent
 */
public void setAlpha(double alpha) {
this.alpha = alpha;
}
/**
 * Returns the exponent that is applied to partition-matrix entries when prototypes and weights are updated.
 *
 * @return the current value of the {@code alpha} field
 */
public double getAlpha() {
return alpha;
}
/**
 * Collects the positions of the categorical attributes among the first {@code numAtt} attributes of the meta
 * data.
 *
 * @param numAtt
 *            number of attributes to inspect, starting at index 0
 * @return the indices, in ascending order, of the attributes that are instances of {@code CategoricalAttribute};
 *         an empty array when there are none
 */
public int[] getIndexOfCate(int numAtt) {
    // A typed Vector removes the raw-type warning and the cast; Integer.valueOf replaces the deprecated
    // Integer(int) constructor.
    Vector<Integer> found = new Vector<Integer>();
    for (int i = 0; i < numAtt; i++) {
        if (metaData.getMiningAttribute(i) instanceof CategoricalAttribute) {
            found.addElement(Integer.valueOf(i));
        }
    }
    int[] indexOfCate = new int[found.size()];
    for (int i = 0; i < indexOfCate.length; i++) {
        indexOfCate[i] = found.elementAt(i).intValue();
    }
    return indexOfCate;
}
/**
 * Collects the positions of the numeric attributes among the first {@code numAtt} attributes of the meta data.
 *
 * @param numAtt
 *            number of attributes to inspect, starting at index 0
 * @return the indices, in ascending order, of the attributes that are instances of {@code NumericAttribute}; an
 *         empty array when there are none
 */
public int[] getIndexOfNum(int numAtt) {
    // A typed Vector removes the raw-type warning and the cast; Integer.valueOf replaces the deprecated
    // Integer(int) constructor.
    Vector<Integer> found = new Vector<Integer>();
    for (int i = 0; i < numAtt; i++) {
        if (metaData.getMiningAttribute(i) instanceof NumericAttribute) {
            found.addElement(Integer.valueOf(i));
        }
    }
    int[] indexOfNumeric = new int[found.size()];
    for (int i = 0; i < indexOfNumeric.length; i++) {
        indexOfNumeric[i] = found.elementAt(i).intValue();
    }
    return indexOfNumeric;
}
/*
 * Debug helper: dumps the attribute and object counts passed in, followed by the numeric and categorical
 * attribute counts and index arrays held by this object, to standard output.
 */
public void println(int numAtt, int numVec) {
    System.out.println("numAtt=" + numAtt);
    System.out.println("numVec=" + numVec);
    System.out.println("numOfCat=" + numOfCat);
    System.out.println("numOfNumeric=" + numOfNumeric);

    // Numeric attribute indices, each preceded by a single space, on one line.
    System.out.print("indexOfNum=");
    int pos = 0;
    while (pos < numOfNumeric) {
        System.out.print(" " + indexOfNum[pos]);
        pos++;
    }
    System.out.println();

    // Categorical attribute indices in the same layout.
    System.out.print("indexOfCate=");
    pos = 0;
    while (pos < numOfCat) {
        System.out.print(" " + indexOfCate[pos]);
        pos++;
    }
    System.out.println();

    System.out.println("------------------------------------");
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -