/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and
 *              data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.1
 */
package com.prudsys.pdm.Models.Clustering.Hierarchical.Algorithms;
import java.util.Vector;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Hierarchical.ClusterDistance;
import com.prudsys.pdm.Models.Clustering.Hierarchical.DistanceMatrix;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalCluster;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalClusteringAlgorithm;
import com.prudsys.pdm.Utils.IntVector;

/**
 * Class for asymptotically fast hierarchical agglomerative clustering
 * using adaptive distance calculation.
 *
 * In contrast to HierarchicalAgglomerative it requires twice as much
 * memory, but only O(n^2) operations instead of the O(n^3) of
 * HierarchicalAgglomerative.
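 *
 * The speedup comes from keeping all pairwise cluster distances in a list
 * that is maintained in ascending order, so the closest pair can always be
 * taken from its head; list entries referring to already merged clusters
 * are skipped and discarded lazily.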
 */
public class HierarchicalAgglomerativeFast extends HierarchicalClusteringAlgorithm
{
    /**
     * Runs the fast hierarchical agglomerative clustering algorithm.
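     * Uses a fixed integer array to keep the distance list in sorted order.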
     *
     * @exception MiningException could not run algorithm
     */
    protected void runAlgorithm() throws MiningException {

        // Number of attributes and vectors:
        int numbAtt = metaData.getAttributesNumber();
        int numbVec = miningInputStream.getVectorsNumber();

        // Get minimum and maximum of attributes, used if normalization is on:
        if (distance.isNormalized()) {
            double[] minArr = new double[numbAtt];
            double[] maxArr = new double[numbAtt];
            // Initialize so that the first vector read sets the true extrema:
            for (int i = 0; i < numbAtt; i++) {
                minArr[i] = Double.POSITIVE_INFINITY;
                maxArr[i] = Double.NEGATIVE_INFINITY;
            }
            while (miningInputStream.next()) {
                MiningVector vec = miningInputStream.read();
                for (int i = 0; i < numbAtt; i++) {
                    if (vec.getValue(i) < minArr[i])
                        minArr[i] = vec.getValue(i);
                    if (vec.getValue(i) > maxArr[i])
                        maxArr[i] = vec.getValue(i);
                }
            }
            distance.setMinAtt(minArr);
            distance.setMaxAtt(maxArr);
        }

        // Form a leaf cluster for every input vector:
        int numbClust = 2 * numbVec - 1;
        HierarchicalCluster[] hclust = new HierarchicalCluster[numbClust];
        for (int i = 0; i < numbVec; i++) {
            MiningVector mv = miningInputStream.read(i);
            HierarchicalCluster hc = new HierarchicalCluster();
            hc.setCenterVec(mv);
            Vector contVec = new Vector();
            contVec.addElement(mv);
            hc.setContainedVectors(contVec);
            hc.setLeaf(true);
            hc.setIndex(i);
            hclust[i] = hc;
        }

        // Calculate all distances of vector clusters and get minimum distance:
        ClusterDistance clustDist = (ClusterDistance) distance;
        DistanceMatrix distMat = new DistanceMatrix();
        distMat.initDistanceArray(numbVec);
        clustDist.setDistanceMatrix(distMat);
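
        // distMat stores every pairwise distance in a linear array; c2i(i, j)
        // maps a cluster index pair to its position in that array, and
        // i2c(ind) maps the position back to the pair.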
        // Ordered list of distance-matrix indices, kept sorted by distance:
        int numbDist = (numbClust - 1) * numbClust / 2;
        int[] ordDist = new int[numbDist];
        for (int i = 0; i < numbDist; i++)
            ordDist[i] = -1;

        // Calculate all distances between vector clusters and order them:
        int id = 0;
        for (int i = 0; i < numbVec; i++)
            for (int j = i + 1; j < numbVec; j++) {
                // Distance (cached in distMat, from which it is read back below):
                double dist = clustDist.clusterDistance(hclust[i], hclust[j]);
                // Insert the new index and sift it into its sorted position:
                int ind = distMat.c2i(i, j);
                ordDist[id] = ind;
                int no = id;
                while (no > 0 &&
                       distMat.getDistanceArray(ordDist[no]) <
                       distMat.getDistanceArray(ordDist[no - 1])) {
                    int it = ordDist[no];
                    ordDist[no] = ordDist[no - 1];
                    ordDist[no - 1] = it;
                    no = no - 1;
                }
                id = id + 1;
            }

        System.out.println("...distance matrix computed. Merging clusters:");

        // Add cluster by cluster iteratively:
        boolean[] usedClust = new boolean[numbClust];
        for (int nclust = numbVec; nclust < numbClust; nclust++) {
            int ind;
            int im;
            int jm;
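            // Pop the smallest remaining distance from the head of the list;
            // entries whose clusters were already merged are discarded (lazy deletion):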
            while (true) {
                ind = ordDist[0];
                int[] cc = distMat.i2c(ind);
                im = cc[0];
                jm = cc[1];
                // Remove the head entry by shifting the array one position left:
                for (int i = 0; i < id - 1; i++)
                    ordDist[i] = ordDist[i + 1];
                ordDist[id - 1] = -1;
                id = id - 1;
                if (!usedClust[im] && !usedClust[jm])
                    break;
            }
            double mdist = distMat.getDistanceArray(ind);

            // Merge nearest clusters:
            HierarchicalCluster hc = new HierarchicalCluster(hclust[im], hclust[jm], mdist);
            hc.setIndex(nclust);
            hclust[nclust] = hc;
            usedClust[im] = true;
            usedClust[jm] = true;

            // Add new cluster distances:
            for (int i = 0; i < nclust; i++) {
                if (usedClust[i])
                    continue;
                double dist = clustDist.clusterDistance(hclust[i], hclust[nclust]);
                ind = distMat.c2i(i, nclust);
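                // Append the new distance index and sift it into sorted position: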
                ordDist[id] = ind;
                id = id + 1;
                int no = id - 1;
                while (no > 0 &&
                       distMat.getDistanceArray(ordDist[no]) <
                       distMat.getDistanceArray(ordDist[no - 1])) {
                    int it = ordDist[no];
                    ordDist[no] = ordDist[no - 1];
                    ordDist[no - 1] = it;
                    no = no - 1;
                }
            }
        }

        // Create array of clusters, last cluster is root:
        clusters = new HierarchicalCluster[numbClust];
        for (int i = 0; i < numbClust; i++)
            clusters[i] = (HierarchicalCluster) hclust[i];
        clustDist.setDistanceMatrix(null);
        System.out.println("...calculation finished.");
    }
    /**
     * Runs the fast hierarchical agglomerative clustering algorithm.
     * Uses a vector structure (IntVector) for dynamic distance storage.
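     * In contrast to runAlgorithm, stale distance entries are removed from
     * the vector instead of being shifted out of a fixed array.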
     *
     * @exception MiningException could not run algorithm
     */
    protected void runAlgorithm2() throws MiningException {

        // Number of attributes and vectors:
        int numbAtt = metaData.getAttributesNumber();
        int numbVec = miningInputStream.getVectorsNumber();

        // Get minimum and maximum of attributes, used if normalization is on:
        if (distance.isNormalized()) {
            double[] minArr = new double[numbAtt];
            double[] maxArr = new double[numbAtt];
            // Initialize so that the first vector read sets the true extrema:
            for (int i = 0; i < numbAtt; i++) {
                minArr[i] = Double.POSITIVE_INFINITY;
                maxArr[i] = Double.NEGATIVE_INFINITY;
            }
            while (miningInputStream.next()) {
                MiningVector vec = miningInputStream.read();
                for (int i = 0; i < numbAtt; i++) {
                    if (vec.getValue(i) < minArr[i])
                        minArr[i] = vec.getValue(i);
                    if (vec.getValue(i) > maxArr[i])
                        maxArr[i] = vec.getValue(i);
                }
            }
            distance.setMinAtt(minArr);
            distance.setMaxAtt(maxArr);
        }

        // Form a leaf cluster for every input vector:
        int numbClust = 2 * numbVec - 1;
        HierarchicalCluster[] hclust = new HierarchicalCluster[numbClust];
        for (int i = 0; i < numbVec; i++) {
            MiningVector mv = miningInputStream.read(i);
            HierarchicalCluster hc = new HierarchicalCluster();
            hc.setCenterVec(mv);
            Vector contVec = new Vector();
            contVec.addElement(mv);
            hc.setContainedVectors(contVec);
            hc.setLeaf(true);
            hc.setIndex(i);
            hclust[i] = hc;
        }

        // Calculate all distances of vector clusters and get minimum distance:
        ClusterDistance clustDist = (ClusterDistance) distance;
        DistanceMatrix distMat = new DistanceMatrix();
        distMat.initDistanceArray(numbVec);
        clustDist.setDistanceMatrix(distMat);
        // Ordered list of distance-matrix indices, kept sorted by distance:
        int numbDist = (numbClust - 1) * numbClust / 2;
        IntVector ordDist = new IntVector();

        // Calculate all distances between vector clusters and order them:
        for (int i = 0; i < numbVec; i++)
            for (int j = i + 1; j < numbVec; j++) {
                // Distance (cached in distMat, from which it is read back below):
                double dist = clustDist.clusterDistance(hclust[i], hclust[j]);
                // Insert the new index and sift it into its sorted position:
                int ind = distMat.c2i(i, j);
                // int[] cc = distMat.i2c(ind);
                // System.out.println("i=" + i + " j=" + j + " ind=" + ind + " ii=" + cc[0] + " jj=" + cc[1]);
                ordDist.addElement(ind);
                int no = ordDist.size() - 1;
                while (no > 0 &&
                       distMat.getDistanceArray(ordDist.IntegerAt(no)) <
                       distMat.getDistanceArray(ordDist.IntegerAt(no - 1))) {
                    int it = ordDist.IntegerAt(no);
                    ordDist.setElementAt(ordDist.IntegerAt(no - 1), no);
                    ordDist.setElementAt(it, no - 1);
                    no = no - 1;
                }
            }
        // for (int i = 0; i < numbDist; i++)
        //     System.out.println("dist[" + i + "]=" + distMat.getDistanceArray(ordDist.IntegerAt(i)));

        // Add cluster by cluster iteratively:
        boolean[] usedClust = new boolean[numbClust];
        for (int nclust = numbVec; nclust < numbClust; nclust++) {
            // System.out.println("nclust=" + nclust);
            int ind;
            int im;
            int jm;
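            // Read the head of the sorted list; remove entries whose clusters
            // were already merged (lazy deletion) until a valid pair is found: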
            while (true) {
                ind = ordDist.IntegerAt(0);
                int[] cc = distMat.i2c(ind);
                im = cc[0];
                jm = cc[1];
                if (usedClust[im] || usedClust[jm])
                    ordDist.removeElementAt(0);
                else
                    break;
            }
            double mdist = distMat.getDistanceArray(ind);

            // Merge nearest clusters:
            HierarchicalCluster hc = new HierarchicalCluster(hclust[im], hclust[jm], mdist);
            hc.setIndex(nclust);
            hclust[nclust] = hc;
            usedClust[im] = true;
            usedClust[jm] = true;

            // Add new cluster distances:
            for (int i = 0; i < nclust; i++) {
                if (usedClust[i])
                    continue;
                double dist = clustDist.clusterDistance(hclust[i], hclust[nclust]);
                ind = distMat.c2i(i, nclust);
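                // Append the new distance index and sift it into sorted position: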
                ordDist.addElement(ind);
                int no = ordDist.size() - 1;
                while (no > 0 &&
                       distMat.getDistanceArray(ordDist.IntegerAt(no)) <
                       distMat.getDistanceArray(ordDist.IntegerAt(no - 1))) {
                    int it = ordDist.IntegerAt(no);
                    ordDist.setElementAt(ordDist.IntegerAt(no - 1), no);
                    ordDist.setElementAt(it, no - 1);
                    no = no - 1;
                }
            }
        }

        // Create array of clusters, last cluster is root:
        clusters = new HierarchicalCluster[numbClust];
        for (int i = 0; i < numbClust; i++)
            clusters[i] = (HierarchicalCluster) hclust[i];
        clustDist.setDistanceMatrix(null);
        System.out.println("...calculation finished.");
    }
}