📄 hierarchicalagglomerative.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.1
*/
package com.prudsys.pdm.Models.Clustering.Hierarchical.Algorithms;
import java.util.Vector;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Hierarchical.ClusterDistance;
import com.prudsys.pdm.Models.Clustering.Hierarchical.DistanceMatrix;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalCluster;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalClusteringAlgorithm;
/**
 * Hierarchical agglomerative (bottom-up) clustering.
 *
 * Every input vector starts as its own leaf cluster. The two closest
 * clusters that have not yet been merged are then repeatedly joined into
 * a new parent cluster until a single cluster (the root) remains.
 */
public class HierarchicalAgglomerative extends HierarchicalClusteringAlgorithm
{
  /**
   * Runs hierarchical agglomerative clustering algorithm.
   *
   * The mining input stream is read twice: once to count the vectors and
   * determine the per-attribute value range handed to the distance object,
   * and once (after a reset) to build one leaf cluster per vector. On
   * return, <code>clusters</code> holds all leaf clusters followed by all
   * merged clusters in creation order; the last entry is the root.
   *
   * @exception MiningException could not run algorithm, in particular if
   *            the input stream delivers no vectors or if no comparable
   *            (non-NaN) distance exists between the remaining clusters
   */
  protected void runAlgorithm() throws MiningException {
    // Number of attributes and vectors:
    int numbAtt = metaData.getAttributesNumber();
    int numbVec = 0;

    // Get minimum and maximum of attributes, used if normalization.
    // Start from +/-infinity so that the result is the true range of the
    // data; starting from 0.0 would silently widen the range to include 0
    // for attributes that are strictly positive or strictly negative.
    double[] minArr = new double[numbAtt];
    double[] maxArr = new double[numbAtt];
    for (int i = 0; i < numbAtt; i++) {
      minArr[i] = Double.POSITIVE_INFINITY;
      maxArr[i] = Double.NEGATIVE_INFINITY;
    }
    while (miningInputStream.next()) {
      MiningVector vec = miningInputStream.read();
      for (int i = 0; i < numbAtt; i++) {
        // NaN values fail both comparisons and are therefore ignored.
        double value = vec.getValue(i);
        if (value < minArr[i]) {
          minArr[i] = value;
        }
        if (value > maxArr[i]) {
          maxArr[i] = value;
        }
      }
      numbVec = numbVec + 1;
    }
    if (numbVec == 0) {
      // Without this check the merge phase would index a cluster at -1.
      throw new MiningException("Hierarchical clustering requires at least one input vector");
    }
    for (int i = 0; i < numbAtt; i++) {
      // Attribute without any comparable value: fall back to the neutral
      // range [0, 0] instead of passing infinities to the distance.
      if (minArr[i] > maxArr[i]) {
        minArr[i] = 0.0;
        maxArr[i] = 0.0;
      }
    }
    distance.setMinAtt(minArr);
    distance.setMaxAtt(maxArr);

    // Form all vector clusters (one leaf per input vector):
    Vector hclust = new Vector();
    int ind = 0;
    miningInputStream.reset();
    while (miningInputStream.next()) {
      MiningVector mv = miningInputStream.read();
      HierarchicalCluster leaf = new HierarchicalCluster();
      leaf.setCenterVec(mv);
      Vector contVec = new Vector();
      contVec.addElement(mv);
      leaf.setContainedVectors(contVec);
      leaf.setLeaf(true);
      leaf.setIndex(ind);
      hclust.addElement(leaf);
      ind = ind + 1;
    }

    // Calculate all distances of vector clusters up front.
    // NOTE(review): the return values are discarded; presumably these calls
    // fill the distance matrix attached just before - confirm in
    // ClusterDistance.
    ClusterDistance clustDist = (ClusterDistance) distance;
    DistanceMatrix distMat = new DistanceMatrix();
    distMat.initDistanceArray(numbVec);
    clustDist.setDistanceMatrix(distMat);
    for (int i = 0; i < numbVec; i++) {
      for (int j = i + 1; j < numbVec; j++) {
        clustDist.clusterDistance((HierarchicalCluster) hclust.elementAt(i),
                                  (HierarchicalCluster) hclust.elementAt(j));
      }
    }
    System.out.println("...matrix distance. Get clusters:");

    // n leaves plus at most n-1 merged clusters can ever exist.
    // used[k] == true means cluster k has already been merged into a parent.
    boolean[] used = new boolean[2 * numbVec - 1];
    int nclust = numbVec;   // total number of clusters created so far
    int active = numbVec;   // clusters not yet merged into a parent

    // Add cluster by cluster iteratively; only one active cluster => root:
    while (active > 1) {
      // Find pair of active clusters with minimum distance:
      int im = -1;
      int jm = -1;
      double mdist = Double.POSITIVE_INFINITY;
      for (int i = 0; i < nclust; i++) {
        if (used[i]) {
          continue;
        }
        HierarchicalCluster ci = (HierarchicalCluster) hclust.elementAt(i);
        for (int j = i + 1; j < nclust; j++) {
          if (used[j]) {
            continue;
          }
          double dist = clustDist.clusterDistance(
              ci, (HierarchicalCluster) hclust.elementAt(j));
          // "<=" on purpose: on ties the last examined pair wins.
          if (dist <= mdist) {
            mdist = dist;
            im = i;
            jm = j;
          }
        }
      }
      if (im < 0) {
        // Only reachable if every remaining distance is NaN.
        throw new MiningException("No valid distance between remaining clusters (all distances are NaN)");
      }

      // Merge nearest clusters:
      HierarchicalCluster hc1 = (HierarchicalCluster) hclust.elementAt(im);
      HierarchicalCluster hc2 = (HierarchicalCluster) hclust.elementAt(jm);
      HierarchicalCluster hc = new HierarchicalCluster(hc1, hc2, mdist);
      hc.setIndex(nclust);
      if (clustDist.getClustDistType() == clustDist.CDTYPE_CENTROID) {
        // Centroid linkage: the new center is the weight-averaged center
        // of the two merged clusters, and the weights add up.
        MiningVector c1 = hc1.getCenterVec();
        MiningVector c2 = hc2.getCenterVec();
        double[] val1 = c1.getValues();
        double[] val2 = c2.getValues();
        double[] valc = new double[numbAtt];
        double w1 = hc1.getWeight();
        double w2 = hc2.getWeight();
        double w = w1 + w2;
        for (int i = 0; i < numbAtt; i++) {
          valc[i] = (w1 * val1[i] + w2 * val2[i]) / w;
        }
        MiningVector mv = new MiningVector(valc);
        mv.setMetaData(c1.getMetaData());
        hc.setCenterVec(mv);
        hc.setWeight(w);
      }
      hclust.addElement(hc);
      used[im] = true;
      used[jm] = true;
      nclust = nclust + 1;   // used[nclust - 1] is false: new cluster is active
      active = active - 1;   // two clusters consumed, one created
    }

    // Create array of clusters, last cluster is root (it is the only one
    // that was never merged into a parent):
    clusters = new HierarchicalCluster[nclust];
    for (int i = 0; i < nclust; i++) {
      clusters[i] = (HierarchicalCluster) hclust.elementAt(i);
    }

    // Detach the distance matrix from the distance object again.
    clustDist.setDistanceMatrix(null);
    System.out.println("...calculation finished." );
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -