📄 clusterdistance.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.1
*/
package com.prudsys.pdm.Models.Clustering.Hierarchical;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Distance;
/**
* Class for calculating distances between 2 clusters.
*
* This is done via the method clusterDistance.
* If both clusters contain just one vector, the
* method corresponds to the distance method
* of two vectors.
*/
public class ClusterDistance extends Distance
{
// -----------------------------------------------------------------------
// Constants of cluster distance calculation
// -----------------------------------------------------------------------
/** Single linkage, also refered to as nearest neighbour method. */
public static final int CDTYPE_SINGLE_LINKAGE = 1;
/** Complete linkage, also refered to a furthest neighbour method. */
public static final int CDTYPE_COMPLETE_LINKAGE = 2;
/** Average linking, also refered to as UPGMA method. */
public static final int CDTYPE_UNWEIGHTED_PAIR_GROUP_AVERAGE = 3;
/** Weighted average linking, also refered to as WPGMA method. */
public static final int CDTYPE_WEIGHTED_PAIR_GROUP_AVERAGE = 4;
/** Centroid method. */
public static final int CDTYPE_CENTROID = 5;
/** Median method. */
public static final int CDTYPE_MEDIAN = 6;
/** Ward's method. */
public static final int CDTYPE_WARD_METHOD = 7;
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/** Type of cluster distance calculation method. */
private int clustDistType = CDTYPE_CENTROID;
/** Distance matrix. */
private DistanceMatrix distanceMatrix = null;
// -----------------------------------------------------------------------
// Constructors
// -----------------------------------------------------------------------
/**
* Empty constructor.
*/
public ClusterDistance()
{
}
/**
* Constructor for a given number of attributes.
*
* @param numbAttributes number of attributes
*/
public ClusterDistance(int numbAttributes) {
super(numbAttributes);
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
* Returns cluster distance type (Single linkage, Complete linkage, ...).
*
* @return cluster distance type
*/
public int getClustDistType()
{
return clustDistType;
}
/**
* Sets cluster distance type (Single linkage, Complete linkake, ...).
*
* @param type cluster distance type
*/
public void setClustDistType(int type)
{
this.clustDistType = type;
}
/**
* Returns distance matrix.
*
* @return distance matrix, null if not used
*/
public DistanceMatrix getDistanceMatrix()
{
return distanceMatrix;
}
/**
* Sets distance matrix.
*
* @param distanceMatrix new distance matrix
*/
public void setDistanceMatrix(DistanceMatrix distanceMatrix)
{
this.distanceMatrix = distanceMatrix;
}
// -----------------------------------------------------------------------
// Methods of cluster distance calculation
// -----------------------------------------------------------------------
/**
* Calculates distance between two clusters.
*
* @param clust1 cluster 1
* @param clust2 cluster 2
* @return distance between the two clusters
* @exception MiningException clusters have different meta data
*/
public double clusterDistance(HierarchicalCluster clust1, HierarchicalCluster clust2) throws MiningException {
// Look for distance matrix:
if (distanceMatrix == null)
throw new MiningException("No distance matrix defined");
// Look in distance matrix if exist:
double cdist = distanceMatrix.getDistance(clust1, clust2);
if (! Category.isMissingValue(cdist))
return cdist;
// Distance between vector-clusters => simply vector distance:
int nc1 = clust1.getContainedVectors().size();
int nc2 = clust2.getContainedVectors().size();
if (nc1 == 1 && nc2 == 1) {
MiningVector cv1 = (MiningVector) clust1.getContainedVectors().elementAt(0);
MiningVector cv2 = (MiningVector) clust2.getContainedVectors().elementAt(0);
cdist = distance(cv1, cv2);
// Add to distance matrix:
distanceMatrix.putDistance(clust1, clust2, cdist);
return cdist;
};
// New cluster with subclusters (1,2), other cluster 3:
HierarchicalCluster hc1 = clust1.getChild1();
HierarchicalCluster hc2 = clust1.getChild2();
HierarchicalCluster hc3 = clust2;
if (clust1.getIndex() < clust2.getIndex()) { // cluster 2 is new
hc1 = clust2.getChild1();
hc2 = clust2.getChild2();
hc3 = clust1;
};
int n1 = hc1.getContainedVectors().size(); // vectors in subcluster 1
int n2 = hc2.getContainedVectors().size(); // vectors in subcluster 2
int n3 = hc3.getContainedVectors().size(); // vectors in cluster 3
double d13 = clusterDistance(hc1, hc3); // distance subcl. 1 - cl. 3
double d23 = clusterDistance(hc2, hc3); // distance subcl. 2 - cl. 3
double d12 = clusterDistance(hc1, hc2); // distance sbucl. 1 - subcl. 2
// For type:
switch (clustDistType)
{
case CDTYPE_SINGLE_LINKAGE:
if ( d13 < d23 )
cdist = d13;
else
cdist = d23;
break;
case CDTYPE_COMPLETE_LINKAGE:
if ( d13 < d23 )
cdist = d23;
else
cdist = d13;
break;
case CDTYPE_UNWEIGHTED_PAIR_GROUP_AVERAGE:
cdist = ( d13 + d23 ) / 2;
break;
case CDTYPE_WEIGHTED_PAIR_GROUP_AVERAGE:
cdist = ( n1*d13 + n2*d23 ) / ( n1 + n2 );
break;
case CDTYPE_MEDIAN:
cdist = 0.5*d13 + 0.5*d23 - 0.25*d12;
break;
case CDTYPE_CENTROID:
/*
cdist = distance(clust1.getCenterVec(), clust2.getCenterVec());
*/
cdist = n1*d13/(n1+n2) + n2*d23/(n1+n2) -
n1*n2*d12/( (n1+n2)*(n1+n2) );
break;
case CDTYPE_WARD_METHOD:
cdist = ( (n1 + n3)*d13 + (n2 + n3)*d23 - n3 *d12 ) /
( n1 + n2 + n3 );
break;
default: throw new MiningException("Unknown cluster distance type specified.");
};
// Add to distance matrix:
distanceMatrix.putDistance(clust1, clust2, cdist);
return cdist;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -