// Source file: distancematrix.java (code-viewer page header "font size:" control; not part of the program)
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.1
*/
package com.prudsys.pdm.Models.Clustering.Hierarchical;
import java.util.Enumeration;
import java.util.Hashtable;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
/**
 * Matrix containing all distances between hierarchical clusters.
 *
 * The distances used by {@link #putDistance} and {@link #getDistance} are
 * kept in a one-dimensional float array that holds the strict upper triangle
 * of the distance matrix row by row (see {@link #initDistanceArray(int)},
 * {@link #c2i(int, int)} and {@link #i2c(int)}). A hashtable based storage
 * is still available through {@link #putDistanceHash(int, int, double)} and
 * {@link #getDistanceHash(int, int)} but is not used by the cluster based
 * accessors.
 *
 * Array values are stored as float; a stored value below -0.5 is reported
 * as Category.MISSING_VALUE when read back.
 */
public class DistanceMatrix extends com.prudsys.pdm.Cwm.Core.Class
{
  // -----------------------------------------------------------------------
  //  Constants of distance matrix
  // -----------------------------------------------------------------------
  /**
   * Coordinate separator of the hashtable key: key = i1 * separator + i2.
   * Only used by the hashtable based methods.
   */
  public static final long CLUSTER_INDEX_SEPARATOR = 1000000;

  // -----------------------------------------------------------------------
  //  Variables declarations
  // -----------------------------------------------------------------------
  /** Hashtable containing distances between clusters (Long key, Double value). */
  private Hashtable distHash = new Hashtable();

  /** Array of arrays storing all distances between clusters. No longer used. */
  private float distArr[][];

  /** One-dimensional array storing all distances between clusters. */
  private float distArr1[];

  /** Number of clusters; -1 until initDistanceArray has been called. */
  private int nclust = -1;

  // -----------------------------------------------------------------------
  //  Constructor
  // -----------------------------------------------------------------------
  /**
   * Empty constructor. The distance array must be created with
   * {@link #initDistanceArray(int)} before array based distances are used.
   */
  public DistanceMatrix()
  {
  }

  // -----------------------------------------------------------------------
  //  Methods of distance calculation
  // -----------------------------------------------------------------------
  /**
   * Stores the distance of a cluster pair in the one-dimensional array.
   * The order of the two clusters does not matter.
   *
   * @param hc1 cluster 1
   * @param hc2 cluster 2
   * @param distance distance between cluster 1 and cluster 2
   * @return always null (the array storage does not report a previous value)
   * @throws MiningException if an index is negative, both clusters have the
   *         same index, an index is not below the number of clusters, or
   *         the distance array has not been initialized
   */
  public Object putDistance(HierarchicalCluster hc1, HierarchicalCluster hc2,
                            double distance) throws MiningException {
    // Check for valid indexes:
    int i1 = hc1.getIndex();
    int i2 = hc2.getIndex();
    if (i1 < 0 || i2 < 0)
      throw new MiningException("Wrong index of one cluster");

    // First index must be the lower one:
    return putDistanceArray(Math.min(i1, i2), Math.max(i1, i2), distance);
  }

  /**
   * Returns the distance between two clusters from the one-dimensional
   * array. The order of the two clusters does not matter.
   *
   * @param hc1 cluster 1
   * @param hc2 cluster 2
   * @return stored distance, Category.MISSING_VALUE if the stored value is
   *         below -0.5 (i.e. no distance has been set)
   * @throws MiningException if an index is negative, both clusters have the
   *         same index, an index is not below the number of clusters, or
   *         the distance array has not been initialized
   */
  public double getDistance(HierarchicalCluster hc1, HierarchicalCluster hc2)
    throws MiningException {
    // Check for valid indexes:
    int i1 = hc1.getIndex();
    int i2 = hc2.getIndex();
    if (i1 < 0 || i2 < 0)
      throw new MiningException("Wrong index of one cluster");

    // First index must be the lower one:
    return getDistanceArray(Math.min(i1, i2), Math.max(i1, i2));
  }

  /**
   * Adds new distance to cluster pair using the hashtable.
   *
   * @param i1 index of cluster 1
   * @param i2 index of cluster 2, i2 > i1
   * @param distance distance between cluster 1 and cluster 2
   * @return previous distance (a Double) if the cluster pair already
   *         exists, otherwise null
   * @throws MiningException if an index is negative or i2 is not below
   *         CLUSTER_INDEX_SEPARATOR (the key would collide with another pair)
   */
  public Object putDistanceHash(int i1, int i2, double distance)
    throws MiningException {
    long key = hashKey(i1, i2);
    return distHash.put( new Long(key), new Double(distance) );
  }

  /**
   * Returns distance between two clusters using the hashtable.
   *
   * @param i1 index of cluster 1
   * @param i2 index of cluster 2, i2 > i1
   * @return value of distance pair, Category.MISSING_VALUE if not found
   * @throws MiningException if an index is negative or i2 is not below
   *         CLUSTER_INDEX_SEPARATOR
   */
  public double getDistanceHash(int i1, int i2)
    throws MiningException {
    long key = hashKey(i1, i2);
    Double dist = (Double) distHash.get( new Long(key) );
    if (dist == null)
      return Category.MISSING_VALUE;
    return dist.doubleValue();
  }

  /**
   * Init array of distance matrix of all clusters.
   *
   * The distance pairs are stored as upper triangle of the complete
   * distance matrix. Number of elements: (nclust-1)*nclust / 2
   * where nclust is the number of all clusters. Note that
   * nclust = 2*nvec-1 for nvec being the number of all vectors
   * to be clustered.
   *
   * The distances are initialized with -1 values in order
   * to indicate that they do not contain a valid distance.
   * For nvec below 2 there is no cluster pair and the array is empty.
   *
   * @param nvec number of vectors to be clustered
   * @throws IllegalArgumentException if the number of distance pairs
   *         does not fit into an int array index
   */
  public void initDistanceArray(int nvec) {
    // Number of clusters, computed in long so that neither 2*nvec nor the
    // pair count below can silently wrap around:
    long n = 2L * nvec - 1;

    long ndist = 0;
    if (n >= 2) {
      if (n > Integer.MAX_VALUE || (n - 1) * n / 2 > Integer.MAX_VALUE)
        throw new IllegalArgumentException(
          "Too many vectors for distance array: " + nvec);
      ndist = (n - 1) * n / 2;
    }

    // One-dimensional array for all distances, -1 marks "not set":
    float[] dist = new float[ (int) ndist ];
    for (int k = 0; k < dist.length; k++)
      dist[k] = -1;

    // Publish both fields only after the allocation succeeded:
    nclust = (int) n;
    distArr1 = dist;
  }

  /**
   * Calculates absolute distance index from cluster coordinates.
   * No range check is done here; the result is only meaningful for
   * 0 <= i < j < number of clusters.
   *
   * @param i coordinate 1
   * @param j coordinate 2
   * @return absolute index
   */
  public int c2i(int i, int j) {
    return (int) (rowOffset(i) + j - i - 1);
  }

  /**
   * Calculates distance cluster coordinates from index.
   * No range check is done here; the result is only meaningful for
   * an index inside the distance array.
   *
   * @param ind absolute index
   * @return array of coordinates [i,j]
   */
  public int[] i2c(int ind) {
    // Row i is the largest i with rowOffset(i) <= ind, i.e. the smaller
    // root of i*i - 2*p*i + 2*ind = 0 with p = nclust - 1/2, rounded down.
    double p = nclust - 0.5;
    double q = 2.0 * ind;   // floating point product: 2*ind may exceed int
    int i = (int) (p - Math.sqrt(p*p - q));
    int j = (int) (ind - rowOffset(i) + i + 1);
    return new int[] {i, j};
  }

  /**
   * Adds new distance to cluster pair using one-dimensional array.
   *
   * @param i1 index of cluster 1
   * @param i2 index of cluster 2, i2 > i1
   * @param distance distance between cluster 1 and cluster 2
   * @return always null
   * @throws MiningException if the distance array has not been initialized
   *         or the indexes do not satisfy 0 <= i1 < i2 < number of clusters
   */
  public Object putDistanceArray(int i1, int i2, double distance)
    throws MiningException {
    distArr1[ pairIndex(i1, i2) ] = (float) distance;
    return null;
  }

  /**
   * Adds new distance to cluster pair using one-dimensional array.
   * The index is used as is, without any range check.
   *
   * @param ind absolute index of cluster distance
   * @param distance distance between cluster 1 and cluster 2
   * @return always null
   */
  public Object putDistanceArray(int ind, double distance) {
    distArr1[ind] = (float) distance;
    return null;
  }

  /**
   * Returns distance between two clusters using one-dimensional array.
   *
   * @param i1 index of cluster 1
   * @param i2 index of cluster 2, i2 > i1
   * @return stored distance, Category.MISSING_VALUE if the stored value is
   *         below -0.5
   * @throws MiningException if the distance array has not been initialized
   *         or the indexes do not satisfy 0 <= i1 < i2 < number of clusters
   */
  public double getDistanceArray(int i1, int i2)
    throws MiningException {
    return storedToDistance( distArr1[ pairIndex(i1, i2) ] );
  }

  /**
   * Returns distance between two clusters using one-dimensional array.
   * The index is used as is, without any range check.
   *
   * @param ind absolute index of cluster distance
   * @return stored distance, Category.MISSING_VALUE if the stored value is
   *         below -0.5
   * @throws MiningException declared for interface compatibility
   */
  public double getDistanceArray(int ind)
    throws MiningException {
    return storedToDistance( distArr1[ind] );
  }

  /**
   * Returns string representation of distance matrix: first all entries
   * of the hashtable, then all entries of the one-dimensional array.
   *
   * @return string representation of distance matrix
   */
  public String toString() {
    // Buffer instead of repeated string concatenation inside the loops.
    StringBuffer out = new StringBuffer("distances: " + "\n");

    // Hash table:
    Enumeration keys = distHash.keys();
    while (keys.hasMoreElements()) {
      Long boxedKey = (Long) keys.nextElement();
      Double boxedValue = (Double) distHash.get(boxedKey);
      long key = boxedKey.longValue();
      double value = boxedValue.doubleValue();
      int i1 = (int) (key / CLUSTER_INDEX_SEPARATOR);
      int i2 = (int) (key - i1*CLUSTER_INDEX_SEPARATOR);
      out.append("(").append(i1).append(", ").append(i2)
         .append(") = ").append(value).append("\n");
    }

    // One-dimensional array:
    if (distArr1 != null) {
      for (int i = 0; i < nclust-1; i++) {
        for (int j = i+1; j < nclust; j++) {
          double value = distArr1[ c2i(i, j) ];
          out.append("(").append(i).append(", ").append(j)
             .append(") = ").append(value).append("\n");
        }
      }
    }
    return out.toString();
  }

  // -----------------------------------------------------------------------
  //  Private helpers
  // -----------------------------------------------------------------------
  /** Absolute index of the first element of row i of the upper triangle. */
  private long rowOffset(int i) {
    // long arithmetic: i*(nclust-1) and i*(i-1) can exceed the int range
    return (long) i * (nclust - 1) - (long) i * (i - 1) / 2;
  }

  /** Validates a cluster pair and returns its absolute array index. */
  private int pairIndex(int i1, int i2) throws MiningException {
    if (distArr1 == null)
      throw new MiningException("Distance array not initialized");
    // Any pair outside 0 <= i1 < i2 < nclust would map onto the array
    // slot of a different pair (or outside the array).
    if (i1 < 0 || i2 <= i1 || i2 >= nclust)
      throw new MiningException(
        "Wrong index of cluster pair: (" + i1 + ", " + i2 + ")");
    return c2i(i1, i2);
  }

  /** Builds the hashtable key of a cluster pair after validating it. */
  private static long hashKey(int i1, int i2) throws MiningException {
    // i2 must stay below the separator, otherwise two pairs share a key.
    if (i1 < 0 || i2 < 0 || i2 >= CLUSTER_INDEX_SEPARATOR)
      throw new MiningException(
        "Wrong index of cluster pair: (" + i1 + ", " + i2 + ")");
    return i1*CLUSTER_INDEX_SEPARATOR + i2;
  }

  /** Maps a stored array value to a distance; values below -0.5 are missing. */
  private static double storedToDistance(float stored) {
    double val = stored;
    if (val < -0.5) val = Category.MISSING_VALUE;
    return val;
  }
}
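// Keyboard shortcuts (code-viewer page footer; not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -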