📄 hierarchy.c
字号:
/****************************************************************
File Name: hierarchy.C
Author: Tian Zhang, CS Dept., Univ. of Wisconsin-Madison, 1995
Copyright(c) 1995 by Tian Zhang
All Rights Reserved
Permission to use, copy and modify this software must be granted
by the author and provided that the above copyright notice appear
in all relevant copies and that both that copyright notice and this
permission notice appear in all relevant supporting documentations.
Comments and additions may be sent the author at zhang@cs.wisc.edu.
******************************************************************/
#include <assert.h>
#include "global.h"
#include "util.h"
#include "vector.h"
#include "rectangle.h"
#include "cfentry.h"
#include "cutil.h"
#include "parameter.h"
#include "hierarchy.h"
/* for MergeHierarchy use only */
static void Update_Distance(int n1, int n2, int CurI, int NextI,
int n, int *checked, double *dist)
{
int i, j;
double d1, d2;
for (i=0; i<CurI; i++) {
if (checked[i]!=0) {
if (i!=NextI) {
d1 = dist[i*n-i*(i+1)/2+CurI-i-1];
if (i<NextI) d2 = dist[i*n-i*(i+1)/2+NextI-i-1];
else d2 = dist[NextI*n-NextI*(NextI+1)/2+i-NextI-1];
dist[i*n-i*(i+1)/2+CurI-i-1] = (n1*d1+n2*d2)/(n1+n2);
}}}
for (j=CurI+1; j<n; j++) {
if (checked[j]!=0) {
if (j!=NextI) {
d1 = dist[CurI*n-CurI*(CurI+1)/2+j-CurI-1];
if (j<NextI) d2 = dist[j*n-j*(j+1)/2+NextI-j-1];
else d2 = dist[NextI*n-NextI*(NextI+1)/2+j-NextI-1];
dist[CurI*n-CurI*(CurI+1)/2+j-CurI-1] = (n1*d1+n2*d2)/(n1+n2);
}}}
}
/* for MergeHierarchy use only */
static int Pick_One_Unchecked(int n, int *checked)
{
int i,j = rand() % n;
for (i=0;i<n;i++)
if (checked[(i+j)%n]!=0) return (i+j)%n;
return -1;
}
/* for MergeHierarchy use only */
static int Nearest_Neighbor(int CurI, int n, int *checked, double *dist)
{
int i, imin=0;
double d, dmin = HUGE;
for (i=0;i<CurI;i++) {
if (checked[i]!=0){
d = dist[i*n-i*(i+1)/2+CurI-i-1];
if (d<dmin) { dmin=d; imin=i; }
}}
for (i=CurI+1;i<n;i++) {
if (checked[i]!=0){
d = dist[CurI*n-CurI*(CurI+1)/2+i-CurI-1];
if (d<dmin) { dmin=d; imin=i; }
}}
if (dmin<HUGE) return imin;
else return -1;
}
/* for SplitHierarchy use only */
static int Farthest_Merge(int chainptr, int *chain, double *dd)
{
if (chainptr<=0) return chainptr;
double d, dmax = 0;
int i, imax = -1;
for (i=0; i<=chainptr; i++) {
if (chain[i]<0) {
d = dd[-chain[i]-1];
if (d>dmax) {imax = i; dmax = d;}
}}
return imax;
}
/* for SplitHierarchy use only */
static int Largest_Merge(int chainptr, int *chain, Entry *cf, short ftype, double ft)
{
for (int i=0; i<=chainptr; i++) {
if (chain[i]<0) {
if (cf[-chain[i]-1].Fitness(ftype)>ft)
return i;
}}
return -1;
}
/*********************************************************************
This is an O(n^2) algorithm, but works only for D2 and D4 due to the
"REDUCIBILITY PROPERTY" that is given as below:
if d(i,j) < p, d(i,k) > p, d(j,k) > p then d(i+j,k) > p.
Algorithm:
Part 1: MergeHierarchy:
form the complete hierarchy in O(n^2) time
step1: Pick any cluster i[1].
step2: Determine the chain i[2]=NN(i[1]), i[3]=NN(i[2]) ...
until i[k]=NN(i[k-1]) and i[k-1]=NN(i[k]), where NN
means nearest neighbor.
step3: Merge clusters i[k-1] and i[k].
step4: If more than 1 clusters left, goto step using i[k-2]
as start if (k-2>0), otherwise choose arbitrary i[1].
Part 2: SplitHierarchy:
cut from the complete hierarchy in O(n^2)
by number of clusters K,
by the threshold Ft,
by tree height H,
...
*********************************************************************/
void Hierarchy::MergeHierarchy(short gdtype, // distance type
int nentry, // number of entries
Entry *entries) // array storing entries
{
if (gdtype!=D2 && gdtype!=D4)
print_error("MergeHierarchy","only support distance D2 and D4");
int i,j,n1,n2;
int CurI, PrevI, NextI;
int uncheckcnt = nentry;
int *checked = new int[nentry];
for (i=0;i<nentry;i++) checked[i]=i+1;
// 0: invalid after being merged to other entries
// positive 1..nentry+1 : original entries
// negative -1..-(nentry-1) : merged entries
// get initial distances
double *dist=new double[nentry*(nentry-1)/2];
for (i=0; i<nentry-1; i++)
for (j=i+1; j<nentry; j++)
dist[i*nentry-i*(i+1)/2+j-i-1] =
distance(gdtype,entries[i],entries[j]);
CurI = rand() % nentry; // step1
chain[++chainptr]=CurI;
while (uncheckcnt>1) {
if (chainptr==-1) { // step4
chainptr++;
chain[chainptr]=Pick_One_Unchecked(nentry,checked);
}
if (chainptr>0)
PrevI=chain[chainptr-1];
else PrevI=-1;
stopchain=FALSE;
while (stopchain==FALSE) { // step2
CurI=chain[chainptr];
NextI = Nearest_Neighbor(CurI,nentry,checked,dist);
// it is impossible NextI be -1 because uncheckcnt>1
if (NextI==PrevI)
stopchain = TRUE;
else {
chain[++chainptr]=NextI;
PrevI = CurI;
}
} // end of while for step 2
step++;
ii[step] = checked[CurI]; // step3
jj[step] = checked[NextI];
if (CurI<NextI)
dd[step] = dist[CurI*nentry-CurI*(CurI+1)/2+NextI-CurI-1];
else
dd[step] = dist[NextI*nentry-NextI*(NextI+1)/2+CurI-NextI-1];
if (checked[CurI]>0) {
n1 = entries[CurI].n;
if (checked[NextI]>0) {
n2 = entries[NextI].n;
cf[step].Add(entries[CurI],entries[NextI]);
}
else {
n2 = cf[-checked[NextI]-1].n;
cf[step].Add(entries[CurI],cf[-checked[NextI]-1]);
}
}
else {
n1 = cf[-checked[CurI]-1].n;
if (checked[NextI]>0) {
n2 = entries[NextI].n;
cf[step].Add(cf[-checked[CurI]-1],entries[NextI]);
}
else {
n2 = cf[-checked[NextI]-1].n;
cf[step].Add(cf[-checked[CurI]-1],cf[-checked[NextI]-1]);
}
}
Update_Distance(n1,n2,CurI,NextI,nentry,checked,dist);
uncheckcnt--;
checked[CurI] = -(step+1);
checked[NextI] = 0;
chainptr--; chainptr--;
} //end of while (uncheckcnt>1)
if (checked) delete [] checked;
if (dist) delete [] dist;
// prepare for SplitHierarchy
stopchain = FALSE;
chainptr = 0;
chain[chainptr] = -(step+1);
}
short Hierarchy::SplitHierarchy(short option,short ftype,double ft)
{
int i, j;
switch (option) {
case BY_THRESHOLD:
if (stopchain==TRUE) return FALSE;
i = Largest_Merge(chainptr,chain,cf,ftype,ft);
if (i!=-1) {j = -chain[i]-1;
chain[i] = ii[j];
chain[++chainptr]=jj[j];
return TRUE;
}
else {stopchain = TRUE; return FALSE;}
break;
case BY_KCLUSTERS:
if (chainptr==size) return FALSE;
i = Farthest_Merge(chainptr,chain,dd);
if (i!=-1) {j = -chain[i]-1;
chain[i]=ii[j];
chain[++chainptr]=jj[j];
return TRUE;
}
else return FALSE;
break;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -