📄 hierarchy.c
字号:
double Hierarchy::MinFtMerged(short ftype)
{
int MinI;
double tmpFt, MinFt=HUGE_DOUBLE;
for (int i=0; i<=chainptr; i++) {
if (chain[i]<0) {
tmpFt=cf[-chain[i]-1].Fitness(ftype);
if (tmpFt<MinFt) {MinFt=tmpFt; MinI=i;}
}
}
// prepare for physically merging the MinI cluster
stopchain=FALSE;
chainptr=0;
chain[0]=chain[MinI];
return MinFt;
}
double Hierarchy::DistortionEdge(Entry *entries)
{
double TotalDistort=0;
for (int i=0; i<=chainptr; i++) {
if (chain[i]<0)
TotalDistort+=cf[-chain[i]-1].N()*cf[-chain[i]-1].Radius();
else TotalDistort+=entries[chain[i]-1].N()*entries[chain[i]-1].Radius();
}
return TotalDistort;
}
double Hierarchy::NmaxFtMerged(short ftype)
{
double NmaxFt;
int maxn;
maxn = 0; NmaxFt=0;
for (int i=0; i<=chainptr; i++) {
if (chain[i]<0 && cf[-chain[i]-1].N()>maxn) {
maxn = cf[-chain[i]-1].N();
NmaxFt = cf[-chain[i]-1].Fitness(ftype);
}
}
return NmaxFt;
}
double Hierarchy::TotalFtDEdge(short ftype, short d, Entry *entries)
{
double TotalFtD=0;
for (int i=0; i<=chainptr; i++) {
if (chain[i]<0)
TotalFtD+=pow(cf[-chain[i]-1].Fitness(ftype),1.0*d);
else TotalFtD+=pow(entries[chain[i]-1].Fitness(ftype),1.0*d);
}
return TotalFtD;
}
/***********************************************************
This is an O(n^3) algorithm, but works fine for all 5
(D0,D1,D2,D3,D4) distance definitions.
It is used to make sure Hierarchy1 and Hierarchy0 can
produce the same results for debugging purpose.
************************************************************/
void Hierarchy0(int &n, // final number of clusters
const int K, // final number of clusters
Entry **entries,
short GDtype,
short Ftype,
double Ft)
{
if (n<=1) return;
int i, j, imin, jmin, done;
short *checked = new short[n];
memset(checked,0,n*sizeof(short));
// 0: unchecked;
// -1: exceeds the given threshold if merged with nearest neighbor;
// -2: nonexistant after merging.
double *dist = new double[n*(n-1)/2];
double d, dmin;
Entry tmpent;
tmpent.Init((*entries)[0].sx.dim);
dmin = HUGE; // compute all initial distances and closest pair
for (i=0; i<n-1; i++)
for (j=i+1; j<n; j++) {
d = distance(GDtype,(*entries)[i],(*entries)[j]);
dist[i*n-i*(i+1)/2+j-i-1] = d;
if (d<dmin) {
dmin = d;
imin = i;
jmin = j;
}
}
if (K==0) {// ****** case 1 ****** cluster by threshold ft
done = FALSE;
while (done==FALSE) {
tmpent.Add((*entries)[imin],(*entries)[jmin]);
if (tmpent.Fitness(Ftype) < Ft) {
// within the threshold
(*entries)[imin] += (*entries)[jmin];
checked[jmin] = -2;
for (i=0; i<imin; i++) {
if (checked[i]==0) {
dist[i*n-i*(i+1)/2+imin-i-1] =
distance(GDtype,(*entries)[i],(*entries)[imin]);
}}
for (j=imin+1; j<n; j++) {
if (checked[j]==0) {
dist[imin*n-imin*(imin+1)/2+j-imin-1] =
distance(GDtype,(*entries)[imin],(*entries)[j]);
}}
}
else {
// exceeds the threshold
checked[imin] = -1;
checked[jmin] = -1;
}
done = TRUE;
dmin = HUGE;
for (i=0; i<n-1; i++) {
if (checked[i]==0) {
for (j=i+1; j<n; j++) {
if (checked[j]==0) {
d = dist[i*n-i*(i+1)/2+j-i-1];
if (d<dmin) {
done = FALSE;
dmin = d;
imin = i;
jmin = j;
}}}}}
} // end of while
} // end of if
else { // ***** case 2 ***** cluster by number k
done = n;
while (done > K) {
(*entries)[imin] += (*entries)[jmin];
checked[jmin] = -2;
done--;
for (i=0; i<imin; i++) {
if (checked[i]==0) {
dist[i*n-i*(i+1)/2+imin-i-1] =
distance(GDtype,(*entries)[i],(*entries)[imin]);
}}
for (j=imin+1; j<n; j++) {
if (checked[j]==0) {
dist[imin*n-imin*(imin+1)/2+j-imin-1] =
distance(GDtype,(*entries)[imin],(*entries)[j]);
}}
dmin = HUGE;
for (i=0; i<n-1; i++) {
if (checked[i]==0) {
for (j=i+1; j<n; j++) {
if (checked[j]==0) {
d = dist[i*n-i*(i+1)/2+j-i-1];
if (d<dmin) {
dmin = d;
imin = i;
jmin = j;
}}}}}
} // end of while
} // end of else
j = 0;
for (i=0; i<n; i++)
if (checked[i]!=-2) {
(*entries)[j]=(*entries)[i];
j++;
}
n=j;
delete [] checked;
delete [] dist;
}
/*********************************************************************
This is an O(n^2) algorithm, but works only for D2 and D4 due to the
"REDUCIBILITY PROPERTY" that is given as below:
if d(i,j) < p, d(i,k) > p, d(j,k) > p then d(i+j,k) > p.
Algorithm:
Part 1: Form the complete hierarchy in O(n^2) time
step1: Pick any cluster i[1].
step2: Determine the chain i[2]=NN(i[1]), i[3]=NN(i[2]) ...
until i[k]=NN(i[k-1]) and i[k-1]=NN(i[k]), where NN
means nearest neighbor.
step3: Merge clusters i[k-1] and i[k].
step4: If more than 1 clusters left, goto step using i[k-2]
as start if (k-2>0), otherwise choose arbitrary i[1].
Part 2: Split from the complete hierarchy in O(n^2)
by number of clusters K,
by the threshold Ft,
by tree height H,
...
*********************************************************************/
void Hierarchy1(int &n, // final number of clusters
const int K, // final number of clusters
Entry **entries,// array storing clusters
short GDtype,
short Ftype,
double Ft)
{
if (GDtype!=D2 && GDtype!=D4)
print_error("Hierarchy1","only support distance D2 and D4");
if (n<=1) return;
if (n==K) return;
// ****************** Part 1 **********************
Hierarchy *h = new Hierarchy(n-1,(*entries)[0].sx.dim);
h->MergeHierarchy(GDtype,n,*entries);
// ******************* Part 2 *********************
if (K==0) { // by the threshold
while (h->SplitHierarchy(BY_THRESHOLD,Ftype,Ft)==TRUE);
}
else { // by the number of clusters
while (h->chainptr+1<K &&
h->SplitHierarchy(BY_KCLUSTERS,Ftype,Ft)==TRUE);
}
// ******************* Part 3 *********************
int j;
Entry *tmpentries = new Entry[h->chainptr+1];
for (j=0;j<=h->chainptr;j++)
tmpentries[j].Init((*entries)[0].sx.dim);
for (j=0;j<=h->chainptr;j++) {
if (h->chain[j]<0)
tmpentries[j] = h->cf[-h->chain[j]-1];
else tmpentries[j] = (*entries)[h->chain[j]-1];
}
delete [] *entries;
*entries = tmpentries;
n=h->chainptr+1;
delete h;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -