⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clarans.c

📁 Solaris环境下的数据挖掘算法:birch聚类算法。该算法适用于对大量数据的挖掘。
💻 C
字号:
/****************************************************************File Name: clarans.C  Author: Tian Zhang, CS Dept., Univ. of Wisconsin-Madison, 1995               Copyright(c) 1995 by Tian Zhang                   All Rights ReservedPermission to use, copy and modify this software must be grantedby the author and provided that the above copyright notice appear in all relevant copies and that both that copyright notice and this permission notice appear in all relevant supporting documentations. Comments and additions may be sent the author at zhang@cs.wisc.edu.******************************************************************/#include <assert.h>#include "global.h"#include "util.h"#include "vector.h"#include "rectangle.h"#include "cfentry.h"#include "cutil.h"#include "parameter.h"#include "clarans.h"/* for CLARANS0 use only */static double local_search0(int n, Vector *entries, int **group, int k, int neighbor,int passi){int min, i, j, g, h, no_test;double dist, mindist, total_dist, djh, dji, cih ;int *oldgroup = new int[n];assert(oldgroup!=NULL);int *newgroup = new int[n];assert(newgroup!=NULL);int *tmpgroup;short *selected = new short[n];assert(selected!=NULL);memset(selected,0,n*sizeof(short));int *repre = new int[k];for (i=0; i<k; i++){ j = n/k*i+passi;  repre[i]=j;  selected[j]=1;  }	for (j=0, total_dist=0.0; j<n; j++) {	for (i=0, mindist = HUGE; i<k; i++) {		dist = entries[j]^entries[repre[i]];		if (dist<mindist) {			mindist = dist;			oldgroup[j]=i;			}		}	total_dist += mindist;	}for (; ;) {for (no_test=0; no_test<neighbor;) {i = rand() % k;h = rand() % n;if (selected[h]) continue;memcpy(newgroup,oldgroup,n*sizeof(int));for (j=0, cih=0; j<n; j++) {	djh = entries[j]^entries[h];	if (oldgroup[j]==i) {		dji = entries[j]^entries[repre[i]];		if (djh <= dji) cih += djh-dji;		else {		  for (g=0,mindist=HUGE;g<k;g++) {		   if (g==i) continue;		   dist = entries[j]^entries[repre[g]];			if (dist<mindist) {				mindist=dist;				min = g;				}			}		  if (mindist<djh) {			newgroup[j]=min;			cih += mindist-dji;			}		  else cih += djh-dji;		  }	}	else {		mindist = entries[j]^entries[repre[oldgroup[j]]];		if (djh < mindist) {			newgroup[j]=i;			cih += djh - mindist;			}		}	}if (cih >= 0.0) no_test++;else { // cih < 0 : improved	selected[repre[i]]=0;	selected[h]=1;	repre[i]=h;	tmpgroup = oldgroup;	oldgroup = newgroup;	newgroup = tmpgroup;	total_dist += cih;	break;	}}if (no_test==neighbor) break;}delete [] selected;delete [] repre;delete [] newgroup;*group = oldgroup;return(total_dist);}	/* for local_search1 use only */static void entrycpy(Entry *ents1, Entry *ents2, int k){for (int i=0;i<k;i++) 	ents1[i] = ents2[i];}/* for CLARANS1 use only */static double local_search1(int n, Entry *entries, Entry **clusters, int k, short dtype, short qtype, int neighbor, int passi){int i, j, g, h, min, no_test;double dist, mindist, total_dist, qua, min_qua, djh, dji, cih;int *repre = new int[k];assert(repre!=NULL);Entry *oldclusters = new Entry[k];assert(oldclusters!=NULL);for (i=0;i<k;i++) 	oldclusters[i].Init(entries[0].sx.dim);Entry *newclusters = new Entry[k];assert(newclusters!=NULL);for (i=0;i<k;i++) 	newclusters[i].Init(entries[0].sx.dim);Entry *tmpclusters;short *selected = new short[n];assert(selected!=NULL);memset(selected,0,n*sizeof(short));int *oldgroup = new int[n];assert(oldgroup!=NULL);int *newgroup = new int[n];assert(newgroup!=NULL);int *tmpgroup;// select initial medoids : not randomlyfor (i=0;i<k;i++) {	j = i*n/k+passi;	repre[i] = j;	oldclusters[i] = entries[j];	oldgroup[j] = i;	selected[j] = 1;	}// assign initial groups and clustersfor (i=0,total_dist=0;i<n;i++) {   if (selected[i]==1) continue;   for (j=0, mindist = HUGE;j<k;j++) {	dist = distance(dtype,entries[i],entries[repre[j]]);	if (dist < mindist) {		mindist = dist;		oldgroup[i]=j;		}	}   oldclusters[oldgroup[i]] += entries[i];   total_dist += mindist;   }min_qua = Quality(qtype,k,oldclusters);// random searchfor (; ; ) {for (no_test = 0; no_test < neighbor;) {	i = rand() % k;	h = rand() % n;	if (selected[h]) continue;	entrycpy(newclusters, oldclusters, k);	memcpy(newgroup, oldgroup, n * sizeof(int));	// for multiple distance definitions to work fine.	selected[repre[i]]=0;	selected[h]=1;	newgroup[h] = i;	newclusters[i] += entries[h];	newclusters[oldgroup[h]] -= entries[h];	for (j=0; j<n; j++) {	// scan data set	 if (selected[j]==1) continue;	 djh = distance(dtype,entries[j],entries[h]);	 if (oldgroup[j]==i) {		dji = distance(dtype,entries[j],entries[repre[i]]);		if (djh <= dji) cih += djh-dji;		else {			   for (g=0,mindist=HUGE; g<k; g++) {			if (g==i) continue;		 	dist = distance(dtype,entries[j],entries[repre[g]]);			if (dist < mindist) {				mindist = dist;				min = g;				}			}		   if (mindist<djh) {				newgroup[j]=min;				cih += mindist-dji;				newclusters[i] -= entries[j];				newclusters[min] += entries[j];				}		   else cih += djh-dji;		   }     		}	else { // (group[j]!=i)		mindist = distance(dtype,entries[j],entries[repre[oldgroup[j]]]);		// case1 djh >= djx : no operations		// case2 djh < djx  : do operations		if (djh < mindist) {		   newgroup[j] = i;		   cih += djh-mindist;		   newclusters[oldgroup[j]] -= entries[j];		   newclusters[i] += entries[j];		   }		}	} // end for j	qua = Quality(qtype, k, newclusters);	if (qua >= min_qua) {		selected[i]=1;		selected[h]=0;	 	no_test++;		}	else {		min_qua=qua;		total_dist += cih;		repre[i]=h;		tmpgroup = oldgroup;		oldgroup = newgroup;		newgroup = tmpgroup;		tmpclusters = oldclusters;		oldclusters = newclusters;		newclusters = tmpclusters;		break;		}      } // end for no_test      if (no_test==neighbor) break;   } // end of for (; ;)delete [] repre;delete [] selected;delete [] oldgroup;delete [] newgroup;delete [] oldclusters;*clusters = newclusters;return(total_dist);}	void Clarans0(int &n, const int K, Entry *entries){int *group, *min_group=NULL;int i;int no_local = 2;int no_neighbor = K*(n-K);double total_dist, min_total_dist;Vector *centroids = new Vector[n];for (i=0;i<n;i++) 	centroids[i].Init(entries[0].sx.dim);if (PERCENT(no_neighbor)>LOW_BOUND && PERCENT(no_neighbor)<HIGH_BOUND)         no_neighbor = (int) floor(PERCENT(no_neighbor));else if (PERCENT(no_neighbor)>=HIGH_BOUND) 	no_neighbor = HIGH_BOUND;     else if (no_neighbor>LOW_BOUND)         no_neighbor = LOW_BOUND;for (i=0; i<n; i++) centroids[i].Div(entries[i].sx,entries[i].n);for (i=0, min_total_dist=HUGE*K*n; i<no_local; i++){	total_dist = local_search0(n,centroids,&group,K,no_neighbor,i);	if (total_dist<min_total_dist) {		min_total_dist = total_dist;		if (min_group!=NULL) delete [] min_group;		min_group = group;		}}Entry *clusters = new Entry[K];for (i=0; i<K; i++) 	clusters[i].Init(entries[0].sx.dim);for (i=0; i<n; i++) 	clusters[min_group[i]] += entries[i];for (i=0; i<K; i++) 	entries[i]=clusters[i];n = K;delete [] min_group;delete [] clusters;delete [] centroids;}void Clarans1(int &n, const int K, Entry *entries, short GDtype, short Qtype){int i;int no_local = 2;int no_neighbor = K * (n - K);double min_qua,qua,min_total_dist,total_dist;Entry *clusters , *min_clusters=NULL;if (PERCENT(no_neighbor)>LOW_BOUND && PERCENT(no_neighbor)<HIGH_BOUND)         no_neighbor = (int) floor(PERCENT(no_neighbor));else if (PERCENT(no_neighbor)>=HIGH_BOUND) 	no_neighbor = HIGH_BOUND;     else if (no_neighbor>LOW_BOUND)         no_neighbor = LOW_BOUND;min_qua = HUGE * n * K;for (i=0, min_qua=HUGE*n*K, min_total_dist=HUGE*n*K;i<no_local;i++) {  total_dist = local_search1(n,entries,&clusters,K,GDtype,Qtype,no_neighbor,i);  qua = Quality(Qtype,K,clusters);  if (qua < min_qua) {	min_qua= qua;	if (min_clusters!=NULL) delete [] min_clusters;	min_clusters = clusters;	}  }for (i=0; i<K; i++) 	entries[i]=min_clusters[i];delete [] min_clusters;n = K;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -