📄 kmeans.cpp
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
// function prototypes
// defines
#define success 1
#define failure 0
#define true 1
#define false 0
#define maxvectdim 20
#define maxpattern 20
#define maxcluster 10
#define STUDYRATE 0.6
char *f2a(double x, int width){
char cbuf[255];
char *cp;
int i,k;
int d,s;
cp=fcvt(x,width,&d,&s);
if (s) {
strcpy(cbuf,"-");
}
else {
strcpy(cbuf," ");
} /* endif */
if (d>0) {
for (i=0; i<d; i++) {
cbuf[i+1]=cp[i];
} /* endfor */
cbuf[d+1]=0;
cp+=d;
strcat(cbuf,".");
strcat(cbuf,cp);
} else {
if (d==0) {
strcat(cbuf,".");
strcat(cbuf,cp);
}
else {
k=-d;
strcat(cbuf,".");
for (i=0; i<k; i++) {
strcat(cbuf,"0");
} /* endfor */
strcat(cbuf,cp);
} /* endif */
} /* endif */
cp=&cbuf[0];
return cp;
}
// ***** defined structures & classes *****
struct acluster {
double center[maxvectdim];
int member[maxpattern]; //index of vectors belonging to this cluster
int nummembers;
};
struct avector {
double center[maxvectdim];
int size;
};
class System {
private:
double pattern[maxpattern][maxvectdim+1];
acluster cluster[maxcluster];
int numpatterns; // number of patterns
int sizevector; // number of dimensions in vector
int numclusters; // number of clusters
void distributesamples(); // step 2 of k-means algorithm
int calcnewclustcenters();// step 3 of k-means algorithm
double eucnorm(int, int); // calc euclidean norm vector
int findclosestcluster(int); //ret indx of clust closest to pattern
//whose index is arg
public:
system();
int loadpatterns(char *fname); // get pattern data to be clustered
void initclusters(); // step 1 of k-means algorithm
void runkmeans(); // overall control k-means process
void showclusters(); // show results on screen
void saveclusters(char *fname); // save results to file
void showcenters();
int CalFinalClustersCenter();
};
void System::showcenters(){
int i,j;
printf("cluster centers:\n");
for (i=0; i<numclusters; i++) {
cluster[i].member[0]=i;
printf("clustercenter[%d]=(%f,%f)\n",i,cluster[i].center[0],cluster[i].center[1]);
} /* endfor */
printf("\n");
}
int System::loadpatterns(char *fname){
FILE *infileptr;
int i,j;
double x;
if((infileptr = fopen(fname, "r")) == NULL)
return failure;
fscanf(infileptr, "%d", &numpatterns); // read # of patterns
fscanf(infileptr, "%d", &sizevector); // read dimension of vector
fscanf(infileptr, "%d", &numclusters); // read # of clusters for k-means
for (i=0; i<numpatterns; i++) { // for each vector
for (j=0; j<sizevector; j++) { // create a pattern
fscanf(infileptr,"%lg",&x); // consisting of all elements
pattern[i][j]=x;
} /* endfor */
} /* endfor */
printf("input patterns:\n");
for (i=0; i<numpatterns; i++) {
printf("pattern[%d]=(%2.3f,%2.3f)\n",i,pattern[i][0],pattern[i][1]);
} /* endfor */
printf("\n--------------------\n");
return success;
}
//***************************************************************************
// initclusters *
// arbitrarily assign a vector to each of the k clusters *
// we choose the first k vectors to do this *
//***************************************************************************
void System::initclusters()
{
int i,j;
int Flag = 0;
int Pass = 0;
printf("initial cluster centers:\n");
for (i=0; i<numclusters; i++)
{
cluster[i].member[0]=i;
for (j=0; j<sizevector; j++)
{
cluster[i].center[j]=pattern[i][j];
} /* endfor */
} /* endfor */
for (i=0; i<numclusters; i++)
{
printf("clustercenter[%d]=(%f,%f)\n",i,cluster[i].center[0],cluster[i].center[1]);
} /* endfor */
printf("\n");
while(Flag == 0)
{
Flag = CalFinalClustersCenter();
Pass++;
if(Pass > 20000)
{
printf("There is no answer of this question since the pass is: %d\n",Pass);
exit(0);
}
}
distributesamples();
}
int System :: CalFinalClustersCenter()
{
int i = 0;
int j = 0;
int p,q;
int ClusterId;
int Flag;
double tmp[maxcluster][maxvectdim];
/* 随机在总点数中找出一个点*/
p = rand()%numpatterns;
ClusterId = findclosestcluster(p);
printf("随机选取的第 %d 个点属于第 %d 类\n\n",p,ClusterId);
for(i = 0; i<numclusters;i++)
{
for(j = 0; j<sizevector;j++)
{
tmp[i][j] = cluster[i].center[j];
}
}
for(i = 0; i<sizevector;i++)
{
tmp[ClusterId][i] = cluster[ClusterId].center[i] + STUDYRATE * (pattern[p][i] - cluster[ClusterId].center[i]);
}
for(i = 0; i<numclusters;i++)
{
for(j = 0; j<sizevector;j++)
{
if((tmp[i][j] - cluster[i].center[j])*(tmp[i][j] - cluster[i].center[j]) < 0.0001)
{
Flag = 1;
}
else
{
Flag = 0;
for(q = 0; q<sizevector;q++)
{
cluster[ClusterId].center[q] = tmp[ClusterId][q];
}
break;
}
}
if(Flag == 0)
{
break;
}
}
return Flag;
}
void System::runkmeans()
{
int converged;
int pass;
pass=1;
converged=false;
while (converged==false)
{
printf("pass=%d\n",pass++);
if(pass >= 20000)
{
printf("There is no answer of this question since the pass is: %d\n",pass);
exit(0);
}
distributesamples();
converged=calcnewclustcenters();
showcenters();
} /* endwhile */
}
double System::eucnorm(int p, int c)
{ // calc euclidean norm of vector difference
double dist,x; // between pattern vector, p, and cluster
int i; // center, c.
char zout[128];
char znum[40];
char *pnum;
pnum=&znum[0];
strcpy(zout,"d=sqrt(");
printf("the distance from pattern %d to cluster %d is calculated as:\n",p,c);
dist=0;
for (i=0; i<sizevector ;i++){
x=(cluster[c].center[i]-pattern[p][i])*(cluster[c].center[i]-pattern[p][i]);
strcat(zout,f2a(x,4));
if (i==0)
strcat(zout,"+");
dist += (cluster[c].center[i]-pattern[p][i])*(cluster[c].center[i]-pattern[p][i]);
} /* endfor */
printf("%s)\n",zout);
return dist;
}
int System::findclosestcluster(int pat){
int i, clustid;
double mindist, d;
mindist =9.9e+99;
clustid=-1;
for (i=0; i<numclusters; i++)
{
d=eucnorm(pat,i);
printf("distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
if (d<mindist)
{
mindist=d;
clustid=i;
} /* endif */
} /* endfor */
if (clustid<0)
{
printf("aaargh");
exit(0);
} /* endif */
return clustid;
}
void System::distributesamples()
{
int i,pat,clustid,memberindex;
//clear membership list for all current clusters
for (i=0; i<numclusters;i++)
{
cluster[i].nummembers=0;
}
for (pat=0; pat<numpatterns; pat++)
{
//find cluster center to which the pattern is closest
clustid= findclosestcluster(pat);
printf("patern %d assigned to cluster %d\n\n",pat,clustid);
//post this pattern to the cluster
memberindex=cluster[clustid].nummembers;
cluster[clustid].member[memberindex]=pat;
cluster[clustid].nummembers++;
} /* endfor */
}
int System::calcnewclustcenters(){
int convflag,vectid,i,j,k;
double tmp[maxvectdim];
char xs[255];
char ys[255];
char nc1[20];
char nc2[20];
char *pnc1;
char *pnc2;
char *fpv;
pnc1=&nc1[0];
pnc2=&nc2[0];
convflag=true;
printf("the new cluster centers are now calculated as:\n");
for (i=0; i<numclusters; i++) { //for each cluster
pnc1=itoa(cluster[i].nummembers,nc1,10);
pnc2=itoa(i,nc2,10);
strcpy(xs,"cluster center");
strcat(xs,nc2);
strcat(xs,"(1/");
strcpy(ys,"(1/");
strcat(xs,nc1);
strcat(ys,nc1);
strcat(xs,")(");
strcat(ys,")(");
for (j=0; j<sizevector; j++) { // clear workspace
tmp[j]=0.0;
} /* endfor */
for (j=0; j<cluster[i].nummembers; j++)
{ //traverse member vectors
vectid=cluster[i].member[j];
for (k=0; k<sizevector; k++)
{ //traverse elements of vector
tmp[k] += pattern[vectid][k]; // add (member) pattern elmnt into temp
if (k==0)
{
strcat(xs,f2a(pattern[vectid][k],3));
}
else
{
strcat(ys,f2a(pattern[vectid][k],3));
} /* endif */
} /* endfor */
if(j<cluster[i].nummembers-1){
strcat(xs,"+");
strcat(ys,"+");
}
else {
strcat(xs,")");
strcat(ys,")");
}
} /* endfor */
for (k=0; k<sizevector; k++)
{ //traverse elements of vector
tmp[k]=tmp[k]/cluster[i].nummembers;
if (tmp[k] != cluster[i].center[k])
convflag=false;
cluster[i].center[k]=tmp[k];
} /* endfor */
printf("%s,\n",xs);
printf("%s\n",ys);
} /* endfor */
return convflag;
}
void System::showclusters()
{
int cl,c2;
int byloop;
int byloop2;
int byIndex;
FILE *infileptr;
infileptr = fopen("KM2OUT.DAT", "wb");
for (cl=0; cl<numclusters; cl++)
{
printf("\ncluster %d ==>[%f,%f]\n", cl,cluster[cl].center[0],cluster[cl].center[1]);
fprintf(infileptr,"所对应的第%d类的中心点为:[%f,%f] ",cl,cluster[cl].center[0],cluster[cl].center[1]);
}
fprintf(infileptr,"K均值聚类分为:%d类 ",numclusters);
for(cl=0; cl < numclusters; cl++)
{
fprintf(infileptr,"其中属于第%d类的点的个数为:%d ",cl,cluster[cl].nummembers);
for(byloop=0;byloop<cluster[cl].nummembers;byloop++)
{
byIndex = cluster[cl].member[byloop];
fprintf(infileptr,"第%d个点",byIndex+1);
for(byloop2 = 0; byloop2<sizevector;byloop2++)
{
fprintf(infileptr,"%f ",pattern[byIndex][byloop2]);
}
}
}
}
void System::saveclusters(char *fname)
{
}
main(int argc, char *argv[])
{
System kmeans;
if (kmeans.loadpatterns("KMIn.DAT")==failure )
{
printf("unable to read pattern_file:%s\n",argv[1]);
exit(0);
}
kmeans.initclusters();
kmeans.showclusters();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -