📄 kmeans(linux下).cpp
字号:
/***************************************************************************** ** KMEANS ** ******************************************************************************/#include <stdio.h>#include <stdlib.h>#include <string.h>//#include <conio.h>#include <math.h>// FUNCTION PROTOTYPES// DEFINES#define SUCCESS 1#define FAILURE 0#define TRUE 1#define FALSE 0#define MAXVECTDIM 20//数据最多有多少维#define MAXPATTERN 20//最多有多少组数据#define MAXCLUSTER 10//最多分为多少个类//组装输出字符串char *f2a(double x, int width){ char cbuf[255]; char *cp; int i,k; int d,s; cp=fcvt(x,width,&d,&s); if (s) { strcpy(cbuf,"-"); } else { strcpy(cbuf," "); } /* endif */ if (d>0) { for (i=0; i<d; i++) { cbuf[i+1]=cp[i]; } /* endfor */ cbuf[d+1]=0; cp+=d; strcat(cbuf,"."); strcat(cbuf,cp); } else { if (d==0) { strcat(cbuf,"."); strcat(cbuf,cp); } else { k=-d; strcat(cbuf,"."); for (i=0; i<k; i++) { strcat(cbuf,"0"); } /* endfor */ strcat(cbuf,cp); } /* endif */ } /* endif */ cp=&cbuf[0]; return cp;}// ***** Defined structures & classes *****定义类结构struct aCluster { double Center[MAXVECTDIM];//聚类中心 int Member[MAXPATTERN]; //Index of Vectors belonging to this cluster属于这个类的样本 int NumMembers; //本类中的数据量};struct aVector { double Center[MAXVECTDIM]; int Size;};class System {private: double Pattern[MAXPATTERN][MAXVECTDIM+1]; aCluster Cluster[MAXCLUSTER]; int NumPatterns; //数据数 int SizeVector; // 维数 int NumClusters; // 类别数 void DistributeSamples(); // Step 2 of K-means algorithm int CalcNewClustCenters();// 计算新的聚类中心 double EucNorm(int, int); // 计算欧几里得距离 int FindClosestCluster(int); //找到于当前样本最近的类中心 //whose index is argpublic: //System(); int LoadPatterns(char *fname); // 加载将要聚类的数据 void InitClusters(); // 初始化KMeans聚类算法 void RunKMeans(); // 开始运行KMeans据类算法 void ShowClusters(); // 输出聚类结果 void SaveClusters(char *fname); //存储聚类结果 void ShowCenters();//输出聚类中心};//输出聚类中心void System::ShowCenters(){ int i,j; printf("Cluster centers:\n"); for (i=0; i<NumClusters; i++) { //Cluster[i].Member[0]=i; printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]); } /* endfor */ printf("\n");}//加载数据int System::LoadPatterns(char *fname){ FILE *InFilePtr; int i,j; double x; if((InFilePtr = fopen(fname, "r")) == NULL) return FAILURE; fscanf(InFilePtr, "%d", &NumPatterns); // 读取数据数 fscanf(InFilePtr, "%d", &SizeVector); // 读取数据维数 fscanf(InFilePtr, "%d", &NumClusters); // 读取类别数//读取将要进行聚类的数据 for (i=0; i<NumPatterns; i++) { // For each vector for (j=0; j<SizeVector; j++) { // create a pattern fscanf(InFilePtr,"%lg",&x); // consisting of all elements Pattern[i][j]=x; } /* endfor */ } /* endfor */ printf("Input patterns:\n"); for (i=0; i<NumPatterns; i++) { printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]); } /* endfor */ printf("\n--------------------\n"); return SUCCESS;}//***************************************************************************// InitClusters *// Arbitrarily assign a vector to each of the K clusters *// We choose the first K vectors to do this *// 初始化聚类//***************************************************************************void System::InitClusters(){ int i,j; printf("Initial cluster centers:\n");//把数据集中的第i个数据分给第i个类,作为这个类的第一个数据 for (i=0; i<NumClusters; i++) { Cluster[i].Member[0]=i; for (j=0; j<SizeVector; j++) { Cluster[i].Center[j]=Pattern[i][j]; } /* endfor */ } /* endfor *///输出 for (i=0; i<NumClusters; i++) { printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]); } /* endfor */ printf("\n");}//开始聚类void System::RunKMeans(){ int converged; int pass; pass=1; converged=FALSE; while (converged==FALSE) { printf("PASS=%d\n",pass++); DistributeSamples(); converged=CalcNewClustCenters(); ShowCenters(); } /* endwhile */}double System::EucNorm(int p, int c) // Calc Euclidean norm of vector difference计算欧几里得距离:数据点p和类别c之间的距离{ double dist,x; // between pattern vector, p, and cluster int i; // center, c. char zout[128]; char znum[40]; char *pnum; pnum=&znum[0]; strcpy(zout,"d=sqrt("); printf("The distance from pattern %d to cluster %d is calculated as:\n",p,c); dist=0; for (i=0; i<SizeVector ;i++){ x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]); strcat(zout,f2a(x,4)); if (i==0) strcat(zout,"+"); dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]); } /* endfor */ printf("%s)\n",zout); return dist;}int System::FindClosestCluster(int pat)//找到最近的类别,返回类别编号{ int i, ClustID; double MinDist, d; MinDist =9.9e+99; ClustID=-1; for (i=0; i<NumClusters; i++) { d=EucNorm(pat,i); printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d)); if (d<MinDist) { MinDist=d; ClustID=i; } /* endif */ } /* endfor */ if (ClustID<0) { printf("Aaargh"); exit(0); } /* endif */ return ClustID;}//重新分配数据点void System::DistributeSamples(){ int i,pat,Clustid,MemberIndex; //Clear membership list for all current clusters for (i=0; i<NumClusters;i++){ Cluster[i].NumMembers=0; } for (pat=0; pat<NumPatterns; pat++) { //Find cluster center to which the pattern is closest Clustid= FindClosestCluster(pat); printf("patern %d assigned to cluster %d\n\n",pat,Clustid); //post this pattern to the cluster MemberIndex=Cluster[Clustid].NumMembers; Cluster[Clustid].Member[MemberIndex]=pat; Cluster[Clustid].NumMembers++; } /* endfor */}//形成新的聚类中心,若聚类中心不变,就返回真值int System::CalcNewClustCenters(){ int ConvFlag,VectID,i,j,k; double tmp[MAXVECTDIM]; char xs[255]; char ys[255]; char nc1[20]; char nc2[20]; char *pnc1; char *pnc2; char *fpv; pnc1=&nc1[0]; pnc2=&nc2[0]; ConvFlag=TRUE; printf("The new cluster centers are now calculated as:\n"); for (i=0; i<NumClusters; i++) { //for each cluster //pnc1=itoa(Cluster[i].NumMembers,nc1,10); sprintf(nc1,"%d",Cluster[i].NumMembers); pnc1 = nc1; //pnc2=itoa(i,nc2,10); sprintf(nc2,"%d",i); pnc2 = nc2; strcpy(xs,"Cluster Center"); strcat(xs,nc2); strcat(xs,"(1/"); strcpy(ys,"(1/"); strcat(xs,nc1); strcat(ys,nc1); strcat(xs,")("); strcat(ys,")("); for (j=0; j<SizeVector; j++) { // clear workspace tmp[j]=0.0; } /* endfor */ for (j=0; j<Cluster[i].NumMembers; j++) { //traverse member vectors VectID=Cluster[i].Member[j]; for (k=0; k<SizeVector; k++) { //traverse elements of vector tmp[k] += Pattern[VectID][k]; // add (member) pattern elmnt into temp if (k==0) { strcat(xs,f2a(Pattern[VectID][k],3)); } else { strcat(ys,f2a(Pattern[VectID][k],3)); } /* endif */ } /* endfor */ if(j<Cluster[i].NumMembers-1){ strcat(xs,"+"); strcat(ys,"+"); } else { strcat(xs,")"); strcat(ys,")"); } } /* endfor */ for (k=0; k<SizeVector; k++) { //traverse elements of vector tmp[k]=tmp[k]/Cluster[i].NumMembers; if (tmp[k] != Cluster[i].Center[k]) ConvFlag=FALSE; Cluster[i].Center[k]=tmp[k]; } /* endfor */ printf("%s,\n",xs); printf("%s\n",ys); } /* endfor */ return ConvFlag;}void System::ShowClusters(){ int cl; for (cl=0; cl<NumClusters; cl++) { printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]); } /* endfor */}void System::SaveClusters(char *fname){}main(int argc, char *argv[]) {
System kmeans; char* address = "km2.dat"; if (kmeans.LoadPatterns(address)==FAILURE ){ printf("UNABLE TO READ PATTERN_FILE:%s\n",address); exit(0); } kmeans.InitClusters(); kmeans.RunKMeans(); kmeans.ShowClusters();}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -