⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans(linux下).cpp

📁 经典的划分聚类算法Kmeans。包能能用
💻 CPP
字号:
/*****************************************************************************                                                                           **  KMEANS                                                                   **                                                                           ******************************************************************************/#include <stdio.h>#include <stdlib.h>#include <string.h>//#include <conio.h>#include <math.h>// FUNCTION PROTOTYPES// DEFINES#define         SUCCESS         1#define         FAILURE         0#define         TRUE            1#define         FALSE           0#define         MAXVECTDIM      20//数据最多有多少维#define         MAXPATTERN      20//最多有多少组数据#define         MAXCLUSTER      10//最多分为多少个类//组装输出字符串char *f2a(double x, int width){  	char cbuf[255];  	char *cp;  	int i,k;  	int d,s;  	cp=fcvt(x,width,&d,&s);  	if (s) {    		strcpy(cbuf,"-");  	}  	else {   	 	strcpy(cbuf," ");  	} /* endif */ 	 if (d>0) {	    for (i=0; i<d; i++) {	      cbuf[i+1]=cp[i];	    } /* endfor */	    cbuf[d+1]=0;	    cp+=d;	    strcat(cbuf,".");	    strcat(cbuf,cp);	  } 	else {		    if (d==0) {		      strcat(cbuf,".");		      strcat(cbuf,cp);		    } else {		      k=-d;		      strcat(cbuf,".");		      for (i=0; i<k; i++) {			strcat(cbuf,"0");		      } /* endfor */		      strcat(cbuf,cp);		    } /* endif */ 		 } /* endif */	cp=&cbuf[0];	return cp;}// ***** Defined structures & classes *****定义类结构struct aCluster {  double       Center[MAXVECTDIM];//聚类中心  int          Member[MAXPATTERN];  //Index of Vectors belonging to this cluster属于这个类的样本  int          NumMembers;	//本类中的数据量};struct aVector {  double       Center[MAXVECTDIM];  int          Size;};class System {private:  double       Pattern[MAXPATTERN][MAXVECTDIM+1];  aCluster     Cluster[MAXCLUSTER];  int          NumPatterns;          //数据数  int          SizeVector;           // 维数  int          NumClusters;          // 类别数  void         DistributeSamples();  // Step 2 of K-means algorithm  int          CalcNewClustCenters();// 计算新的聚类中心  double       EucNorm(int, int);   // 计算欧几里得距离  int          FindClosestCluster(int); //找到于当前样本最近的类中心  //whose index is argpublic:  //System();  int LoadPatterns(char *fname);      // 加载将要聚类的数据  void InitClusters();                // 初始化KMeans聚类算法  void RunKMeans();                   // 开始运行KMeans据类算法  void ShowClusters();                // 输出聚类结果  void SaveClusters(char *fname);     //存储聚类结果  void ShowCenters();//输出聚类中心};//输出聚类中心void System::ShowCenters(){  int i,j;  printf("Cluster centers:\n");  for (i=0; i<NumClusters; i++) {    //Cluster[i].Member[0]=i;    printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);  } /* endfor */  printf("\n");}//加载数据int System::LoadPatterns(char *fname){  FILE *InFilePtr;  int    i,j;  double x;  if((InFilePtr = fopen(fname, "r")) == NULL)    return FAILURE;  fscanf(InFilePtr, "%d", &NumPatterns);  // 读取数据数  fscanf(InFilePtr, "%d", &SizeVector);   // 读取数据维数  fscanf(InFilePtr, "%d", &NumClusters);  // 读取类别数//读取将要进行聚类的数据  for (i=0; i<NumPatterns; i++) {         // For each vector    for (j=0; j<SizeVector; j++) {       // create a pattern      fscanf(InFilePtr,"%lg",&x);       // consisting of all elements      Pattern[i][j]=x;    } /* endfor */  } /* endfor */  printf("Input patterns:\n");  for (i=0; i<NumPatterns; i++) {    printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);  } /* endfor */  printf("\n--------------------\n");  return SUCCESS;}//***************************************************************************// InitClusters                                                             *//   Arbitrarily assign a vector to each of the K clusters                  *//   We choose the first K vectors to do this                               *//  初始化聚类//***************************************************************************void System::InitClusters(){  int i,j;  printf("Initial cluster centers:\n");//把数据集中的第i个数据分给第i个类,作为这个类的第一个数据  for (i=0; i<NumClusters; i++) {	    Cluster[i].Member[0]=i;	    for (j=0; j<SizeVector; j++) {	      Cluster[i].Center[j]=Pattern[i][j];	    } /* endfor */  } /* endfor *///输出  for (i=0; i<NumClusters; i++) {    printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);  } /* endfor */  printf("\n");}//开始聚类void System::RunKMeans(){  int converged;  int pass;  pass=1;  converged=FALSE;  while (converged==FALSE) {    printf("PASS=%d\n",pass++);    DistributeSamples();    converged=CalcNewClustCenters();    ShowCenters();  } /* endwhile */}double System::EucNorm(int p, int c)    // Calc Euclidean norm of vector difference计算欧几里得距离:数据点p和类别c之间的距离{  double dist,x;                          // between pattern vector, p, and cluster  int i;                                  // center, c.  char zout[128];  char znum[40];  char *pnum;  pnum=&znum[0];  strcpy(zout,"d=sqrt(");  printf("The distance from pattern %d to cluster %d is calculated as:\n",p,c);  dist=0;  for (i=0; i<SizeVector ;i++){		x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);		strcat(zout,f2a(x,4));		if (i==0)		strcat(zout,"+");		dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);  } /* endfor */  printf("%s)\n",zout);  return dist;}int System::FindClosestCluster(int pat)//找到最近的类别,返回类别编号{  int i, ClustID;  double MinDist, d;  MinDist =9.9e+99;  ClustID=-1;  for (i=0; i<NumClusters; i++) {    d=EucNorm(pat,i);    printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));    if (d<MinDist) {      MinDist=d;      ClustID=i;    } /* endif */  } /* endfor */  if (ClustID<0) {    printf("Aaargh");    exit(0);  } /* endif */  return ClustID;}//重新分配数据点void System::DistributeSamples(){  int i,pat,Clustid,MemberIndex;  //Clear membership list for all current clusters  for (i=0; i<NumClusters;i++){	 Cluster[i].NumMembers=0;  }  for (pat=0; pat<NumPatterns; pat++) {    //Find cluster center to which the pattern is closest    Clustid= FindClosestCluster(pat);    printf("patern %d assigned to cluster %d\n\n",pat,Clustid);    //post this pattern to the cluster    MemberIndex=Cluster[Clustid].NumMembers;    Cluster[Clustid].Member[MemberIndex]=pat;    Cluster[Clustid].NumMembers++;  } /* endfor */}//形成新的聚类中心,若聚类中心不变,就返回真值int  System::CalcNewClustCenters(){  int ConvFlag,VectID,i,j,k;  double tmp[MAXVECTDIM];  char xs[255];  char ys[255];  char nc1[20];  char nc2[20];  char *pnc1;  char *pnc2;  char *fpv;  pnc1=&nc1[0];  pnc2=&nc2[0];  ConvFlag=TRUE;  printf("The new cluster centers are now calculated as:\n");  for (i=0; i<NumClusters; i++) {              //for each cluster    //pnc1=itoa(Cluster[i].NumMembers,nc1,10);    sprintf(nc1,"%d",Cluster[i].NumMembers);    pnc1 = nc1;    //pnc2=itoa(i,nc2,10);    sprintf(nc2,"%d",i);    pnc2 = nc2;    strcpy(xs,"Cluster Center");    strcat(xs,nc2);    strcat(xs,"(1/");    strcpy(ys,"(1/");    strcat(xs,nc1);    strcat(ys,nc1);    strcat(xs,")(");    strcat(ys,")(");    for (j=0; j<SizeVector; j++) {            // clear workspace      tmp[j]=0.0;    } /* endfor */    for (j=0; j<Cluster[i].NumMembers; j++)	 { //traverse member vectors	      VectID=Cluster[i].Member[j];	      for (k=0; k<SizeVector; k++) 		{         //traverse elements of vector			tmp[k] += Pattern[VectID][k];       // add (member) pattern elmnt into temp			if (k==0) 			{		 		 strcat(xs,f2a(Pattern[VectID][k],3));			} 			else 			{		  		strcat(ys,f2a(Pattern[VectID][k],3));			} /* endif */	      } /* endfor */	      if(j<Cluster[i].NumMembers-1){		strcat(xs,"+");		strcat(ys,"+");	      }	      else {		strcat(xs,")");		strcat(ys,")");	      }    } /* endfor */    for (k=0; k<SizeVector; k++)	 {            		//traverse elements of vector	    	  tmp[k]=tmp[k]/Cluster[i].NumMembers;	     	 if (tmp[k] != Cluster[i].Center[k])				ConvFlag=FALSE;	    	  Cluster[i].Center[k]=tmp[k];   	 } /* endfor */   	 printf("%s,\n",xs);   	 printf("%s\n",ys);	  } /* endfor */	  return ConvFlag;}void System::ShowClusters(){  int cl;  for (cl=0; cl<NumClusters; cl++) {    printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);  } /* endfor */}void System::SaveClusters(char *fname){}main(int argc, char *argv[]) {
	  System kmeans;  char* address = "km2.dat";  if (kmeans.LoadPatterns(address)==FAILURE ){    printf("UNABLE TO READ PATTERN_FILE:%s\n",address);    exit(0);  }  kmeans.InitClusters();  kmeans.RunKMeans();  kmeans.ShowClusters();}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -