⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmean.cpp

📁 k均值算法实现聚类 c语言编写
💻 CPP
字号:
/****************************************************************************
*                                                                           *
*  K平均算法                                                              *
*                                                                           *
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>


// 定义
#define         SUCCESS         1
#define         FAILURE         0
#define         TRUE            1
#define         FALSE           0
#define         MAXVECTDIM      20
#define         MAXPATTERN      20
#define         MAXCLUSTER      10



char *f2a(double x, int width)
{
   char cbuf[255];
   char *cp;
   int i,k;
   int d,s;
cp=fcvt(x,width,&d,&s);
if (s) 
{
   strcpy(cbuf,"-");
   }
 else 
 {
   strcpy(cbuf," ");
 }                             
if (d>0) 
{
   for (i=0; i<d; i++) 
   {
      cbuf[i+1]=cp[i];
   }                           
   cbuf[d+1]=0;
   cp+=d;
   strcat(cbuf,".");
   strcat(cbuf,cp);
   } 
else 
{
      if (d==0) 
	  {
         strcat(cbuf,".");
         strcat(cbuf,cp);
         } 
       else
	   {
         k=-d;
         strcat(cbuf,".");
         for (i=0; i<k; i++) 
		 {
            strcat(cbuf,"0");
		 }                             
         strcat(cbuf,cp);
	   }                                 
}                                         
cp=&cbuf[0];
return cp;
}




// 定义结构和类
struct aCluster 
{
   double       Center[MAXVECTDIM];
   int          Member[MAXPATTERN];  //某一聚类中各个所属成员矢量的索引
   int          NumMembers;
};

struct aVector 
{
   double       Center[MAXVECTDIM];
   int          Size;
};

class System 
{
private:
   double       Pattern[MAXPATTERN][MAXVECTDIM+1];
   aCluster     Cluster[MAXCLUSTER];
   int          NumPatterns;          // 模式数
   int          SizeVector;           // 矢量维数
   int          NumClusters;          // 聚类数
   void         DistributeSamples();  // k均值算法第二步
   int          CalcNewClustCenters();// 第三步
   double       EucNorm(int, int);   // 计算欧几里得距离
   int          FindClosestCluster(int); //将矢量归入离其聚类最近的聚类
public:
   system();
   int LoadPatterns(char *fname);      // 读入模式数
   void InitClusters();                // k均值算法的第一步
   void RunKMeans();                   // k均值算法的全局控制函数
   void ShowClusters();                // 在屏幕上显示结果
   void SaveClusters(char *fname);     // 保存结果
   void ShowCenters();
};

void System::ShowCenters()
{
int i,j;
printf("Cluster centers:\n");
for (i=0; i<NumClusters; i++) 
{
   Cluster[i].Member[0]=i;
   printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
}                                                    
printf("\n");
}

int System::LoadPatterns(char *fname)
{
   FILE *InFilePtr;
   int    i,j;
   double x;
if((InFilePtr = fopen(fname, "r")) == NULL)
    return FAILURE;
fscanf(InFilePtr, "%d", &NumPatterns);  // 读数据类型
fscanf(InFilePtr, "%d", &SizeVector);   // 读矢量维数
fscanf(InFilePtr, "%d", &NumClusters);  // 读k均值算法分类的类数
for (i=0; i<NumPatterns; i++) {         // 对每一个模式
   for (j=0; j<SizeVector; j++) {       
      fscanf(InFilePtr,"%lg",&x);       // 该k分类算法处理了所有待分类的模式
      Pattern[i][j]=x;
   }                                    
}                                        
printf("Input patterns:\n");
for (i=0; i<NumPatterns; i++) {
   printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
}                                       
printf("\n--------------------\n");
return SUCCESS;
}
//***************************************************************************
// 初始化聚类                                                           *
//   将某矢量任意分配给k个聚类(选前k个),此算法中k=2                 *
//***************************************************************************
void System::InitClusters()
{
int i,j;
printf("Initial cluster centers:\n");
for (i=0; i<NumClusters; i++) 
{
   Cluster[i].Member[0]=i;
   for (j=0; j<SizeVector; j++) 
   {
      Cluster[i].Center[j]=Pattern[i][j];
   }                                                       
}                                                              
for (i=0; i<NumClusters; i++) 
{
   printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
}                                                         
printf("\n");
}

void System::RunKMeans()
{
  int converged;
  int pass;
pass=1;
converged=FALSE;
while (converged==FALSE) 
{
   printf("PASS=%d\n",pass++);
   DistributeSamples();
   converged=CalcNewClustCenters();
   ShowCenters();
}                                                            
}

double System::EucNorm(int p, int c)     // 计算矢量p与聚类之间的欧几里得距离,c为聚类中心
{   
double dist,x;                          
int i;                                  
char zout[128];
char znum[40];
char *pnum;

pnum=&znum[0];
strcpy(zout,"d=sqrt(");
printf("The distance from pattern %d to cluster %d is calculated as:\n",c,p);
dist=0;
for (i=0; i<SizeVector ;i++)
{
   x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
   strcat(zout,f2a(x,4));
   if (i==0)
      strcat(zout,"+");
   dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
}                                                            
printf("%s)\n",zout);
return dist;
}

int System::FindClosestCluster(int pat)
{
   int i, ClustID;
   double MinDist, d;
MinDist =9.9e+99;
ClustID=-1;
for (i=0; i<NumClusters; i++) 
{
   d=EucNorm(pat,i);
   printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
   if (d<MinDist) 
   {
      MinDist=d;
      ClustID=i;
   }                                                     
}                                                        
if (ClustID<0)
 {
   printf("Aaargh");
   exit(0);
}                                                       
return ClustID;
}

void System::DistributeSamples()
{
int i,pat,Clustid,MemberIndex;
for (i=0; i<NumClusters;i++){
   Cluster[i].NumMembers=0;
   }
for (pat=0; pat<NumPatterns; pat++)           //找出与模式最近的聚类中心
{
   Clustid= FindClosestCluster(pat);
   printf("patern %d assigned to cluster %d\n\n",pat,Clustid);
   //将该模式归入此聚类
   MemberIndex=Cluster[Clustid].NumMembers;
   Cluster[Clustid].Member[MemberIndex]=pat;
   Cluster[Clustid].NumMembers++;
}                                                     
}

int  System::CalcNewClustCenters()
{
   int ConvFlag,VectID,i,j,k;
   double tmp[MAXVECTDIM];
   char xs[255];
   char ys[255];
   char nc1[20];
   char nc2[20];
   char *pnc1;
   char *pnc2;
   char *fpv;

pnc1=&nc1[0];
pnc2=&nc2[0];
ConvFlag=TRUE;
printf("The new cluster centers are now calculated as:\n");
for (i=0; i<NumClusters; i++)                              //对每一个聚类进行处理
{              
   pnc1=itoa(Cluster[i].NumMembers,nc1,10);
   pnc2=itoa(i,nc2,10);
   strcpy(xs,"Cluster Center");
   strcat(xs,nc2);
   strcat(xs,"(1/");
   strcpy(ys,"(1/");
   strcat(xs,nc1);
   strcat(ys,nc1);
   strcat(xs,")(");
   strcat(ys,")(");
   for (j=0; j<SizeVector; j++)                         // 清除工作区
   {            
      tmp[j]=0.0;
   }                                                      
   for (j=0; j<Cluster[i].NumMembers; j++)                //遍历各个聚类中的矢量成员
   { 
      VectID=Cluster[i].Member[j];
      for (k=0; k<SizeVector; k++) 
	  {                   
         tmp[k] += Pattern[VectID][k];                  
         if (k==0) {
              strcat(xs,f2a(Pattern[VectID][k],3));
            } else {
              strcat(ys,f2a(Pattern[VectID][k],3));
			}                                            
	  }                                                  
      if(j<Cluster[i].NumMembers-1)
	  {
         strcat(xs,"+");
         strcat(ys,"+");
         }
        else 
		{
         strcat(xs,")");
         strcat(ys,")");
         }
   }                                                        
   for (k=0; k<SizeVector; k++)                             //遍历矢量中的成员要素    
   {            
      tmp[k]=tmp[k]/Cluster[i].NumMembers;
      if (tmp[k] != Cluster[i].Center[k])
         ConvFlag=FALSE;
      Cluster[i].Center[k]=tmp[k];
   }                                                        
   printf("%s,\n",xs);
   printf("%s\n",ys);
}                                                           
return ConvFlag;
}

void System::ShowClusters()
{
   int cl;
for (cl=0; cl<NumClusters; cl++) 
{
   printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);
}                                                           
}

void System::SaveClusters(char *fname)
{
}


main(int argc, char *argv[]) 
{
   System kmeans;
if (argc<2) {
   printf("USAGE: KMEANS PATTERN_FILE\n");
   exit(0);
   }
if (kmeans.LoadPatterns(argv[1])==FAILURE )
{
   printf("UNABLE TO READ PATTERN_FILE:%s\n",argv[1]);
   exit(0);
   }
kmeans.InitClusters();
kmeans.RunKMeans();
kmeans.ShowClusters();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -