⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.cpp

📁 矢量量化聚类程序
💻 CPP
字号:
/****************************************************************************
*                                                                           *
*  KMEANS                                                                   *
*                                                                           *
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>

// FUNCTION PROTOTYPES


// DEFINES
#define         SUCCESS         1
#define         FAILURE         0
#define         TRUE            1
#define         FALSE           0
#define         MAXVECTDIM      20
#define         MAXPATTERN      20
#define         MAXCLUSTER      10

char *f2a(double x, int width)
{
   char cbuf[255];
   char *cp;
   int i,k;
   int d,s;
   cp=fcvt(x,width,&d,&s);
   if (s) 
   {
   strcpy(cbuf,"-");
   }
   else 
   {
   strcpy(cbuf," ");
   } /* endif */
   if (d>0)
   {
     for (i=0; i<d; i++)
	 {
      cbuf[i+1]=cp[i];
     } /* endfor */
   cbuf[d+1]=0;
   cp+=d;
   strcat(cbuf,".");
   strcat(cbuf,cp);
   } 
   else
   {
      if (d==0)
	  {
         strcat(cbuf,".");
         strcat(cbuf,cp);
      } 
       else
	   {
         k=-d;
         strcat(cbuf,".");
         for (i=0; i<k; i++)
		 {
            strcat(cbuf,"0");
         } /* endfor */
         strcat(cbuf,cp);
       } /* endif */
   } /* endif */
   cp=&cbuf[0];
   return cp;
}

// ***** Defined structures & classes *****
  struct aCluster {
   double       Center[MAXVECTDIM];
   int          Member[MAXPATTERN];  //Index of Vectors belonging to this cluster属于这个类的矢量标号(每个类最多20个矢量?)
   int          NumMembers;//聚类过程中属于这个类的矢量数
  };

  struct aVector {
   double       Center[MAXVECTDIM];
   int          Size;
  };

  class System {
   private:
   double       Pattern[MAXPATTERN][MAXVECTDIM+1];
   aCluster     Cluster[MAXCLUSTER];
   int          NumPatterns;          // Number of patterns:训练矢量数
   int          SizeVector;           // Number of dimensions in vector矢量维数
   int          NumClusters;          // Number of clusters:聚类数
   void         DistributeSamples();  // Step 2 of K-means algorithm
   int          CalcNewClustCenters();// Step 3 of K-means algorithm
   double       EucNorm(int, int);   // Calc Euclidean norm vector
   int          FindClosestCluster(int); //ret indx of clust closest to pattern
                                         //whose index is arg
   public:
   system();
   int LoadPatterns(char *fname);      // Get pattern data to be clustered
   void InitClusters();                // Step 1 of K-means algorithm
   void RunKMeans();                   // Overall control K-means process
   void ShowClusters();                // Show results on screen
   void SaveClusters(char *fname);     // Save results to file
   void ShowCenters();
  };

  void System::ShowCenters()
  {
   int i;
   printf("Cluster centers:\n");
   for (i=0; i<NumClusters; i++) 
   {
   Cluster[i].Member[0]=i;
   printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
   } /* endfor */
   printf("\n");
  }

  int System::LoadPatterns(char *fname)
  {
   FILE *InFilePtr;
   int    i,j;
   double x;
   if((InFilePtr = fopen(fname, "r")) == NULL)
   return FAILURE;
   fscanf(InFilePtr, "%d", &NumPatterns);  // Read # of patterns
   fscanf(InFilePtr, "%d", &SizeVector);   // Read dimension of vector
   fscanf(InFilePtr, "%d", &NumClusters);  // Read # of clusters for K-Means
   for (i=0; i<NumPatterns; i++)
   {     // For each vector
      for (j=0; j<SizeVector; j++)
	  {       // create a pattern
         fscanf(InFilePtr,"%lg",&x);       // consisting of all elements
         Pattern[i][j]=x;//何处定义?
      } /* endfor */
   } /* endfor */
   printf("Input patterns:\n");
   for (i=0; i<NumPatterns; i++)
   {
       printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);//为何只显示前两维?
   } /* endfor */
   printf("\n--------------------\n");
   return SUCCESS;
  }
//***************************************************************************
// InitClusters                                                             *
//   Arbitrarily assign a vector to each of the K clusters                  *
//   We choose the first K vectors to do this                               *
//***************************************************************************
  void System::InitClusters()
  {
   int i,j;
   printf("Initial cluster centers:\n");
   for (i=0; i<NumClusters; i++) 
   {
      Cluster[i].Member[0]=i;//?
      for (j=0; j<SizeVector; j++) 
	  {
          Cluster[i].Center[j]=Pattern[i][j];
      } /* endfor */
   } /* endfor */
   for (i=0; i<NumClusters; i++) 
   {
       printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
   } /* endfor */
   printf("\n");
 }

  void System::RunKMeans()
  {
   int converged;
   int pass;
   pass=1;
   converged=FALSE;
   while (converged==FALSE)
   {
     printf("PASS=%d\n",pass++);
     DistributeSamples();
     converged=CalcNewClustCenters();
     ShowCenters();
   } /* endwhile */
  }

  double System::EucNorm(int p, int c)
  {   // Calc Euclidean norm of vector difference
   double dist,x;      // between pattern vector, p, and cluster center, c.
   int i;                                  
   char zout[128];
   char znum[40];
   char *pnum;

   pnum=&znum[0];
   strcpy(zout,"d=sqrt(");
   printf("The distance from pattern %d to cluster %d is calculated as:\n",c,p);
   dist=0;
   for (i=0; i<SizeVector ;i++)
   {
       x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
       strcat(zout,f2a(x,4));
       if (i==0)
       strcat(zout,"+");
       dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
   } /* endfor */
   printf("%s)\n",zout);
   return dist;
  }  

 int System::FindClosestCluster(int pat)
 {
   int i, ClustID;
   double MinDist, d;
   MinDist =9.9e+99;
   ClustID=-1;
   for (i=0; i<NumClusters; i++)
   {
      d=EucNorm(pat,i);
      printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
      if (d<MinDist) 
	  {
        MinDist=d;
        ClustID=i;
	  } /* endif */
   } /* endfor */
   if (ClustID<0)
   {
     printf("Aaargh");
     exit(0);
   } /* endif */
  return ClustID;
 }

 void System::DistributeSamples()
 {
   int i,pat,Clustid,MemberIndex;
   //Clear membership list for all current clusters
   for (i=0; i<NumClusters;i++)
   {
      Cluster[i].NumMembers=0;
   }
   for (pat=0; pat<NumPatterns; pat++) 
   {
      //Find cluster center to which the pattern is closest
      Clustid= FindClosestCluster(pat);
      printf("patern %d assigned to cluster %d\n\n",pat,Clustid);
      //post this pattern to the cluster
      MemberIndex=Cluster[Clustid].NumMembers;
      Cluster[Clustid].Member[MemberIndex]=pat;//?
      Cluster[Clustid].NumMembers++;
   } /* endfor */
 }

 int  System::CalcNewClustCenters()
 {
   int ConvFlag,VectID,i,j,k;
   double tmp[MAXVECTDIM];
   char xs[255];
   char ys[255];
   char nc1[20];
   char nc2[20];
   char *pnc1;
   char *pnc2;
   //char *fpv;

   pnc1=&nc1[0];
   pnc2=&nc2[0];
   ConvFlag=TRUE;
   printf("The new cluster centers are now calculated as:\n");
   for (i=0; i<NumClusters; i++)
   {              //for each cluster
       pnc1=itoa(Cluster[i].NumMembers,nc1,10);
       pnc2=itoa(i,nc2,10);
       strcpy(xs,"Cluster Center");
       strcat(xs,nc2);
       strcat(xs,"(1/");
       strcpy(ys,"(1/");
       strcat(xs,nc1);
       strcat(ys,nc1);
       strcat(xs,")(");
       strcat(ys,")(");
       for (j=0; j<SizeVector; j++) 
	   {            // clear workspace
           tmp[j]=0.0;
       } /* endfor */
       for (j=0; j<Cluster[i].NumMembers; j++) 
	   {   //traverse member vectors
           VectID=Cluster[i].Member[j];
           for (k=0; k<SizeVector; k++)
		   {         //traverse elements of vector
               tmp[k] += Pattern[VectID][k];       // add (member) pattern elmnt into temp
               if (k==0) 
			   {
                  strcat(xs,f2a(Pattern[VectID][k],3));
			   } 
			   else 
			   {
                 strcat(ys,f2a(Pattern[VectID][k],3));
               } /* endif */
		   } /* endfor */
          if(j<Cluster[i].NumMembers-1)
		  {
            strcat(xs,"+");
            strcat(ys,"+");
		  }
          else
		  {
            strcat(xs,")");
            strcat(ys,")");
		  }
      } /* endfor */
      for (k=0; k<SizeVector; k++)
	  {            //traverse elements of vector
          tmp[k]=tmp[k]/Cluster[i].NumMembers;
          if (tmp[k] != Cluster[i].Center[k])
              ConvFlag=FALSE;
          Cluster[i].Center[k]=tmp[k];
      } /* endfor */
      printf("%s,\n",xs);
      printf("%s\n",ys);
   } /* endfor */
   return ConvFlag;
 }

 void System::ShowClusters()
 {
   int cl;
   for (cl=0; cl<NumClusters; cl++)
   {
       printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);
   } /* endfor */
 }

 void System::SaveClusters(char *fname)
 {
 }


 void main(int argc, char *argv[])
 {
   System kmeans;
   if (argc<2) 
   {
      printf("USAGE: KMEANS PATTERN_FILE\n");
      exit(0);
   }
   if (kmeans.LoadPatterns(argv[1])==FAILURE )
   {
      printf("UNABLE TO READ PATTERN_FILE:%s\n",argv[1]);
      exit(0);
   }
   kmeans.InitClusters();
   kmeans.RunKMeans();
   kmeans.ShowClusters();
 }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -