⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.cpp

📁 神经网络中的K均值聚类算法II: 1.KMIn为输入数据文本
💻 CPP
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>

// function prototypes


// defines
#define         success         1
#define         failure         0
#define         true            1
#define         false           0
#define         maxvectdim      20
#define         maxpattern      20
#define         maxcluster      10
#define         STUDYRATE       0.6


char *f2a(double x, int width){
  char cbuf[255];
  char *cp;
  int i,k;
  int d,s;
cp=fcvt(x,width,&d,&s);
if (s) {
  strcpy(cbuf,"-");
  }
else {
  strcpy(cbuf," ");
  } /* endif */
if (d>0) {
  for (i=0; i<d; i++) {
     cbuf[i+1]=cp[i];
     } /* endfor */
  cbuf[d+1]=0;
  cp+=d;
  strcat(cbuf,".");
  strcat(cbuf,cp);
  } else {
     if (d==0) {
        strcat(cbuf,".");
        strcat(cbuf,cp);
        } 
      else {
        k=-d;
        strcat(cbuf,".");
        for (i=0; i<k; i++) {
           strcat(cbuf,"0");
           } /* endfor */
        strcat(cbuf,cp);
        } /* endif */
  } /* endif */
cp=&cbuf[0];
return cp;
}



// ***** defined structures & classes *****
struct acluster {
  double       center[maxvectdim];
  int          member[maxpattern];  //index of vectors belonging to this cluster
  int          nummembers;
};

struct avector {
  double       center[maxvectdim];
  int          size;
};

class System {
private:
  double       pattern[maxpattern][maxvectdim+1];
  acluster     cluster[maxcluster];
  int          numpatterns;          // number of patterns
  int          sizevector;           // number of dimensions in vector
  int          numclusters;          // number of clusters
  void         distributesamples();  // step 2 of k-means algorithm
  int          calcnewclustcenters();// step 3 of k-means algorithm
  double       eucnorm(int, int);   // calc euclidean norm vector
  int          findclosestcluster(int); //ret indx of clust closest to pattern
                                        //whose index is arg
public:
  system();
  int loadpatterns(char *fname);      // get pattern data to be clustered
  void initclusters();                // step 1 of k-means algorithm
  void runkmeans();                   // overall control k-means process
  void showclusters();                // show results on screen
  void saveclusters(char *fname);     // save results to file
  void showcenters();
  int CalFinalClustersCenter();
};

void System::showcenters(){
int i,j;
printf("cluster centers:\n");
for (i=0; i<numclusters; i++) {
  cluster[i].member[0]=i;
  printf("clustercenter[%d]=(%f,%f)\n",i,cluster[i].center[0],cluster[i].center[1]);
  } /* endfor */
printf("\n");
}

int System::loadpatterns(char *fname){
  FILE   *infileptr;
  int    i,j;
  double x;
if((infileptr = fopen(fname, "r")) == NULL)
   return failure;
fscanf(infileptr, "%d", &numpatterns);  // read # of patterns
fscanf(infileptr, "%d", &sizevector);   // read dimension of vector
fscanf(infileptr, "%d", &numclusters);  // read # of clusters for k-means
for (i=0; i<numpatterns; i++) {         // for each vector
  for (j=0; j<sizevector; j++) {       // create a pattern
     fscanf(infileptr,"%lg",&x);       // consisting of all elements
     pattern[i][j]=x;
     } /* endfor */
  } /* endfor */
printf("input patterns:\n");
for (i=0; i<numpatterns; i++) {
  printf("pattern[%d]=(%2.3f,%2.3f)\n",i,pattern[i][0],pattern[i][1]);
  } /* endfor */
printf("\n--------------------\n");
return success;
}
//***************************************************************************
// initclusters                                                             *
//   arbitrarily assign a vector to each of the k clusters                  *
//   we choose the first k vectors to do this                               *
//***************************************************************************
void System::initclusters()
{
    int i,j;
	int Flag = 0;
	int Pass = 0;
    printf("initial cluster centers:\n");
    for (i=0; i<numclusters; i++)
	{
        cluster[i].member[0]=i;
        for (j=0; j<sizevector; j++) 
		{
            cluster[i].center[j]=pattern[i][j];
		} /* endfor */
	} /* endfor */
    for (i=0; i<numclusters; i++) 
	{
        printf("clustercenter[%d]=(%f,%f)\n",i,cluster[i].center[0],cluster[i].center[1]);
	} /* endfor */
    printf("\n");
	while(Flag == 0)
    { 
		Flag = CalFinalClustersCenter(); 
        Pass++;
		if(Pass > 20000)
        {
		    printf("There is no answer of this question since the pass is: %d\n",Pass);
	        exit(0);   
		}
    }
    distributesamples();
}

int System :: CalFinalClustersCenter()
{
    
    int i = 0;
    int j = 0;
    int p,q;
	int ClusterId;
	int Flag;
	double tmp[maxcluster][maxvectdim];
    /* 随机在总点数中找出一个点*/
    p = rand()%numpatterns; 
    ClusterId = findclosestcluster(p);
    printf("随机选取的第 %d 个点属于第 %d 类\n\n",p,ClusterId);
    for(i = 0; i<numclusters;i++)
	{
	    for(j = 0; j<sizevector;j++)
		{
		    tmp[i][j] = cluster[i].center[j];
		}
	}
	for(i = 0; i<sizevector;i++)
    {
		tmp[ClusterId][i] = cluster[ClusterId].center[i] + STUDYRATE * (pattern[p][i] - cluster[ClusterId].center[i]);
    }
    for(i = 0; i<numclusters;i++)
	{
	    for(j = 0; j<sizevector;j++)
		{
		    if((tmp[i][j] - cluster[i].center[j])*(tmp[i][j] - cluster[i].center[j]) < 0.0001)
			{
			    Flag = 1;   
			}
			else
			{
		 	    Flag = 0;
				for(q = 0; q<sizevector;q++)
				{
				    cluster[ClusterId].center[q] = tmp[ClusterId][q];   
				}
                break;
			}
		}
	    if(Flag == 0)
        {
		    break;
		}
	}
    return Flag;
}
void System::runkmeans()
{
    int converged;
    int pass;
    
	pass=1;
    converged=false;
    while (converged==false) 
	{
        printf("pass=%d\n",pass++);
        if(pass >= 20000)
		{
            printf("There is no answer of this question since the pass is: %d\n",pass);
	        exit(0);
		}
        distributesamples();
        converged=calcnewclustcenters();
        showcenters();
	} /* endwhile */
}

double System::eucnorm(int p, int c)
{                                       // calc euclidean norm of vector difference
double dist,x;                          // between pattern vector, p, and cluster
int i;                                  // center, c.
char zout[128];
char znum[40];
char *pnum;

pnum=&znum[0];
strcpy(zout,"d=sqrt(");
printf("the distance from pattern %d to cluster %d is calculated as:\n",p,c);
dist=0;
for (i=0; i<sizevector ;i++){
  x=(cluster[c].center[i]-pattern[p][i])*(cluster[c].center[i]-pattern[p][i]);
  strcat(zout,f2a(x,4));
  if (i==0)
     strcat(zout,"+");
  dist += (cluster[c].center[i]-pattern[p][i])*(cluster[c].center[i]-pattern[p][i]);
  } /* endfor */
printf("%s)\n",zout);
return dist;
}

int System::findclosestcluster(int pat){
  int i, clustid;
  double mindist, d;
mindist =9.9e+99;
clustid=-1;
for (i=0; i<numclusters; i++) 
{
  d=eucnorm(pat,i);
  printf("distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
  if (d<mindist) 
  {
     mindist=d;
     clustid=i;
  } /* endif */
} /* endfor */
if (clustid<0) 
{
  printf("aaargh");
  exit(0);
} /* endif */
return clustid;
}

void System::distributesamples()
{
int i,pat,clustid,memberindex;
//clear membership list for all current clusters
for (i=0; i<numclusters;i++)
{
  cluster[i].nummembers=0;
}
for (pat=0; pat<numpatterns; pat++) 
{
  //find cluster center to which the pattern is closest
  clustid= findclosestcluster(pat);
  printf("patern %d assigned to cluster %d\n\n",pat,clustid);
  //post this pattern to the cluster
  memberindex=cluster[clustid].nummembers;
  cluster[clustid].member[memberindex]=pat;
  cluster[clustid].nummembers++;
  } /* endfor */
}

int  System::calcnewclustcenters(){
  int convflag,vectid,i,j,k;
  double tmp[maxvectdim];
  char xs[255];
  char ys[255];
  char nc1[20];
  char nc2[20];
  char *pnc1;
  char *pnc2;
  char *fpv;

pnc1=&nc1[0];
pnc2=&nc2[0];
convflag=true;
printf("the new cluster centers are now calculated as:\n");
for (i=0; i<numclusters; i++) {              //for each cluster
  pnc1=itoa(cluster[i].nummembers,nc1,10);
  pnc2=itoa(i,nc2,10);
  strcpy(xs,"cluster center");
  strcat(xs,nc2);
  strcat(xs,"(1/");
  strcpy(ys,"(1/");
  strcat(xs,nc1);
  strcat(ys,nc1);
  strcat(xs,")(");
  strcat(ys,")(");
  for (j=0; j<sizevector; j++) {                 // clear workspace
     tmp[j]=0.0;
     } /* endfor */
  for (j=0; j<cluster[i].nummembers; j++) 
  {                                              //traverse member vectors
     vectid=cluster[i].member[j];
     for (k=0; k<sizevector; k++) 
	 {                                           //traverse elements of vector
        tmp[k] += pattern[vectid][k];            // add (member) pattern elmnt into temp
        if (k==0) 
		{
             strcat(xs,f2a(pattern[vectid][k],3));
        } 
		else 
		{
             strcat(ys,f2a(pattern[vectid][k],3));
        } /* endif */
     } /* endfor */
     if(j<cluster[i].nummembers-1){
        strcat(xs,"+");
        strcat(ys,"+");
        }
       else {
        strcat(xs,")");
        strcat(ys,")");
        }
     } /* endfor */
  for (k=0; k<sizevector; k++) 
  {                                            //traverse elements of vector
     tmp[k]=tmp[k]/cluster[i].nummembers;
     if (tmp[k] != cluster[i].center[k])
        convflag=false;
     cluster[i].center[k]=tmp[k];
  } /* endfor */
  printf("%s,\n",xs);
  printf("%s\n",ys);
  } /* endfor */
return convflag;
}

void System::showclusters()
{
  int cl,c2;
  int byloop;
  int byloop2;
  int byIndex;
  FILE   *infileptr;
  
  infileptr = fopen("KM2OUT.DAT", "wb");
  for (cl=0; cl<numclusters; cl++) 
  {
      printf("\ncluster %d ==>[%f,%f]\n", cl,cluster[cl].center[0],cluster[cl].center[1]);
      fprintf(infileptr,"所对应的第%d类的中心点为:[%f,%f]  ",cl,cluster[cl].center[0],cluster[cl].center[1]);
  }
  fprintf(infileptr,"K均值聚类分为:%d类  ",numclusters);
  for(cl=0; cl < numclusters; cl++)
  {
	  fprintf(infileptr,"其中属于第%d类的点的个数为:%d ",cl,cluster[cl].nummembers);   
      for(byloop=0;byloop<cluster[cl].nummembers;byloop++)
	  {
	      byIndex = cluster[cl].member[byloop];
		  fprintf(infileptr,"第%d个点",byIndex+1);
          for(byloop2 = 0; byloop2<sizevector;byloop2++)
		  {
		      fprintf(infileptr,"%f ",pattern[byIndex][byloop2]);
		  }
	  
	  }
  }

}

void System::saveclusters(char *fname)
{
}


main(int argc, char *argv[]) 
{
  System kmeans;
  if (kmeans.loadpatterns("KMIn.DAT")==failure )
{
    printf("unable to read pattern_file:%s\n",argv[1]);
    exit(0);
}
kmeans.initclusters();
kmeans.showclusters();
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -