📄 cmean.cpp
字号:
//////////////////////////////////////////////////////////////////////////
//k-mean 聚类创建日期2006年5月22日
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <time.h>
#include <math.h>
long int posnum;
const int M=1000;
const double NUM_MAX=100000000;
const double NUM_MIN=0.00000001;
int knum;
int row;
int *numperclu;//每个簇的个数
//均值聚类数据结构
struct cluster
{
double *data;
int dataindex;
struct cluster *next;
};
typedef struct cluster ccnode;
typedef ccnode *ccLink;
ccLink kcluster;
//读文件,位置矩阵
double** readdata(char* s)
{
FILE* fp;
if ((fp=fopen(s,"rb"))==NULL)
{
printf("erro! cannot open file!\n");
exit(0);
}
int i,j;
double **tpos;
tpos= (double**)malloc(M*sizeof(double*));
for ( i=0;i<M;i++)
tpos[i] = (double*)malloc(M*sizeof(double));
char temp[100]={0};
char *p = NULL; double x;
for(i=0;!feof(fp);i++)
{
if((fgets(temp,100,fp))==NULL)
break;
p = temp;
j=0;
while(1)
{
x=atof(p);
tpos[i][j] = x;
j++;
if((p=strchr(p,' '))==NULL)
break;
p++;
}
}
posnum = i ; row = j;
double **pos;
pos= (double**)malloc(posnum*sizeof(double*));
for ( i=0;i<posnum;i++)
{
pos[i] = (double*)malloc(row*sizeof(double));
for(j=0;j<row;j++)
pos[i][j] = tpos[i][j];
free(tpos[i]);
}
free(tpos);
fclose(fp);
return pos;
}
void insertcluster(ccLink Head,double *key,int dataindex)
{
ccLink pointer;
pointer = Head;
// Head->data[0]++;
int i;
while (pointer->next!=NULL)
pointer=pointer->next;
ccLink New;
New = (ccLink)malloc(sizeof(ccnode));
pointer->next=New;
New->data=(double*)malloc(row*sizeof(double));
for (i=0;i<row;i++)
New->data[i]=key[i];
New->dataindex=dataindex;
New->next=NULL;
}
////////
double dist(double* d1,double *d2)
{
int i;double distant=0;
for (i=0;i<row;i++)
distant=distant+(d1[i]-d2[i])*(d1[i]-d2[i]);
distant=sqrt(distant);
return distant;
}
////////
int isexit(double** temp)
{
int i,j;double distant=0.0;
for (i=0;i<knum;i++)
{
/* for (j=0;j<row;j++)
{
printf("%lf ",temp[i][j]);
printf("%lf ",kcluster[i].data[j]);
}
*/
distant=distant+ dist(temp[i],kcluster[i].data);
}
printf("%lf\n",distant);
if (distant<NUM_MIN)
return 0;
else
return 1;
}
//重新计算均值并赋值
void calmean(ccLink Head)
{
int i;
int num=0;
double *sum=(double*)(malloc(row*sizeof(double)));
for (i=0;i<row;i++)
sum[i]=0;
ccLink pointer;
pointer=Head->next;
for (;pointer!=NULL;pointer=pointer->next)
{
for (i=0;i<row;i++)
sum[i]=sum[i]+pointer->data[i];
num++;
}
for (i=0;i<row;i++)
{
if (num!=0)
sum[i]=sum[i]/num;
else
sum[i]=0;
Head->data[i]=sum[i];
}
free(sum);
}
////////////////
double** initdata()
{
int i,j;
srand((unsigned)time(NULL));
double** pos;
pos = readdata("pos.txt");
numperclu = (int*)malloc(knum*sizeof(int));
for (i=0;i<knum;i++)
numperclu[i]=0;
int num;
int bestnum;
double min=NUM_MAX;
double distant;
kcluster=(ccLink)malloc(knum*sizeof(ccnode));
for (i=0;i<knum;i++)
kcluster[i].data=(double*)malloc(row*sizeof(double));
for (i=0;i<knum;i++)
{
num=rand()%posnum;
for (j=0;j<row;j++)
kcluster[i].data[j]=pos[num][j];
kcluster[i].dataindex = 0;
kcluster[i].next=NULL;
}
for (i=0;i<posnum;i++)
{
min = NUM_MAX;
for (j=0;j<knum;j++)
{
distant=dist(pos[i],kcluster[j].data);
if (distant<min)
{
min=distant;
bestnum=j;
}
}
insertcluster(&kcluster[bestnum],pos[i],i);
numperclu[bestnum]++;//簇内数目加一
}
for (i=0;i<knum;i++)
calmean(&kcluster[i]);
return pos;
}
//删除不在同一个簇内的节点,并查入到最近的簇中
void delandinsercluster(double* data,int bestnum)
{
int i;
for (i=0;i<knum;i++)
{
ccLink pointer;
pointer = &kcluster[i];
ccLink back=pointer;
pointer=pointer->next;
for ( ;pointer!=NULL;pointer=pointer->next)
{
if (dist(data,pointer->data)<NUM_MIN)
{
if (i!=bestnum)
{
//查入到bestnum簇中
insertcluster(&kcluster[bestnum],data,pointer->dataindex);
numperclu[bestnum]++;//簇内数目加一
//删除i簇中的节点,并查入到bestnum簇中
back->next=pointer->next;
free(pointer->data);
free(pointer);
numperclu[i]--;//簇内数目减一
}
break;
}//end if
back = pointer;
}//end for
}//end for
}
////////////////////
int kmeancluster(double** pos)
{
int i,j;
int bestnum=0;
double distance=0.0;
for (i=0;i<posnum;i++)
{
double min=NUM_MAX;
for (j=0;j<knum;j++)
{
distance=dist(pos[i],kcluster[j].data);
if (distance<min)
{
min=distance;
bestnum=j;
}
}
delandinsercluster(pos[i],bestnum);
}
double** temp=(double**)malloc(knum*sizeof(double*));
for (i=0;i<knum;i++)
temp[i]=(double*)malloc(row*sizeof(double));
for (i=0;i<knum;i++)
for (j=0;j<row;j++)
temp[i][j]=kcluster[i].data[j];//后面的值
for (i=0;i<knum;i++)
calmean(&kcluster[i]);
int isstop = isexit(temp);
for (i=0;i<knum;i++)
free(temp[i]);
free(temp);
return isstop;
}
/////////////////
void printcluster()
{
ccLink pointer;
FILE* fp;
if ((fp=fopen("out.txt","w"))==NULL)
{
printf("error! can't open out file!\n");
exit(0);
}
int i,j;
for (i=0;i<knum;i++)
{
printf("第%d组:数目:%d节点:",i,numperclu[i]);
fprintf(fp,"%d %d ",i,numperclu[i]);
for (pointer=kcluster[i].next;pointer!=NULL;
pointer=pointer->next )
{
printf("%d ",pointer->dataindex);
fprintf(fp,"%d ",pointer->dataindex);
}
printf("\n");
fprintf(fp,"\n");
}
fclose(fp);
}
////////////
void freeall()
{
free(numperclu);
int i;
ccLink pointer;
ccLink tpointer;
for(i=0;i<knum;i++)
{
pointer=&kcluster[i];
pointer=pointer->next;
while (pointer!=NULL)
{
tpointer=pointer;
pointer=pointer->next;
free(tpointer->data);
free(tpointer);
}
free(kcluster[i].data);
}
free(kcluster);
}
// main
int main()
{
knum=3;
int i=0;
double **pos = initdata();
int isstop=1;
while (isstop)
{
i++;
isstop = kmeancluster(pos);
printf("第%d次迭代\n",i);
}
printcluster();
freeall();
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -