📄 kmean.cpp
字号:
/****************************************************************************
* *
* K平均算法 *
* *
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
// 定义
#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10
char *f2a(double x, int width)
{
char cbuf[255];
char *cp;
int i,k;
int d,s;
cp=fcvt(x,width,&d,&s);
if (s)
{
strcpy(cbuf,"-");
}
else
{
strcpy(cbuf," ");
}
if (d>0)
{
for (i=0; i<d; i++)
{
cbuf[i+1]=cp[i];
}
cbuf[d+1]=0;
cp+=d;
strcat(cbuf,".");
strcat(cbuf,cp);
}
else
{
if (d==0)
{
strcat(cbuf,".");
strcat(cbuf,cp);
}
else
{
k=-d;
strcat(cbuf,".");
for (i=0; i<k; i++)
{
strcat(cbuf,"0");
}
strcat(cbuf,cp);
}
}
cp=&cbuf[0];
return cp;
}
// 定义结构和类
struct aCluster
{
double Center[MAXVECTDIM];
int Member[MAXPATTERN]; //某一聚类中各个所属成员矢量的索引
int NumMembers;
};
struct aVector
{
double Center[MAXVECTDIM];
int Size;
};
class System
{
private:
double Pattern[MAXPATTERN][MAXVECTDIM+1];
aCluster Cluster[MAXCLUSTER];
int NumPatterns; // 模式数
int SizeVector; // 矢量维数
int NumClusters; // 聚类数
void DistributeSamples(); // k均值算法第二步
int CalcNewClustCenters();// 第三步
double EucNorm(int, int); // 计算欧几里得距离
int FindClosestCluster(int); //将矢量归入离其聚类最近的聚类
public:
system();
int LoadPatterns(char *fname); // 读入模式数
void InitClusters(); // k均值算法的第一步
void RunKMeans(); // k均值算法的全局控制函数
void ShowClusters(); // 在屏幕上显示结果
void SaveClusters(char *fname); // 保存结果
void ShowCenters();
};
void System::ShowCenters()
{
int i,j;
printf("Cluster centers:\n");
for (i=0; i<NumClusters; i++)
{
Cluster[i].Member[0]=i;
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
}
printf("\n");
}
int System::LoadPatterns(char *fname)
{
FILE *InFilePtr;
int i,j;
double x;
if((InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE;
fscanf(InFilePtr, "%d", &NumPatterns); // 读数据类型
fscanf(InFilePtr, "%d", &SizeVector); // 读矢量维数
fscanf(InFilePtr, "%d", &NumClusters); // 读k均值算法分类的类数
for (i=0; i<NumPatterns; i++) { // 对每一个模式
for (j=0; j<SizeVector; j++) {
fscanf(InFilePtr,"%lg",&x); // 该k分类算法处理了所有待分类的模式
Pattern[i][j]=x;
}
}
printf("Input patterns:\n");
for (i=0; i<NumPatterns; i++) {
printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
}
printf("\n--------------------\n");
return SUCCESS;
}
//***************************************************************************
// 初始化聚类 *
// 将某矢量任意分配给k个聚类(选前k个),此算法中k=2 *
//***************************************************************************
void System::InitClusters()
{
int i,j;
printf("Initial cluster centers:\n");
for (i=0; i<NumClusters; i++)
{
Cluster[i].Member[0]=i;
for (j=0; j<SizeVector; j++)
{
Cluster[i].Center[j]=Pattern[i][j];
}
}
for (i=0; i<NumClusters; i++)
{
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
}
printf("\n");
}
void System::RunKMeans()
{
int converged;
int pass;
pass=1;
converged=FALSE;
while (converged==FALSE)
{
printf("PASS=%d\n",pass++);
DistributeSamples();
converged=CalcNewClustCenters();
ShowCenters();
}
}
double System::EucNorm(int p, int c) // 计算矢量p与聚类之间的欧几里得距离,c为聚类中心
{
double dist,x;
int i;
char zout[128];
char znum[40];
char *pnum;
pnum=&znum[0];
strcpy(zout,"d=sqrt(");
printf("The distance from pattern %d to cluster %d is calculated as:\n",c,p);
dist=0;
for (i=0; i<SizeVector ;i++)
{
x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
strcat(zout,f2a(x,4));
if (i==0)
strcat(zout,"+");
dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
}
printf("%s)\n",zout);
return dist;
}
int System::FindClosestCluster(int pat)
{
int i, ClustID;
double MinDist, d;
MinDist =9.9e+99;
ClustID=-1;
for (i=0; i<NumClusters; i++)
{
d=EucNorm(pat,i);
printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
if (d<MinDist)
{
MinDist=d;
ClustID=i;
}
}
if (ClustID<0)
{
printf("Aaargh");
exit(0);
}
return ClustID;
}
void System::DistributeSamples()
{
int i,pat,Clustid,MemberIndex;
for (i=0; i<NumClusters;i++){
Cluster[i].NumMembers=0;
}
for (pat=0; pat<NumPatterns; pat++) //找出与模式最近的聚类中心
{
Clustid= FindClosestCluster(pat);
printf("patern %d assigned to cluster %d\n\n",pat,Clustid);
//将该模式归入此聚类
MemberIndex=Cluster[Clustid].NumMembers;
Cluster[Clustid].Member[MemberIndex]=pat;
Cluster[Clustid].NumMembers++;
}
}
int System::CalcNewClustCenters()
{
int ConvFlag,VectID,i,j,k;
double tmp[MAXVECTDIM];
char xs[255];
char ys[255];
char nc1[20];
char nc2[20];
char *pnc1;
char *pnc2;
char *fpv;
pnc1=&nc1[0];
pnc2=&nc2[0];
ConvFlag=TRUE;
printf("The new cluster centers are now calculated as:\n");
for (i=0; i<NumClusters; i++) //对每一个聚类进行处理
{
pnc1=itoa(Cluster[i].NumMembers,nc1,10);
pnc2=itoa(i,nc2,10);
strcpy(xs,"Cluster Center");
strcat(xs,nc2);
strcat(xs,"(1/");
strcpy(ys,"(1/");
strcat(xs,nc1);
strcat(ys,nc1);
strcat(xs,")(");
strcat(ys,")(");
for (j=0; j<SizeVector; j++) // 清除工作区
{
tmp[j]=0.0;
}
for (j=0; j<Cluster[i].NumMembers; j++) //遍历各个聚类中的矢量成员
{
VectID=Cluster[i].Member[j];
for (k=0; k<SizeVector; k++)
{
tmp[k] += Pattern[VectID][k];
if (k==0) {
strcat(xs,f2a(Pattern[VectID][k],3));
} else {
strcat(ys,f2a(Pattern[VectID][k],3));
}
}
if(j<Cluster[i].NumMembers-1)
{
strcat(xs,"+");
strcat(ys,"+");
}
else
{
strcat(xs,")");
strcat(ys,")");
}
}
for (k=0; k<SizeVector; k++) //遍历矢量中的成员要素
{
tmp[k]=tmp[k]/Cluster[i].NumMembers;
if (tmp[k] != Cluster[i].Center[k])
ConvFlag=FALSE;
Cluster[i].Center[k]=tmp[k];
}
printf("%s,\n",xs);
printf("%s\n",ys);
}
return ConvFlag;
}
void System::ShowClusters()
{
int cl;
for (cl=0; cl<NumClusters; cl++)
{
printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);
}
}
void System::SaveClusters(char *fname)
{
}
main(int argc, char *argv[])
{
System kmeans;
if (argc<2) {
printf("USAGE: KMEANS PATTERN_FILE\n");
exit(0);
}
if (kmeans.LoadPatterns(argv[1])==FAILURE )
{
printf("UNABLE TO READ PATTERN_FILE:%s\n",argv[1]);
exit(0);
}
kmeans.InitClusters();
kmeans.RunKMeans();
kmeans.ShowClusters();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -