// File: isodata.cpp
// (Code-viewer "font size" control: web page residue, not part of the program.)
// ISODATA.cpp : Defines the entry point for the console application.
//
#include <fstream.h>
#include <iostream.h>
#include <stdio.h>
#include <stdlib.h>
#include "math.h"
// Capacity of the fixed-size sample and cluster arrays declared in main.
#define N 150
// Tolerance for treating two distances as equal when a sample is assigned
// to its nearest cluster center.
#define eps 0.00001
// One data sample: a sequence number followed by four float features.
// Member order matters: records are read field by field in this order.
struct Pointf
{
    int sequence;           // sequence number of the sample
    float x1, x2, x3, x4;   // the four feature values
};
// A cluster center: four float coordinates, one per sample feature.
// Unlike Pointf it carries no sequence number.
struct PointZ
{
    float x1, x2, x3, x4;   // center coordinates for features 1..4
};
// Euclidean distance between two samples over the four features x1..x4.
// The sequence field does not take part in the computation.
float CalDistancef(Pointf x1,Pointf x2)
{
    const float d1 = x1.x1 - x2.x1;
    const float d2 = x1.x2 - x2.x2;
    const float d3 = x1.x3 - x2.x3;
    const float d4 = x1.x4 - x2.x4;
    return sqrtf(d1*d1 + d2*d2 + d3*d3 + d4*d4);
}
// Euclidean distance between two cluster centers over their four coordinates.
float CalDistanceZ(PointZ x1,PointZ x2)
{
    const float d1 = x1.x1 - x2.x1;
    const float d2 = x1.x2 - x2.x2;
    const float d3 = x1.x3 - x2.x3;
    const float d4 = x1.x4 - x2.x4;
    return sqrtf(d1*d1 + d2*d2 + d3*d3 + d4*d4);
}
// Euclidean distance from a sample to a cluster center over the four
// features x1..x4. The sample's sequence field is ignored.
float CalDistancefZ(Pointf x1,PointZ x2)
{
    const float d1 = x1.x1 - x2.x1;
    const float d2 = x1.x2 - x2.x2;
    const float d3 = x1.x3 - x2.x3;
    const float d4 = x1.x4 - x2.x4;
    return sqrtf(d1*d1 + d2*d2 + d3*d3 + d4*d4);
}
int main(int argc, char* argv[])
{
Pointf pts[N];
int i = 0;
int j,m;
ifstream inFile("iris.txt");
if(!inFile)
{
cout<<"请把iris.txt放到程序所在目录";
exit(1);
}
while(!inFile.eof())
{
// inFile_dict.getline(w,sizeof(w),'.');
inFile>>pts[i].sequence;
inFile>>pts[i].x1;
inFile>>pts[i].x2;
inFile>>pts[i].x3;
inFile>>pts[i].x4;
i++;
}
inFile.close();
printf("样本集为:\n");
for(i=0;i<N;i++)
{
printf("X%d(%.1f,%.1f,%.1f,%.1f) ",pts[i].sequence,pts[i].x1,pts[i].x2,pts[i].x3,pts[i].x4);
if((i+1)%3==0)
{
printf("\n");
}
}
printf("\n");
printf("\n");
int Nc=0;
printf("please input Nc(0-150): ");
scanf("%d",&Nc);
int Z[N];
for(i=0;i<Nc;i++)
{
printf("输入初始第%d聚类中心的序号(0-149):",i);
scanf("%d",&Z[i]);
}
int Nj[N]; //记录每个类中元素的个数
PointZ ZArray[N];
Pointf SAArray[N][N];
float DjAv[N];
float Deltaj[N][2];
float Deltajmax[N];
int DeltajmaxCor[N];
float DAv;
int Nreal=N;
int count=0;
float Dij[N*N/2];
int Diji[N];
int Dijj[N];
int q=0;
int p=0;
float ft;
int it;
int jt;
int flag;
int ss=0;
PointZ Ztp;
PointZ ZArraytp[N];
int Nctp;
char ch;
int cur=0;
for(i=0;i<N;i++)
{
Nj[i]=0;
}
//聚类中心的特征值
for(i=0;i<Nc;i++)
{
// int ihere=Z[i];
int ihere=i;
ZArray[i].x1=pts[ihere].x1;
ZArray[i].x2=pts[ihere].x2;
ZArray[i].x3=pts[ihere].x3;
ZArray[i].x4=pts[ihere].x4;
}
int K,ThetaN;
float ThetaS,ThetaC;
int L,I;
Step1:
printf("输入预期聚类中心数目 K :");
scanf("%d",&K);
printf("输入每个聚类域中最少的样本数ThetaN: ");
scanf("%d",&ThetaN);
printf("输入同一聚类域中样本标准差的最大值: ");
scanf("%f",&ThetaS);
printf("输入不同聚类域距离最小值: ");
scanf("%f",&ThetaC);
printf("输入一次可以合并的聚类中心的最多对数: ");
scanf("%d",&L);
printf("输入最大迭代次数: ");
scanf("%d",&I);
Step2:
for(i=0;i<Nc;i++)
{
Nj[i]=0;
}
printf("\n");
printf("这是第%d次归类\n",count+1);
for(i=0;i<N;i++) //将模式样本归类
{
if(pts[i].sequence==-1)continue; //若该点的序号为-1则说明它是被剔除的
float dis=1.0e+10;
int xx=0;
float ftemp;
for(j=0;j<Nc;j++)
{
ftemp=CalDistancefZ(pts[i],ZArray[j]);
if(ftemp<dis||fabs(dis-ftemp)<eps)
{
xx=j;
dis=ftemp;
}
}
SAArray[xx][Nj[xx]].x1=pts[i].x1;
SAArray[xx][Nj[xx]].x2=pts[i].x2;
SAArray[xx][Nj[xx]].x3=pts[i].x4;
SAArray[xx][Nj[xx]].x3=pts[i].x4;
SAArray[xx][Nj[xx]].sequence=pts[i].sequence;
Nj[xx]=Nj[xx]+1;
}
for(i=0;i<Nc;i++)
{
printf("第%d个聚类中心是:(%.2f,%.2f,%.2f,%.2f) ",i,ZArray[i].x1,ZArray[i].x2,ZArray[i].x3,ZArray[i].x4);
printf("包含的元素有:",i);
for(j=0;j<Nj[i];j++)
{
printf(" X%d ",SAArray[i][j].sequence);
}
printf("\n");
// printf("Nj(%d) is %d\n",i,Nj[i]);
}
count++;
Step3:
for(j=0;j<Nc;j++) //是否可以去掉一些数据
{
if(Nj[j]<ThetaN)
{
for(i=0;i<Nj[j];i++)
{
pts[SAArray[j][i].sequence].sequence=-1;
}
i=j;
int tr=j;
Nreal-=Nj[j];
while(j<Nc-1)
{
for(m=0;m<Nj[j+1];m++)
{
SAArray[j][m].x1=SAArray[j+1][m].x1;
SAArray[j][m].x2=SAArray[j+1][m].x2;
SAArray[j][m].x3=SAArray[j+1][m].x3;
SAArray[j][m].x4=SAArray[j+1][m].x4;
SAArray[j][m].sequence=SAArray[j+1][m].sequence;
}
j++;
}
while(i<Nc-1)
{
Nj[i]=Nj[i+1];
i++;
}
Nc--;
j=tr;
}
}
Step4: //修正各聚类中心
for(j=0;j<Nc;j++)
{
float temx=0,temy=0;
for(i=0;i<Nj[j];i++)
{
temx+=SAArray[j][i].x1;
temy+=SAArray[j][i].x2;
temx+=SAArray[j][i].x3;
temx+=SAArray[j][i].x4;
}
ZArray[j].x1=temx/Nj[j];
ZArray[j].x2=temy/Nj[j];
ZArray[j].x3=temx/Nj[j];
ZArray[j].x4=temx/Nj[j];
}
Step5://计算各聚类域中诸样本与聚类中心的平均距离
float temp=0.0;
for(j=0;j<Nc;j++)
{
for(i=0;i<Nj[j];i++)
{
temp+=CalDistancefZ(SAArray[j][i],ZArray[j]);
}
DjAv[j]=temp/Nj[j];
temp=0.0;
}
Step6://计算全部模式样本对应聚类中心的总平均距离
DAv=0;
for(j=0;j<Nc;j++)
{
DAv+=Nj[j]*DjAv[j];
}
DAv/=Nreal;
Step7:
if(count>=I) goto Step14;
if(Nc<=K/2)goto Step8;
if((count%2==0)||Nc>=2*K) goto Step11;
Step8://计算各聚类中样本距离标准差
for(j=0;j<Nc;j++)
{
float temx=0.0,temy=0.0;
for(i=0;i<Nj[j];i++)
{
temx+=(SAArray[j][i].x1-ZArray[j].x1)*(SAArray[j][i].x1-ZArray[j].x1);
temy+=(SAArray[j][i].x2-ZArray[j].x2)*(SAArray[j][i].x2-ZArray[j].x2);
temx+=(SAArray[j][i].x3-ZArray[j].x3)*(SAArray[j][i].x3-ZArray[j].x3);
temx+=(SAArray[j][i].x4-ZArray[j].x4)*(SAArray[j][i].x4-ZArray[j].x4);
}
Deltaj[j][0]=sqrtf(temx/Nj[j]);
Deltaj[j][1]=sqrtf(temy/Nj[j]);
temx=0.0,temy=0.0;
}
Step9://求每个标准差向量中的最大分量
for(j=0;j<Nc;j++)
{
Deltajmax[j]=Deltaj[j][0]>Deltaj[j][1]?Deltaj[j][0]:Deltaj[j][1];
DeltajmaxCor[j]=Deltaj[j][0]>Deltaj[j][1]?0:1;
}
Step10://分裂判断和计算
for(j=0;j<Nc;j++)
{
if(Deltajmax[j]>ThetaS)
{
if((DjAv[j]>DAv&&Nj[j]>2*(ThetaN+1))||Nc<=K/2)
{
float Garma=0.5;
PointZ Zj1,Zj2;
if(DeltajmaxCor[j]==0)
{
Zj1.x1=ZArray[j].x1+Deltajmax[j]*Garma;
Zj1.x2=ZArray[j].x2;
Zj1.x3=ZArray[j].x3;
Zj1.x4=ZArray[j].x4;
Zj2.x1=ZArray[j].x1-Deltajmax[j]*Garma;
Zj2.x2=ZArray[j].x2;
Zj2.x3=ZArray[j].x3;
Zj2.x4=ZArray[j].x4;
}
else if(DeltajmaxCor[j]==1)
{
Zj1.x1=ZArray[j].x1;
Zj1.x2=ZArray[j].x2+Deltajmax[j]*Garma;
Zj1.x3=ZArray[j].x3;
Zj1.x4=ZArray[j].x4;
Zj2.x1=ZArray[j].x1;
Zj2.x2=ZArray[j].x2-Deltajmax[j]*Garma;
Zj2.x3=ZArray[j].x3;
Zj2.x4=ZArray[j].x4;
}
else if(DeltajmaxCor[j]==2)
{
Zj1.x1=ZArray[j].x1;
Zj1.x2=ZArray[j].x2;
Zj1.x3=ZArray[j].x3+Deltajmax[j]*Garma;
Zj1.x4=ZArray[j].x4;
Zj2.x1=ZArray[j].x1;
Zj2.x2=ZArray[j].x2;
Zj2.x3=ZArray[j].x3-Deltajmax[j]*Garma;
Zj2.x4=ZArray[j].x4;
}
else if(DeltajmaxCor[j]==3)
{
Zj1.x1=ZArray[j].x1;
Zj1.x2=ZArray[j].x2;
Zj1.x3=ZArray[j].x3;
Zj1.x4=ZArray[j].x4+Deltajmax[j]*Garma;
Zj2.x1=ZArray[j].x1;
Zj2.x2=ZArray[j].x2;
Zj2.x3=ZArray[j].x3;
Zj2.x4=ZArray[j].x4-Deltajmax[j]*Garma;
}
ZArray[j].x1=Zj1.x1;
ZArray[j].x2=Zj1.x2;
ZArray[j].x3=Zj1.x3;
ZArray[j].x4=Zj1.x4;
ZArray[Nc].x1=Zj2.x1;
ZArray[Nc].x2=Zj2.x2;
ZArray[Nc].x3=Zj2.x3;
ZArray[Nc].x4=Zj2.x4;
Nc++;
goto Step2;
}
}
}
Step11://计算全部聚类中心的距离
ss=0;
for(i=0;i<Nc-1;i++)
{
for(j=i+1;j<Nc;j++)
{
Dij[ss]=CalDistanceZ(ZArray[i],ZArray[j]);
Diji[ss]=i;
Dijj[ss]=j;
ss++;
}
}
Step12: //简单起见,只考虑一次只合并一对聚类中心的情况
//找出类间距离最小的
ft=Dij[0];
it=Diji[0];
jt=Dijj[0];
for(i=1;i<ss;i++)
{
if(Dij[i]<ft)
{
ft=Dij[i];
it=Diji[i];
jt=Dijj[i];
}
}
Step13:
if(ft<ThetaC)
{
Ztp.x1=(Nj[it]*ZArray[it].x1+Nj[jt]*ZArray[jt].x1)/(Nj[it]+Nj[jt]);
Ztp.x2=(Nj[it]*ZArray[it].x2+Nj[jt]*ZArray[jt].x2)/(Nj[it]+Nj[jt]);
Ztp.x3=(Nj[it]*ZArray[it].x3+Nj[jt]*ZArray[jt].x3)/(Nj[it]+Nj[jt]);
Ztp.x4=(Nj[it]*ZArray[it].x4+Nj[jt]*ZArray[jt].x4)/(Nj[it]+Nj[jt]);
ZArray[it].x1=Ztp.x1;
ZArray[jt].x2=Ztp.x2;
ZArray[it].x3=Ztp.x3;
ZArray[it].x4=Ztp.x4;
j=jt;
while(j<Nc-1)
{
ZArray[j].x1=ZArray[j+1].x1;
ZArray[j].x2=ZArray[j+1].x2;
ZArray[j].x3=ZArray[j+1].x3;
ZArray[j].x4=ZArray[j+1].x4;
j++;
}
j=jt;
while(j<Nc-1)
{
Nj[j]=Nj[j+1];
j++;
}
Nc--;
}
Step14:
if(count>=I)
{
count=0;
printf("\n");
printf("共分为%d类\n",Nc);
return 0;
}
else
{
goto Step2;
}
}
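// Code-viewer keyboard shortcuts (web page residue, not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -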