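// kmeans.cpp
// (The file title above and a "font size" control were captured from the code-viewer page; they are not part of the program.)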
/****************************************************************************
* *
* KMEANS *
* *
*****************************************************************************/
#include <stdafx.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
#include "KMEANS.h"
// FUNCTION PROTOTYPES
// DEFINES
#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
/**
 * Hook that RunKMeans() invokes once per pass, after the centers have been
 * recomputed.
 *
 * Intentionally does nothing. The earlier body stored i in
 * Cluster[i].Member[0]. Because RunKMeans() calls this after
 * DistributeSamples() has filled the member lists, that store replaced the
 * first real member of every cluster, including on the final (converged)
 * pass, so the membership left behind for callers was wrong. This file
 * contains no display code for the centers, so nothing is printed here.
 */
void CKmeans::ShowCenters(){
}
int CKmeans::LoadPatterns(char *fname){
FILE *InFilePtr;
int i,j;
double x;
if((InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE;
fscanf(InFilePtr, "%d", &NumPatterns); // 从文件读样本数目信息
fscanf(InFilePtr, "%d", &SizeVector); // 读取向量维度
fscanf(InFilePtr, "%d", &NumClusters); // 读取K-Means算法的聚类数目
Pattern=new double[NumPatterns*SizeVector];
Cluster=new aCluster[NumClusters];
for (i=0;i<NumClusters;i++)
{
Cluster[i].Center=new double[SizeVector];
Cluster[i].Member=new int[NumPatterns];
}
for (i=0; i<NumPatterns; i++) { //对每一个样本
for (j=0; j<SizeVector; j++) { //建立一个向量
fscanf(InFilePtr,"%lfe",&x); //保存其在dat
*(Pattern+i*SizeVector+j)=x; //文件中的特征信息
}
}
return SUCCESS;
}
//***************************************************************************
// InitClusters                                                             *
// Seeds the clusters from the data set: cluster c copies pattern c as its  *
// starting center and stores c in the first slot of its member list.      *
//***************************************************************************
void CKmeans::InitClusters(){
    for (int c = 0; c < NumClusters; ++c) {
        // Row c of the pattern matrix is the seed for cluster c.
        const double *seed = Pattern + c*SizeVector;
        Cluster[c].Member[0] = c;
        for (int d = 0; d < SizeVector; ++d)
            Cluster[c].Center[d] = seed[d];
    }
}
/**
 * Drives the clustering loop: assign every pattern to a cluster, recompute
 * the centers, call ShowCenters(), and repeat for as long as
 * CalcNewClustCenters() returns FALSE.
 *
 * At least one pass is always performed; no upper bound is placed on the
 * number of passes.
 */
void CKmeans::RunKMeans(){
    int done;
    do {
        DistributeSamples();
        done = CalcNewClustCenters();
        ShowCenters();
    } while (done == FALSE);
}
/**
 * Squared distance between pattern p and the center of cluster c.
 *
 * Adds up the squared differences of corresponding components; no square
 * root is taken. The last component (index SizeVector-1) is left out of the
 * sum.
 * NOTE(review): skipping the final component may be deliberate (for example
 * if the last column of the data file is not a feature) -- confirm against
 * the data format. With SizeVector == 1 the result is always 0.
 *
 * @param p  Row index of the pattern inside Pattern.
 * @param c  Index of the cluster inside Cluster.
 * @return   The accumulated sum of squared differences.
 */
double CKmeans::EucNorm(int p, int c){
    const double *center = Cluster[c].Center;
    const double *sample = Pattern + p*SizeVector;
    const int used = SizeVector - 1;   // every component except the last one
    double sum = 0;
    for (int k = 0; k < used; ++k) {
        const double diff = center[k] - sample[k];
        sum += diff*diff;
    }
    return sum;
}
/**
 * Finds the cluster whose center is nearest to one pattern.
 *
 * Distances come from EucNorm(). Only a strictly smaller distance replaces
 * the current best, so ties go to the lowest cluster index.
 *
 * @param pat  Row index of the pattern.
 * @return     Index of the nearest cluster, in [0, NumClusters).
 *
 * If no cluster qualifies (NumClusters <= 0, or every distance is NaN or not
 * below the 9.9e+99 starting bound) the process is terminated. The exit
 * status is EXIT_FAILURE; the earlier exit(0) reported this fatal condition
 * to the parent process as a successful run.
 */
int CKmeans::FindClosestCluster(int pat){
    double best = 9.9e+99;   // starting bound; any real distance is expected to be smaller
    int nearest = -1;
    for (int c = 0; c < NumClusters; c++) {
        const double d = EucNorm(pat, c);
        if (d < best) {
            best = d;
            nearest = c;
        }
    }
    if (nearest < 0) {
        // There is no valid index to return, so the run cannot continue.
        exit(EXIT_FAILURE);
    }
    return nearest;
}
void CKmeans::DistributeSamples(){
int i,pat,Clustid,MemberIndex;
//初始化聚类中所属向量个数为0
for (i=0; i<NumClusters;i++){
Cluster[i].NumMembers=0;
}
for (pat=0; pat<NumPatterns; pat++) {
//寻找离当前样本距离最近的聚类中心
Clustid= FindClosestCluster(pat);
//将这个样本放入最近的聚类
MemberIndex=Cluster[Clustid].NumMembers;
Cluster[Clustid].Member[MemberIndex]=pat;
Cluster[Clustid].NumMembers++;
}
}
/**
 * Recomputes each cluster center as the component-wise mean of the patterns
 * currently listed in that cluster's Member array.
 *
 * A cluster with no members keeps its current center. Dividing by a zero
 * member count would produce NaN components, and NaN compares unequal to
 * everything, so the function could never again report convergence.
 *
 * @return TRUE when no component of any center changed (exact comparison),
 *         FALSE when at least one component changed.
 */
int CKmeans::CalcNewClustCenters(){
    int converged = TRUE;
    double *sum = new double[SizeVector];   // scratch accumulator, released before returning

    for (int c = 0; c < NumClusters; c++) {
        const int count = Cluster[c].NumMembers;
        if (count <= 0)
            continue;                        // empty cluster: leave its center untouched

        for (int k = 0; k < SizeVector; k++)
            sum[k] = 0.0;

        // Accumulate the component sums over the cluster's member patterns.
        for (int m = 0; m < count; m++) {
            const double *row = Pattern + Cluster[c].Member[m]*SizeVector;
            for (int k = 0; k < SizeVector; k++)
                sum[k] += row[k];
        }

        // Turn the sums into means and note whether the center moved.
        for (int k = 0; k < SizeVector; k++) {
            const double mean = sum[k]/count;
            if (mean != Cluster[c].Center[k])
                converged = FALSE;
            Cluster[c].Center[k] = mean;
        }
    }

    delete [] sum;
    return converged;
}
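// ---------------------------------------------------------------------------
// Code-viewer page residue (not part of the program): keyboard shortcut help.
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Toggle theme: Ctrl + Shift + D
//   Show shortcuts: ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
// ---------------------------------------------------------------------------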