📄 kmeans.cpp
字号:
/****************************************************************************
* *
* KMEANS *
* *
*****************************************************************************/
#include < stdio.h>
#include < stdlib.h>
#include < string.h>
#include < conio.h>
#include < math.h>
#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
#define MAXVECTDIM 4
#define MAXPATTERN 1588
#define MAXCLUSTER 10
struct aCluster
{
double Center[MAXVECTDIM];
int Member[MAXPATTERN]; //Index of Vectors belonging to this cluster
int NumMembers;
};
struct aVector
{
double Center[MAXVECTDIM];
int Size;
};
static double Pattern[MAXPATTERN][MAXVECTDIM + 1];
class System
{
private :
aCluster Cluster[MAXCLUSTER];
int NumPatterns; // Number of patterns
int SizeVector; // Number of dimensions in vector
int NumClusters; // Number of clusters
void DistributeSamples(); // Step 2 of K-means algorithm
int CalcNewClustCenters(); // Step 3 of K-means algorithm
double EucNorm(int, int); // Calc Euclidean norm vector
int FindClosestCluster(int); //ret indx of clust closest to pattern
//whose index is arg
public :
System(){};
int LoadPatterns(char * fname); // Get pattern data to be clustered
void InitClusters(); // Step 1 of K-means algorithm
void RunKMeans(); // Overall control K-means process
void ShowClusters(); // Show results on screen
void SaveClusters(char * fname); // Save results to file
void ShowCenters();
};
void System::ShowCenters()
{
int i;
printf("Cluster centers:\n");
for (i = 0; i < NumClusters; i++)
{
Cluster[i].Member[0] = i;
printf("ClusterCenter[%d]=(%f,%f)\n", i, Cluster[i].Center[0],Cluster[i].Center[1]);
int b=0;
}
// endfor
printf("\n");
}
int System::LoadPatterns(char * fname)
{
FILE * InFilePtr;
int i, j;
double x;
if ( (InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE;
fscanf(InFilePtr, "%d", & NumPatterns); // Read # of patterns
fscanf(InFilePtr, "%d", & SizeVector); // Read dimension of vector
fscanf(InFilePtr, "%d", & NumClusters); // Read # of clusters for K-Means
for (i = 0; i < NumPatterns; i++)
{ // For each vector
for (j = 0; j < SizeVector; j++)
{ // create a pattern
fscanf(InFilePtr, "%lg", & x); // consisting of all elements
Pattern[i][j] = x;
}
// endfor
}
// endfor
printf("Input patterns:\n");
for (i = 0; i < NumPatterns; i++)
{
printf("Pattern[%d]=(%2.3f,%2.3f,%d,%d)\n", i, Pattern[i][0], Pattern[i][1],Pattern[i][2], Pattern[i][3]);
//printf("Pattern[%d]=(%2.3f,%2.3f)\n", i, Pattern[i][0], Pattern[i][1]);
}
//endfor
printf("\n--------------------\n");
return SUCCESS;
}
//***************************************************************************
// InitClusters *
// Arbitrarily assign a vector to each of the K clusters *
// We choose the first K vectors to do this *
//***************************************************************************
void System::InitClusters()
{
int i, j;
SizeVector=2;
printf("Initial cluster centers:\n");
for (i = 0; i < NumClusters; i++)
{
Cluster[i].Member[0] = i;
for (j = 0; j < SizeVector; j++)
{
Cluster[i].Center[j] = Pattern[i][j];
}
}
for (i = 0; i < NumClusters; i++)
{
printf("ClusterCenter[%d]=(%f,%f)\n", i, Cluster[i].Center[0], Cluster[i].Center[1]);
}
printf("\n");
}
void System::RunKMeans()
{
int converged;
int pass;
pass = 1;
converged = FALSE;
while (converged == FALSE)
{
printf("PASS=%d\n", pass++);
DistributeSamples();
converged = CalcNewClustCenters();
ShowCenters();
}// endwhile
}
// Calc Euclidean norm of vector difference
double System::EucNorm(int p, int c)
{
double dist, x; // between pattern vector, p, and cluster
int i; // center, c.
dist = 0;
for (i = 0; i < SizeVector; i++)
{
x = (Cluster[c].Center[i] - Pattern[p][i]) *
(Cluster[c].Center[i] - Pattern[p][i]);
dist += (Cluster[c].Center[i] - Pattern[p][i]) *
(Cluster[c].Center[i] - Pattern[p][i]);
}
return dist;
}
int System ::FindClosestCluster(int pat)
{
int i, ClustID;
double MinDist, d;
MinDist = 9.9e+99;
ClustID = -1;
for (i = 0; i < NumClusters; i++)
{
d = EucNorm(pat, i);
printf("Distance from pattern %d to cluster %d is %f\n", pat, i, sqrt(d));
if (d < MinDist)
{
MinDist = d;
ClustID = i;
}
}
if (ClustID < 0)
{
printf("Aaargh");
exit(0);
}
return ClustID;
}
void System ::DistributeSamples()
{
int i, pat, Clustid, MemberIndex;
//Clear membership list for all current clusters
for (i = 0; i < NumClusters; i++)
{
Cluster[i].NumMembers = 0;
}
for (pat = 0; pat < NumPatterns; pat++)
{
//Find cluster center to which the pattern is closest
Clustid = FindClosestCluster(pat);
printf("patern %d assigned to cluster %d\n\n", pat, Clustid);
//post this pattern to the cluster
MemberIndex = Cluster[Clustid].NumMembers;
Cluster[Clustid].Member[MemberIndex] = pat;
Cluster[Clustid].NumMembers++;
}
// endfor //
}
int System::CalcNewClustCenters()
{
int ConvFlag, VectID, i, j, k;
double tmp[MAXVECTDIM];
char xs[255];
char szBuf[255];
ConvFlag = TRUE;
printf("The new cluster centers are now calculated as:\n");
//for each cluster
for (i = 0; i < NumClusters; i++)
{
strcpy(xs,"");
printf("Cluster Center %d (1/%d):\n",i,Cluster[i].NumMembers);
// clear workspace
for (j = 0; j < SizeVector; j++)
{
tmp[j] = 0.0;
}// endfor
//traverse member vectors
for (j = 0; j < Cluster[i].NumMembers; j++)
{
VectID = Cluster[i].Member[j];
//traverse elements of vector
for (k = 0; k < SizeVector; k++)
{
tmp[k] += Pattern[VectID][k]; // add (member) pattern elmnt into temp
sprintf(szBuf,"%d ",Pattern[VectID][k]);
//strcat(xs, f2a(Pattern[VectID][k], 3));
strcat(xs,szBuf);
}
printf("%s\n",xs);
strcpy(xs,"");
}// endfor //
//traverse elements of vector
for (k = 0; k < SizeVector; k++)
{
if(Cluster[i].NumMembers!=0)
tmp[k] = tmp[k] / Cluster[i].NumMembers;
if (tmp[k] != Cluster[i].Center[k])
ConvFlag = FALSE;
Cluster[i].Center[k] = tmp[k];
}
printf("%s,\n", xs);
}
return ConvFlag;
}
void System::ShowClusters()
{
int cl;
for (cl = 0; cl < NumClusters; cl++)
{
printf("\nCLUSTER %d ==>[%f,%f]\n", cl, Cluster[cl].Center[0],
Cluster[cl].Center[1]);
}
// endfor //
}
void System::SaveClusters(char * fname)
{
}
#include <windows.h>
void main(int argc, char * argv[])
{
System kmeans;
if (argc < 2)
{
printf("USAGE: KMEANS PATTERN_FILE\n");
exit(0);
}
if (kmeans.LoadPatterns(argv[1]) == FAILURE)
{
printf("UNABLE TO READ PATTERN_FILE:%s\n", argv[1]);
exit(0);
}
kmeans.InitClusters();
kmeans.RunKMeans();
kmeans.ShowClusters();
kmeans.ShowCenters();
return ;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -