📄 kmeanscluster.cpp
字号:
// KMeansCluster.cpp: implementation of the CKMeansCluster class.
//
//////////////////////////////////////////////////////////////////////
#include "KMeansCluster.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CKMeansCluster::CKMeansCluster(int IterationNum, int ClusterNum, map<int, FeatureVector>& FeatureNode, char* OutputFileName)
{
KMeans(IterationNum, ClusterNum, FeatureNode, OutputFileName);
}
double CKMeansCluster::GetNewCluster(int IterationTime, vector<FeatureVector>& Centroid, map<int, Cluster>& ClusterSet, map<int, FeatureVector>& FeatureNode)
{
double TotalDistance = 0;
double NewDistance = 0;
double Distance = -1;
int CentroidNum = Centroid.size();
ClusterSet.clear();
map<int, FeatureVector>::iterator f = FeatureNode.begin();
int CentroidPoint;
for ( int c = 0; f != FeatureNode.end(); f++, c++ )
{
Distance = -1;
for ( int i = 0; i < CentroidNum; i++ )
{
NewDistance = ComputeDistance((*f).second, Centroid[i]);
if ( Distance == -1 || NewDistance < Distance )
{
Distance = NewDistance;
CentroidPoint = i;
}
}
TotalDistance += Distance;
printf("%d iteration %d feature assigning over\r", IterationTime, c);
ClusterSet[CentroidPoint].push_back((*f).second);
}
return TotalDistance;
}
double CKMeansCluster::ComputeDistance(FeatureVector& Vect1, FeatureVector& Vect2)
{
double Distance = 0;
map<int, double>::iterator f = Vect1.begin();
for (; f != Vect1.end(); f++ )
{
if ( Vect2.find((*f).first) != Vect1.end() )
{
Distance += pow(((*f).second - Vect2[(*f).first]),2);
}
else
{
Distance += pow((*f).second, 2);
}
}
f = Vect2.begin();
for (; f != Vect2.end(); f++ )
{
if ( Vect1.find((*f).first) == Vect1.end() )
{
Distance += pow((*f).second, 2);
}
}
return Distance;
}
void CKMeansCluster::AdjustCentroid(int IterationNum, vector<FeatureVector>& Centroid, map<int, FeatureVector>& FeatureNode)
{
double Distance = -1;
double NewDistance = 0;
double TotalDistance = 0;
map<int, Cluster> ClusterSet;
for ( int i = 0; i < IterationNum; i++ )
{
TotalDistance = GetNewCluster(i, Centroid, ClusterSet, FeatureNode);
Centroid.clear();
map<int, double> CalCentroid;
map<int, Cluster>::iterator l = ClusterSet.begin();
map<int, double>::iterator m;
for ( ; l != ClusterSet.end(); l++ )
{
int PointNum = (*l).second.size();
for ( int t = 0; t < PointNum; t++ )
{
m = (*l).second[t].begin();
for ( ; m != (*l).second[t].end(); m++ )
{
if ( CalCentroid.find((*m).first) == CalCentroid.end() )
{
CalCentroid[(*m).first] = (*m).second / PointNum;
}
else
{
CalCentroid[(*m).first] += (*m).second / PointNum;
}
}
}
Centroid.push_back(CalCentroid);
CalCentroid.clear();
}
printf("%d iteration over Distance = %lf\n", i, TotalDistance);
} // end of iteration
}
void CKMeansCluster::KMeans(int IterationNum, int ClusterNum, map<int, FeatureVector>& FeatureNode, char* OutputFileName)
{
int FeatureNodeNum = FeatureNode.size();
cout << "(2) Cluster number = " << ClusterNum << " Feature number = " << FeatureNodeNum << endl;
vector<FeatureVector> Centroid;
vector<int> CentroidValue;
for ( int i = 0; i < ClusterNum; )
{
int InitCentroid = rand() % FeatureNodeNum;
int Len = Centroid.size();
bool IsExist = false;
for ( int j = 0; j < Len; j++ )
{
if ( InitCentroid == CentroidValue[j] )
{
break;
IsExist = true;
}
}
if ( !IsExist )
{
CentroidValue.push_back(InitCentroid);
i++;
}
}
vector<int> FeatureId;
map<int, FeatureVector>::iterator f = FeatureNode.begin();
for ( ; f != FeatureNode.end(); f++ )
{
FeatureId.push_back((*f).first);
}
for ( int j = 0; j < ClusterNum; j++ )
{
Centroid.push_back(FeatureNode[FeatureId[CentroidValue[j]]]);
}
FeatureId.clear();
// the initial centroids have been set
cout << "(3) Initial cluster centroid over" << endl;
AdjustCentroid(IterationNum, Centroid, FeatureNode);
WriteToFile(OutputFileName, Centroid, FeatureNode);
}
void CKMeansCluster::WriteToFile(char* FileName, vector<FeatureVector>& Centroid, map<int, FeatureVector>& FeatureNode)
{
FILE* OutputP = fopen(FileName, "w");
if ( !OutputP )
{
cout << " Write ot File Error! " << endl;
exit(0);
}
map<int, ClusterID> ClusterContainID;
map<int, FeatureVector>::iterator f = FeatureNode.begin();
int ClusterNum = Centroid.size();
int ClusterPoint = 0;
double Distance = -1;
double CalDistance = 0;
for ( int q = 0; f != FeatureNode.end(); f++, q++ )
{
double Distance = -1;
for ( int i = 0; i < ClusterNum; i++ )
{
CalDistance = ComputeDistance((*f).second, Centroid[i]);
if ( Distance == -1 || CalDistance < Distance )
{
ClusterPoint = i;
Distance = CalDistance;
}
}
ClusterContainID[ClusterPoint].push_back((*f).first);
printf("%d assgining cluster over\r", q);
}
map<int, ClusterID>::iterator j = ClusterContainID.begin();
for (int c = 0 ; j != ClusterContainID.end(); j++, c++ )
{
int PointNum = (*j).second.size();
fprintf(OutputP, "Cluster %d ", c);
for ( int p = 0; p < PointNum; p++ )
{
fprintf(OutputP, "%d ", (*j).second[p]);
}
fprintf(OutputP, "\n");
}
fclose(OutputP);
}
CKMeansCluster::~CKMeansCluster()
{
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -