⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeanscluster.cpp

📁 k均值聚类的算法
💻 CPP
字号:
// KMeansCluster.cpp: implementation of the CKMeansCluster class.
//
//////////////////////////////////////////////////////////////////////

#include "KMeansCluster.h"

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

CKMeansCluster::CKMeansCluster(int IterationNum, int ClusterNum, map<int, FeatureVector>& FeatureNode, char* OutputFileName)
{
	KMeans(IterationNum, ClusterNum, FeatureNode, OutputFileName);
}

double CKMeansCluster::GetNewCluster(int IterationTime, vector<FeatureVector>& Centroid, map<int, Cluster>& ClusterSet, map<int, FeatureVector>& FeatureNode)
{
	double TotalDistance = 0;
	
	double NewDistance = 0;

	double Distance = -1;

	int CentroidNum = Centroid.size();

	ClusterSet.clear();
	
	map<int, FeatureVector>::iterator f = FeatureNode.begin();
	
	int CentroidPoint;
	
	for ( int c = 0; f != FeatureNode.end(); f++, c++ )
	{
		Distance = -1;

		for ( int i = 0; i < CentroidNum; i++ )
		{
			NewDistance = ComputeDistance((*f).second, Centroid[i]);

			if ( Distance == -1 || NewDistance < Distance )
			{
				Distance = NewDistance;

				CentroidPoint = i;
			}
		}
		
		TotalDistance += Distance;
		
		printf("%d iteration %d feature assigning over\r", IterationTime, c);

		ClusterSet[CentroidPoint].push_back((*f).second);	

	}
	
	return TotalDistance;
}

double CKMeansCluster::ComputeDistance(FeatureVector& Vect1, FeatureVector& Vect2)
{
	double Distance = 0;

	map<int, double>::iterator f = Vect1.begin();
	
	for (; f != Vect1.end(); f++ )
	{
		if ( Vect2.find((*f).first) != Vect1.end() )
		{
			Distance += pow(((*f).second - Vect2[(*f).first]),2);
		}
		else
		{
			Distance += pow((*f).second, 2);
		}
	}
	f = Vect2.begin();
	
	for (; f != Vect2.end(); f++ )
	{
		if ( Vect1.find((*f).first) == Vect1.end() )
		{
			Distance += pow((*f).second, 2);
		}
	}
	
	return Distance;
}

void CKMeansCluster::AdjustCentroid(int IterationNum, vector<FeatureVector>& Centroid, map<int, FeatureVector>& FeatureNode)
{
	double Distance = -1;

	double NewDistance = 0;
	
	double TotalDistance = 0;
	
	map<int, Cluster> ClusterSet;
	
	for ( int i = 0; i < IterationNum; i++ )
	{
		TotalDistance = GetNewCluster(i, Centroid, ClusterSet, FeatureNode);

		Centroid.clear();
		
		map<int, double> CalCentroid;

		map<int, Cluster>::iterator l = ClusterSet.begin();
		
		map<int, double>::iterator m;
		
		for ( ; l != ClusterSet.end(); l++ )
		{
			int PointNum = (*l).second.size();
		
			for ( int t = 0; t < PointNum; t++ )
			{
				m = (*l).second[t].begin();
			
				for ( ; m != (*l).second[t].end(); m++ )
				{
					if ( CalCentroid.find((*m).first) == CalCentroid.end() )
					{
						CalCentroid[(*m).first] = (*m).second / PointNum;
					}
					else
					{
						CalCentroid[(*m).first] += (*m).second / PointNum;
					}
				}
			}
			
			Centroid.push_back(CalCentroid);
			
			CalCentroid.clear();
		}
		printf("%d iteration over Distance = %lf\n", i, TotalDistance);
	} // end of iteration
	
}

void CKMeansCluster::KMeans(int IterationNum, int ClusterNum, map<int, FeatureVector>& FeatureNode, char* OutputFileName)
{
	
	int FeatureNodeNum = FeatureNode.size();
	
	cout << "(2) Cluster number = " << ClusterNum << " Feature number = " << FeatureNodeNum << endl;
	
	vector<FeatureVector> Centroid;
	
	vector<int> CentroidValue;
	
	for ( int i = 0; i < ClusterNum; )
	{
		int InitCentroid = rand() % FeatureNodeNum;
		int Len = Centroid.size();
		bool IsExist = false;
		for ( int j = 0; j < Len; j++ )
		{
			if ( InitCentroid == CentroidValue[j] )
			{
				break;
				IsExist = true;
			}
		}
		if ( !IsExist )
		{
			CentroidValue.push_back(InitCentroid);
			i++;
		}
	}
	vector<int> FeatureId;

	map<int, FeatureVector>::iterator f = FeatureNode.begin();

	for ( ; f != FeatureNode.end(); f++ )
	{
		FeatureId.push_back((*f).first);
	}
	for ( int j = 0; j < ClusterNum; j++ )
	{
		Centroid.push_back(FeatureNode[FeatureId[CentroidValue[j]]]);
	}
	FeatureId.clear();
	// the initial centroids have been set
	cout << "(3) Initial cluster centroid over" << endl;

	AdjustCentroid(IterationNum, Centroid, FeatureNode);
	
	WriteToFile(OutputFileName, Centroid, FeatureNode);
}

void CKMeansCluster::WriteToFile(char* FileName, vector<FeatureVector>& Centroid, map<int, FeatureVector>& FeatureNode)
{
	FILE* OutputP = fopen(FileName, "w");
	if ( !OutputP )
	{
		cout << " Write ot File Error! " << endl;
		exit(0);
	}
	map<int, ClusterID> ClusterContainID;
	map<int, FeatureVector>::iterator f = FeatureNode.begin();
	
	int ClusterNum = Centroid.size();
	int ClusterPoint = 0;

	double Distance = -1;
	double CalDistance = 0;

	

	for ( int q = 0; f != FeatureNode.end(); f++, q++ )
	{
		double Distance = -1;
		
		for ( int i = 0; i < ClusterNum; i++ )
		{
			CalDistance = ComputeDistance((*f).second, Centroid[i]);

			if ( Distance == -1 || CalDistance < Distance )
			{
				ClusterPoint = i;
				Distance = CalDistance;
			}
		}
		ClusterContainID[ClusterPoint].push_back((*f).first);

		printf("%d assgining cluster over\r", q);
		
	}

	map<int, ClusterID>::iterator j = ClusterContainID.begin();

	for (int c = 0 ; j != ClusterContainID.end(); j++, c++ )
	{
		int PointNum = (*j).second.size();
		
		fprintf(OutputP, "Cluster %d ", c);

		for ( int p = 0; p < PointNum; p++ )
		{
			fprintf(OutputP, "%d ", (*j).second[p]);
		}
		fprintf(OutputP, "\n");
	}

	fclose(OutputP);
}

CKMeansCluster::~CKMeansCluster()
{

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -