⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.cpp

📁 基本的k-means聚类算法c++实现
💻 CPP
字号:
#include "KMeans.h"#include <math.h>#include <stdlib.h>#include <iostream.h>#include <fstream.h>#include <time.h>#define EPSILON 0.000001//#define TESTNOWKMeans::KMeans(double** buf, int num, int dim){	number = num;	dimension = dim;	buffer = buf;	naiveIterateTime = 0;	boundIterateTime = 0;	stopTime = 0;}KMeans::~KMeans(){}double KMeans::GetDistSqu(double* p1, double* p2){	int i;	double distSqu = 0;	for(i = 0; i < dimension; i++)	{		distSqu += (p1[i] - p2[i]) * (p1[i] - p2[i]);	}	return distSqu;}double KMeans::GetCost(double** centers, int k){	int i,j;//	int closeCenter;	double minDistSqu;	double distSquSum = 0;	for(i = 0; i < number; i++)	{		//closeCenter = 0;		minDistSqu = GetDistSqu(buffer[i], centers[0]);		for(j = 1; j < k; j++)		{			double temp = GetDistSqu(buffer[i], centers[j]);			if(temp < minDistSqu)			{				//closeCenter = j;				minDistSqu = temp;			}		}		distSquSum += minDistSqu;	}	return distSquSum;}void KMeans::AccCluster(double** optSolution, int k, int randomNum, int time){	int i, m, n;	clusterTime = 0;	iterateTime = 0;	belongings = new int[number];	ranks = new int[2*number];	differences = new double[2*number];	quadPara = new double[2*number];	linearPara = new double[2*number];	constPara = new double[2*number];	double optCost = number*dimension;	double** tempSolution = new double*[k];	for(i = 0; i < k; i++)		tempSolution[i] = new double[dimension];	srand(randomNum);	for(i = 0; i < time; i++)	{		//srand(randomNum+i);		//invoke AccOneCluster here		double temp = AccOneCluster(tempSolution, k, optCost);		if(temp < optCost)		{			optCost = temp;			for(m = 0; m < k; m++)			{				for(n = 0; n < dimension; n++)				{					optSolution[m][n] = tempSolution[m][n];				}			}		}#ifdef TESTNOW		cout << "Run " << clusterTime << " Finished" << endl;#endif	}	cout << "Naive Iterate time : " << naiveIterateTime << endl;	cout << "Bound Iterate time : " << boundIterateTime << endl;	cout << "Stop Iterate time  : " << stopTime << endl;	cout << "Best Solution Cost : " << optCost << endl;	//OutputSolution(tempSolution, k);	for(i = 0; i < k; i++)		delete [] tempSolution[i];	delete [] tempSolution;	delete [] belongings;	delete [] ranks;	delete [] differences;	delete [] quadPara;	delete [] linearPara;	delete [] constPara;}void KMeans::Cluster(double** optSolution, int k, int randomNum, int time){	int i, m, n;	double optCost = number*dimension;	double** tempSolution = new double*[k];	for(i = 0; i < k; i++)		tempSolution[i] = new double[dimension];	srand(randomNum);	for(i = 0; i < time; i++)	{		//srand(randomNum+i);		double temp;		if((temp = OneCluster(tempSolution, k)) < optCost)		{			//If the temp solution is better, replace it with the optimal solution			optCost = temp;			for(m = 0; m < k; m++)			{				for(n = 0; n < dimension; n++)				{					optSolution[m][n] = tempSolution[m][n];				}			}		}	}	cout << "Naive Iterate time : " << naiveIterateTime << endl;	cout << "Bound Iterate time : " << boundIterateTime << endl;	cout << "Best Solution Cost : " << optCost << endl;	//OutputSolution(tempSolution, k);	for(i = 0; i < k; i++)		delete [] tempSolution[i];	delete [] tempSolution;}void KMeans::OutputSolution(double** solution, int k){	int i,j;	for(i = 0; i < k; i++)	{		for(j = 0; j < dimension; j++)			cout << solution[i][j] << " ";		cout << endl;	}}double KMeans::AccOneCluster(double** tempSolution, int k, double optCost){	int i,j;	clusterTime++;	iterateTime = 0;	for(i = 0; i < k; i++)	{		//Choose a point in the data set randomly		int pos = rand() % number;		for(j = 0; j < dimension; j++)		{			tempSolution[i][j] = buffer[pos][j];		}	}	//OutputSolution(tempSolution, k);	//int iterateTime = 1;	for(i = 0; i < number; i++)		belongings[i] = 0;	newSmallCluster = 0;	bool noBound = false;	do{		//cout << iterateTime++ << " : " << endl;		//invoke AccIterate here		int temp;		if(noBound)			temp = OneIterate(tempSolution, k);		else			temp = AccIterate(tempSolution, k, optCost);		if(temp == 0)			break;		if(temp == 1)		{			stopTime++;
			return dimension * number;		}		if(temp == 3)			noBound = true;	}while(true);	return GetCost(tempSolution, k);}double KMeans::OneCluster(double** tempSolution, int k){	int i,j;	for(i = 0; i < k; i++)	{		//Choose a point in the data set randomly		int pos = rand() % number;		for(j = 0; j < dimension; j++)		{			tempSolution[i][j] = buffer[pos][j];		}	}	//OutputSolution(tempSolution, k);	//cout << GetCost(tempSolution, k) << endl;	//while(!OneIterate(tempSolution, k));	//int iterateTime = 1;	do{		//cout << iterateTime++ << " : " << endl;	}while(OneIterate(tempSolution, k)>0);	//OutputSolution(tempSolution, k);	return GetCost(tempSolution, k);}void KMeans::swap(int* array, int left, int right){	int temp = array[left];	array[left] = array[right];	array[right] = temp;}void KMeans::QuickSort(int left, int right){	//cout << left << " " << right << endl;    if (left >= right) return;	int newleft = rand()%(right-left+1) + left;	swap(ranks, newleft, left);    int last = left, i;    //swap(a,left,left);    for (i = left + 1; i <= right; i++)	{        if (differences[ranks[i]] < differences[ranks[left]])            swap(ranks,++last,i);	}    swap(ranks,left,last);	//cout << last << endl;    QuickSort(left,last-1);    QuickSort(last+1,right);}double* diff;int compare( const void *arg1, const void *arg2 ){	int p1 = *((int*)arg1);	int p2 = *((int*)arg2);	if(diff[p1] < diff[p2])		return -1;	if(diff[p1] > diff[p2])		return 1;	return 0;}double KMeans::LowerBound(double currentCost, double lowerDelta,						  double upperDelta){	int i;	double quad = number;	double linear = 0;	double constant = 0;	//cout << upperDelta << endl;	int searchNum = 0;	for(i = 0; i < 2*number; i++)	{		if(differences[i] < lowerDelta)		{			quad += quadPara[i];			linear += linearPara[i];			constant += constPara[i];		}
#ifdef TESTNOW
		else
#endif#ifndef TESTNOW		else if(differences[i] <= upperDelta)		{#endif			ranks[searchNum++] = i;#ifndef TESTNOW		}#endif	}#ifdef TESTNOW//	cout << "Number of Delta   : " << searchNum << endl;//	cout << "Lower Delta   : " << lowerDelta << endl;//	cout << "Upper Delta   : " << upperDelta << endl;	int lasttime = time(NULL);#endif//	if(clusterTime == 3 && iterateTime == 3)//	{/*		for(i = 0; i < searchNum; i++)		{			if(differences[ranks[i]] < lowerDelta || differences[ranks[i]] > upperDelta)				cout << "wrong" << endl;		}*///	}	//QuickSort(0, searchNum-1);	diff = differences;	qsort(ranks, searchNum, sizeof(int), compare);#ifdef TESTNOW//	cout << "Sorting Finished in " << time(NULL) - lasttime << // "seconds" << endl;#endif	double delta = 0;	double DiffSum;	for(i = 0; i < searchNum; i++)	{		delta = differences[ranks[i]];		//cout << delta << endl;		//DiffSum = delta * oldSmallCluster - hardDiffSum - 2 * delta * zeroPos + softDiffSum;		DiffSum = delta*delta*quad - 2*delta*linear - constant;		if(DiffSum > 0)		{			#ifdef TESTNOW			cout << "Delta         : " << delta << endl;			cout << "Lower Bound : " << currentCost - delta * delta* number << endl;			#endif			return currentCost - delta * delta * number;		}		quad += quadPara[ranks[i]];		linear += linearPara[ranks[i]];		constant += constPara[ranks[i]];	}	return 0;}int KMeans::AccIterate(double** tempSolution, int k, double optCost){	int i,j;	int closeCenter;	double minDistSqu;	double oldMinDistSqu;	double secMinDistSqu;	double distSquSum = 0;	//double oldCost = 0;	double hardDiffSum = 0;	//double hardSqrtSum = 0;	double hardDistSum = 0;	boundIterateTime++;	iterateTime++;	//cout << iterateTime << endl;	int* clusterSize = new int[k];	double** sumArray = new double*[k];	for(i = 0; i < k; i++)	{		clusterSize[i] = 0;		sumArray[i] = new double[dimension];				for(j = 0; j < dimension; j++)			sumArray[i][j] = 0;	}	for(i = 0; i < number; i++)	{		//ranks[i] = i;		//ranks[number + i] = number + i;				closeCenter = belongings[i];		minDistSqu = GetDistSqu(buffer[i], tempSolution[closeCenter]);		oldMinDistSqu = minDistSqu;		secMinDistSqu = dimension;		//oldCost += minDistSqu;		for(j = 0; j < k; j++)		{			if(j == belongings[i])				continue;			double temp = GetDistSqu(buffer[i], tempSolution[j]);			if(temp < minDistSqu)			{				closeCenter = j;				secMinDistSqu = minDistSqu;				minDistSqu = temp;			}			else if(temp < secMinDistSqu)			{				secMinDistSqu = temp;			}		}		belongings[i] = closeCenter;		clusterSize[closeCenter]++;		distSquSum += minDistSqu;		for(j = 0; j < dimension; j++)			sumArray[closeCenter][j] += buffer[i][j];		if(minDistSqu < oldMinDistSqu)		{			double t1 = sqrt(oldMinDistSqu);			double t2 = sqrt(minDistSqu);			differences[i] = 0;			differences[number + i] = t2;			hardDiffSum += oldMinDistSqu - minDistSqu;			//hardSqrtSum += t1 + t2;			quadPara[i] = 0;			linearPara[i] = t1 + t2;			constPara[i] = 0;			quadPara[number + i] = -1;			linearPara[number + i] = -t2;			constPara[number + i] = minDistSqu;			hardDistSum += t1;		}		else		{			double t1 = sqrt(minDistSqu);			double t3 = sqrt(secMinDistSqu);			differences[i] = (t3 - t1)/2;			differences[number + i] = t3;			quadPara[i] = 0;			quadPara[number + i] = -1;			linearPara[i] = t1 + t3;			linearPara[number + i] = -t3;			constPara[i] = minDistSqu - secMinDistSqu;			constPara[number + i] = secMinDistSqu;		}	}	#ifdef TESTNOW	//cout << iterateTime << endl;		cout << clusterTime << "." << iterateTime <<  " Current Cost  : " << distSquSum << endl;	//cout << "Hard Diff Sum : " << hardDiffSum << endl;	//cout << "Hard Sqrt Sum : " << hardSqrtSum << endl;	#endif	//bool isConverge = true;	for(i = 0; i < k; i++)	{		if(clusterSize[i] == 0)			continue;		for(j = 0; j < dimension; j++)		{			double temp = sumArray[i][j] / clusterSize[i];			/*			if(fabs(temp - tempSolution[i][j]) > EPSILON)			{				isConverge = false;			}			*/			tempSolution[i][j] = temp;		}	}	oldSmallCluster = newSmallCluster;	newSmallCluster = number;	for(i = 0; i < k; i++)		if(clusterSize[i] < newSmallCluster)			newSmallCluster = clusterSize[i];	delete [] clusterSize;	for(i = 0; i < k; i++)		delete [] sumArray[i];	delete [] sumArray;	if(hardDiffSum == 0)		return 0;		if(distSquSum < optCost)		return 3;	double lowerDelta = 2*hardDistSum / number;	double upperDelta = sqrt((distSquSum - optCost + 1) / number);	//double upperDelta = sqrt(distSquSum / number);	if(lowerDelta > upperDelta)		return 2;	double bound = LowerBound(distSquSum, lowerDelta, upperDelta);	//double bound = optCost;	if(bound  > optCost - 1)	{		//cout << bound << " " << optCost << endl;		return 1;	}	return 2;}int KMeans::OneIterate(double** tempSolution, int k){	int i,j;		naiveIterateTime++;	iterateTime++;	//cout << iterateTime << endl;	int* clusterSize = new int[k];	double** sumArray = new double*[k];	for(i = 0; i < k; i++)	{		clusterSize[i] = 0;		sumArray[i] = new double[dimension];				for(j = 0; j < dimension; j++)			sumArray[i][j] = 0;	}	int closeCenter;	double minDistSqu;	double distSquSum = 0;	for(i = 0; i < number; i++)	{		closeCenter = 0;		minDistSqu = GetDistSqu(buffer[i], tempSolution[0]);		for(j = 1; j < k; j++)		{			double temp = GetDistSqu(buffer[i], tempSolution[j]);			if(temp < minDistSqu)			{				closeCenter = j;				minDistSqu = temp;			}		}				clusterSize[closeCenter]++;		distSquSum += minDistSqu;		for(j = 0; j < dimension; j++)		{			sumArray[closeCenter][j] += buffer[i][j];		}	}	//OutputSolution(tempSolution, k);	#ifdef TESTNOW 	//cout << iterateTime << endl;	cout << clusterTime << "." << iterateTime <<  " Current Cost : " << distSquSum << endl;	#endif	bool isConverge = true;	for(i = 0; i < k; i++)	{		if(clusterSize[i] == 0)			continue;		for(j = 0; j < dimension; j++)		{			double temp = sumArray[i][j] / clusterSize[i];			if(fabs(temp - tempSolution[i][j]) > 0)			{				isConverge = false;			}			tempSolution[i][j] = temp;		}	}	delete [] clusterSize;	for(i = 0; i < k; i++)		delete [] sumArray[i];	delete [] sumArray;	if(isConverge)		return 0;	else		return 2;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -