📄 kmeans.cpp

📁 Scalable k-means software and test datasets This package (a Unix tar file, gzipped) contains the sou
💻 CPP
字号:
/* Scalable K-means clustering softwareCopyright (C) 2000  Fredrik Farnstrom and James LewisThis program is free software; you can redistribute it and/ormodify it under the terms of the GNU General Public Licenseas published by the Free Software Foundation; either version 2of the License, or (at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.See the file README.TXT for more information.*//* kmeans.cpp */#include <math.h>#include <stdlib.h>#include <stdio.h>#include "singleton.h"#include "subcluster.h"#include "kmeans.h"#include "database.h"#include "syntheticdata.h"#include "rambuffer.h"void KMeans::setup(void){	int i;	Subcluster *m;	clusters = 0;	for(i = 0; i < numClusters; i++)	{		m = new Subcluster(dimensions);		if(clusters)			clusters->setPrevious(m);		m->setNext(clusters);		clusters = m;	}}void KMeans::initModel(void){	Subcluster *c;	Singleton p(dimensions);	int i, j;	float *x;	database->reset();	for(c = clusters; c; c = c->getNext())	{		j = (rand() >> 3) % 80;		for(i = 0; i <= j; i++)			x = database->getPoint();		p.set(x);		c->addPoint(&p);		c->updateMean();	}}Subcluster* KMeans::closestCluster(Singleton *p){	float d = 1e20, t;	Subcluster *m, *r;	for(m = clusters; m; m = m->getNext())		if((t = m->distanceSquared(p)) < d)		{	r = m;			d = t;		}	return r;}KMeans::KMeans(Database *db, int k, int dim) : database(db), numClusters(k),		dimensions(dim){	convergenceThreshold = defaultConvergenceThreshold;	setup();}KMeans::~KMeans(){	Subcluster *c, *nc;	for(c = clusters; c; c = nc)	{	nc = c->getNext();		delete c;	}}void KMeans::cluster(long numpoints){	Singleton p(dimensions);	float *x, d;	Subcluster *c;	float div = ((float)dimensions)*dimensions*numClusters;	FILE *f;	long cnt;	initModel();	if(!numpoints)		numpoints = 10000000;	numIterations = 0;	do	{		reset();		for(c = clusters; c; c = c->getNext())			c->clear();		cnt = 0;		while((x = getPoint()) && (cnt++ < numpoints))		{			p.set(x);			c = closestCluster(&p);			c->addPoint(&p);		}		d = 0;		for(c = clusters; c; c = c->getNext())			d += c->updateMean();		d /= div;		numIterations++;	} while(d > convergenceThreshold);/*	f = fopen("kmeans.m", "w");	fprintf(f, "clf\n");	fprintf(f, "hold on\n");	for(c = clusters; c; c = c->getNext())	{		fprintf(f, "p=[");		c->computeStdDev();		c->print(f);		fprintf(f, "];\n");		fprintf(f, "plot(p(1),p(2),'o');\n");	}*/}double KMeans::distanceToTrueClusters(SyntheticData *db){	int i, k, l;	int permutation[numClusters];	int used[numClusters];	double md = 1e30, d;	for(l = 0; l < numClusters; l++)	{	used[l] = 0;		permutation[l] = -1;	}	l = 0;	k = 0;	while(l >= 0)	{		if(l == numClusters)		{//			for(i = 0; i < numClusters; i++)//				printf("%d ", permutation[i]);//			printf("\n");			d = permutationDistance(permutation, db);			if(d < md)				md = d;			l--;			continue;		}		if(permutation[l] >= 0)			used[permutation[l]] = 0;		while(++permutation[l] < numClusters && used[permutation[l]]);		if(permutation[l] >= numClusters)			l--;		else		{	used[permutation[l]] = 1;			l++;			if(l < numClusters)				permutation[l] = -1;		}	}	return md;}double KMeans::permutationDistance(int *permut, SyntheticData *db){	Subcluster *c;	double distance = 0;	Singleton p(dimensions);	int i = 0;	for(c = clusters; c; c = c->getNext())	{		p.set(db->getMeans(permut[i++]));		distance += sqrt(c->distanceSquared(&p));	}	return distance;}double KMeans::logLikelihood(void){	Subcluster *c;	float *x;	Singleton p(dimensions);	double ll, d, ll2;	for(c = clusters; c; c = c->getNext())	{		c->computeStdDev();	}	database->reset();	ll = 0;	while(x = database->getPoint())	{		p.set(x);		c = closestCluster(&p);		d = c->distanceSquared(&p);		ll2 = ll;		ll += d;		if(ll < ll2)			fprintf(stderr, "Overflow\n");	}	return ll;}void KMeans::reset(void){	database->reset();}float* KMeans::getPoint(void){	return database->getPoint();}BufferedKMeans::BufferedKMeans(Database *db, int k, int dim, int buffersize) :		KMeans(db, k, dim){	Singleton p(dim);	buffer = new RAMBuffer(p.getMemUsed()*buffersize, dim);}BufferedKMeans::~BufferedKMeans(){	delete buffer;}void BufferedKMeans::fillBuffer(void){	float *v;	Singleton *p;	database->reset();	while((v = database->getPoint()) && (p = buffer->newSingleton()))		p->set(v);}void BufferedKMeans::reset(void){	currentPoint = buffer->getRetainedSet();}float* BufferedKMeans::getPoint(void){	if(!currentPoint)		return 0;	float *v = currentPoint->get();	currentPoint = currentPoint->getNext();	return v;}/* End of file kmeans.cpp */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -