📄 kmeans.cpp
字号:
/* Scalable K-means clustering softwareCopyright (C) 2000 Fredrik Farnstrom and James LewisThis program is free software; you can redistribute it and/ormodify it under the terms of the GNU General Public Licenseas published by the Free Software Foundation; either version 2of the License, or (at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.See the file README.TXT for more information.*//* kmeans.cpp */#include <math.h>#include <stdlib.h>#include <stdio.h>#include "singleton.h"#include "subcluster.h"#include "kmeans.h"#include "database.h"#include "syntheticdata.h"#include "rambuffer.h"void KMeans::setup(void){ int i; Subcluster *m; clusters = 0; for(i = 0; i < numClusters; i++) { m = new Subcluster(dimensions); if(clusters) clusters->setPrevious(m); m->setNext(clusters); clusters = m; }}void KMeans::initModel(void){ Subcluster *c; Singleton p(dimensions); int i, j; float *x; database->reset(); for(c = clusters; c; c = c->getNext()) { j = (rand() >> 3) % 80; for(i = 0; i <= j; i++) x = database->getPoint(); p.set(x); c->addPoint(&p); c->updateMean(); }}Subcluster* KMeans::closestCluster(Singleton *p){ float d = 1e20, t; Subcluster *m, *r; for(m = clusters; m; m = m->getNext()) if((t = m->distanceSquared(p)) < d) { r = m; d = t; } return r;}KMeans::KMeans(Database *db, int k, int dim) : database(db), numClusters(k), dimensions(dim){ convergenceThreshold = defaultConvergenceThreshold; setup();}KMeans::~KMeans(){ Subcluster *c, *nc; for(c = clusters; c; c = nc) { nc = c->getNext(); delete c; }}void KMeans::cluster(long numpoints){ Singleton p(dimensions); float *x, d; Subcluster *c; float div = ((float)dimensions)*dimensions*numClusters; FILE *f; long cnt; initModel(); if(!numpoints) numpoints = 10000000; numIterations = 0; do { reset(); for(c = clusters; c; c = c->getNext()) c->clear(); cnt = 0; while((x = getPoint()) && (cnt++ < numpoints)) { p.set(x); c = closestCluster(&p); c->addPoint(&p); } d = 0; for(c = clusters; c; c = c->getNext()) d += c->updateMean(); d /= div; numIterations++; } while(d > convergenceThreshold);/* f = fopen("kmeans.m", "w"); fprintf(f, "clf\n"); fprintf(f, "hold on\n"); for(c = clusters; c; c = c->getNext()) { fprintf(f, "p=["); c->computeStdDev(); c->print(f); fprintf(f, "];\n"); fprintf(f, "plot(p(1),p(2),'o');\n"); }*/}double KMeans::distanceToTrueClusters(SyntheticData *db){ int i, k, l; int permutation[numClusters]; int used[numClusters]; double md = 1e30, d; for(l = 0; l < numClusters; l++) { used[l] = 0; permutation[l] = -1; } l = 0; k = 0; while(l >= 0) { if(l == numClusters) {// for(i = 0; i < numClusters; i++)// printf("%d ", permutation[i]);// printf("\n"); d = permutationDistance(permutation, db); if(d < md) md = d; l--; continue; } if(permutation[l] >= 0) used[permutation[l]] = 0; while(++permutation[l] < numClusters && used[permutation[l]]); if(permutation[l] >= numClusters) l--; else { used[permutation[l]] = 1; l++; if(l < numClusters) permutation[l] = -1; } } return md;}double KMeans::permutationDistance(int *permut, SyntheticData *db){ Subcluster *c; double distance = 0; Singleton p(dimensions); int i = 0; for(c = clusters; c; c = c->getNext()) { p.set(db->getMeans(permut[i++])); distance += sqrt(c->distanceSquared(&p)); } return distance;}double KMeans::logLikelihood(void){ Subcluster *c; float *x; Singleton p(dimensions); double ll, d, ll2; for(c = clusters; c; c = c->getNext()) { c->computeStdDev(); } database->reset(); ll = 0; while(x = database->getPoint()) { p.set(x); c = closestCluster(&p); d = c->distanceSquared(&p); ll2 = ll; ll += d; if(ll < ll2) fprintf(stderr, "Overflow\n"); } return ll;}void KMeans::reset(void){ database->reset();}float* KMeans::getPoint(void){ return database->getPoint();}BufferedKMeans::BufferedKMeans(Database *db, int k, int dim, int buffersize) : KMeans(db, k, dim){ Singleton p(dim); buffer = new RAMBuffer(p.getMemUsed()*buffersize, dim);}BufferedKMeans::~BufferedKMeans(){ delete buffer;}void BufferedKMeans::fillBuffer(void){ float *v; Singleton *p; database->reset(); while((v = database->getPoint()) && (p = buffer->newSingleton())) p->set(v);}void BufferedKMeans::reset(void){ currentPoint = buffer->getRetainedSet();}float* BufferedKMeans::getPoint(void){ if(!currentPoint) return 0; float *v = currentPoint->get(); currentPoint = currentPoint->getNext(); return v;}/* End of file kmeans.cpp */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -