📄 subcluster.cpp
字号:
/* Scalable K-means clustering softwareCopyright (C) 2000 Fredrik Farnstrom and James LewisThis program is free software; you can redistribute it and/ormodify it under the terms of the GNU General Public Licenseas published by the Free Software Foundation; either version 2of the License, or (at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.See the file README.TXT for more information.*//* subcluster.cpp */#include <math.h>#include <stdio.h>#include "singleton.h"#include "subcluster.h"static int Allocated = 0;Subcluster::Subcluster(int dim) : dimensions(dim), next(0){ sum = new float[dim]; sumSquared = new float[dim]; mean = new float[dim]; stdDev = new float[dim]; numPoints = 0; sqrtNumPoints = 0; char modified = 0; clear(); Allocated++;// fprintf(stdout, "Allocated subcluster.\n");}Subcluster::~Subcluster(){ delete []mean; delete []sum; delete []sumSquared; delete []stdDev; Allocated--;// fprintf(stdout, "Free'd subcluster.\n"); if(Allocated < 0) fprintf(stderr, "Too many calls to ~Subcluster().\n"); else if(Allocated == 0) fprintf(stderr, "~Subcluster() OK.\n");}long Subcluster::getAllocated(void){ return Allocated;}void Subcluster::clear(void){ for(int i = 0; i < dimensions; i++) sum[i] = sumSquared[i] = 0; numPoints = 0; sqrtNumPoints = 0; modified = 0;}void Subcluster::addPoint(Singleton *x){ float *v = x->vector, *s = sum, *s2 = sumSquared; for(int i = 0; i < dimensions; i++) { *s2++ += *v**v; *s++ += *v++; } numPoints++;// printf("addPoint %d %d\n", this, numPoints); modified = 1;}void Subcluster::addCluster(Subcluster *x){ float *a = x->sum, *a2 = x->sumSquared; float *s = sum, *s2 = sumSquared; for(int i = 0; i < dimensions; i++) { *s2++ += *a2++; *s++ += *a++; } numPoints += x->numPoints; modified = 1;}void Subcluster::subCluster(Subcluster *x){ float *a = x->sum, *a2 = x->sumSquared; float *s = sum, *s2 = sumSquared; for(int i = 0; i < dimensions; i++) { *s2++ -= *a2++; *s++ -= *a++; } numPoints -= x->numPoints; modified = 1;}// Return the Mahalanobis distance between the point and the cluster mean.float Subcluster::Mahalanobis(float *x){ float t, dist = 0; float *m = mean, *sd = stdDev; for(int i = 0; i < dimensions; i++) { t = (*x++ - *m++) / *sd++; dist += t*t; } return dist;}float Subcluster::Mahalanobis(Singleton *p){ return Mahalanobis(p->vector);}float Subcluster::Mahalanobis(Subcluster *c){ return Mahalanobis(c->mean);}float Subcluster::distanceSquared(Singleton *x){ float t, dist = 0; float *y = mean, *v = x->vector; for(int i = 0; i < dimensions; i++) { t = *y++ - *v++; dist += t*t; } return dist;}float Subcluster::distanceSquared(Subcluster *x){ float t, dist = 0; float *y = mean, *v = x->mean; for(int i = 0; i < dimensions; i++) { t = *y++ - *v++; dist += t*t; } return dist;}float Subcluster::updateMean(void){ int i; float *s = sum, *m = mean; float t, totdist, e; if(!numPoints) return 0; totdist = 0; for(i = 0; i < dimensions; i++) { e = (t = *s++ / numPoints) - *m; totdist += e*e; *m++ = t; } return totdist;}void Subcluster::computeStdDev(){ if (!modified) return; float *s = sum, *ss = sumSquared, *sd = stdDev; float t; for(int i = 0; i < dimensions; i++) { t = *s++ / numPoints; *sd++ = (float) (sqrt(*ss++ / numPoints - t*t)); } sqrtNumPoints = (float) sqrt(numPoints); modified = 0;}float Subcluster::perturb(Singleton *p, int away, float maxStdDev){ float perturbMean; float U, L; float distU, distL, distMean; float *v = p->vector; float tmp = maxStdDev / sqrtNumPoints; float totalDist = 0; // For each dimension compute the confidence intervals and perturb the cluster. // The perturbed mean is then used to calculate the distance between the // singleton point and the perturbed mean of the subcluster. for(int i = 0; i < dimensions; i++) { U = mean[i] + (tmp * stdDev[i]); L = mean[i] - (tmp * stdDev[i]); distMean = fabs(v[i] - mean[i]); distU = fabs(v[i] - U); distL = fabs(v[i] - L); if (away) { if (distU > distMean && distU > distL) perturbMean = U; else if (distL > distMean && distL > distU) perturbMean = L; else perturbMean = mean[i]; } else { if (distU < distMean && distU < distL) perturbMean = U; else if (distL < distMean && distL < distU) perturbMean = L; else perturbMean = mean[i]; } totalDist += pow((perturbMean - v[i]), 2); } return totalDist;}// Determine if the confidence intervals in each dimension are bounded by beta.int Subcluster::isTight(float beta){ if(numPoints < 2) return 0; for (int i = 0; i < dimensions; i++) { if (stdDev[i] > beta) return 0; } return 1;}void Subcluster::print(FILE *f){ for(int i = 0; i < dimensions; i++) fprintf(f, "%f ", mean[i]); fprintf(f, "\n");}void Subcluster::printConfidence(FILE *f){ for(int i = 0; i < dimensions; i++) fprintf(f, "%f ", stdDev[i]/sqrtNumPoints); fprintf(f, "\n");}/* End of file subcluster.cpp */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -