📄 evaluation.c
字号:
/* Ant-based Clustering Copyright (C) 2004 Julia Handl Email: Julia.Handl@gmx.de This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/#include "evaluation.h"#include <time.h>evaluation::evaluation(conf * c) { par = c; docindex = NULL;}evaluation::~evaluation() { if (docindex != NULL) { for (int i=0; i<par->binsize; i++) { delete docindex[i]; } delete [] docindex; } }void evaluation::init(databin<USED_DATA_TYPE> * b, clustering * cl) { bin = b; clust = cl; docindex = NULL;}void evaluation::init(databin<USED_DATA_TYPE> * b, grid * g, clustering * cl, position<double> ** index) { bin = b; env = g; clust = cl; if (index != NULL) { docindex = index; } else { if (env != NULL) { // store the positions for all data elements docindex = new position<double>*[par->binsize]; for (int i=0; i<par->binsize; i++) { docindex[i] = NULL; } for (int i=0; i<par->imax; i++) { for (int j=0; j<par->jmax; j++) { if ((*env)(i,j) != FREE) { docindex[(*env)(i,j)] = new position<double>(i,j); } } } } else docindex = NULL; }}double evaluation::square(double x) { return x*x;}double evaluation::spatialdistance(position<int> * p1, position<int> * p2, int torus) { int dx = abs((*p1)[0]-(*p2)[0]); if (torus == TRUE) dx = min(dx, par->imax - dx); int dy = abs((*p1)[1]-(*p2)[1]); if (torus == TRUE) dy = min(dy, par->jmax - dy); return sqrt(square(dx)+square(dy));}double evaluation::spatialdistance(position<double> * p1, position<double> * p2, int torus) { double dx = abs((*p1)[0]-(*p2)[0]); if (torus == TRUE) dx = min(dx, par->imax - dx); double dy = abs((*p1)[1]-(*p2)[1]); if (torus == TRUE) dy = min(dy, par->jmax - dy); return sqrt(square(dx)+square(dy));}/**************************************************************************************** Evaluation measures for unsupervised clustering, i.e. when the real partitions are NOT known - Variance - Dunn index Both were used in the final experiments ****************************************************************************************/double evaluation::dunn_av() { if (clust->num <= 1) { //cerr << "Dunn Index cannot be computed for one single cluster" << endl; return 0; } double diam = 0.0; double diamold = 0.0; int ** assignment = new int * [clust->num]; int * ctr = new int[clust->num]; for (int i=0; i<clust->num; i++) { ctr[i] = 0; assignment[i] = new int[par->binsize+1]; for (int j=0; j<=par->binsize; j++) { assignment[i][j] = FREE; } } for (int i=0; i<par->binsize; i++) { int index = (*clust)[i]; assignment[index][ctr[index]] = i; ctr[index]++; } // find maximum cluster diameter based on average distance for (int i=0; i<clust->num; i++) { int j = 0; diam = 0.0; int ctr = 0; while (assignment[i][j] != FREE) { int k = j+1; while (assignment[i][k] != FREE) { diam += bin->precomputed_d(assignment[i][j],assignment[i][k]); ctr++; k++; } j++; } diam /= double(ctr); diam = max(diam, diamold); diamold = diam; } // find minimal cluster distance double mind = 0; for (int i=0; i<clust->num; i++) { data<USED_DATA_TYPE> di(par, clust->centres[i]); for (int j=0; j<i; j++) { data<USED_DATA_TYPE> dj(par, clust->centres[j]); if ((i == 1) && (j == 0)) mind = di.distanceto(dj) / par->max; mind = min(mind, di.distanceto(dj) / par->max); } } for (int i=0; i<clust->num; i++) { delete [] assignment[i]; } delete [] assignment; delete [] ctr; return mind / diam;}double evaluation::variance() { double * var = new double[clust->num]; int * ctr = new int[clust->num]; // initialisation for (int i=0; i<clust->num; i++) { var[i] = 0.0; ctr[i] = 0; } for (int i=0; i<par->binsize; i++) { double diff = 0.0; int p = clust->partition[i]; data<USED_DATA_TYPE> di(par, clust->centres[p]); diff = (*bin)[i].distanceto(di); var[p] += diff*diff; } double totalvariance = 0.0; for (int i=0; i<clust->num; i++) { totalvariance += var[i]; } totalvariance /= double(par->binsize); delete [] var; delete [] ctr; return sqrt(totalvariance);}/**************************************************************************************** Evaluation measures for supervised clustering, i.e. when the real partitions are known - Number of identified clusters - F-Measure - Rand-Index In the final experiments only the F-Measure and the Rand-Index were used. ****************************************************************************************/double evaluation::number() { return double(clust->num);}double evaluation::clusternumber() { return double(clust->num);} double evaluation::fmeasure(int b) { int ** assignments = new int* [clust->num]; for (int i=0; i<clust->num; i++) { assignments[i] = new int [par->kclusters]; for (int j=0; j<par->kclusters; j++) { assignments[i][j] = 0; } } for (int i=0; i<par->binsize; i++) { int p = clust->partition[i]; int real_p = (*bin)[i].cluster; assignments[p][real_p]++; } double totalf = 0.0; int num = clust->num; double ** f = new double *[clust->num]; for (int i=0; i<clust->num; i++) { f[i] = new double[par->kclusters]; } for (int i=0; i<clust->num; i++) { int max = 0; int maxnum = 0; int size = 0; for (int j=0; j<par->kclusters; j++) { // determine n_j size += assignments[i][j]; } for (int j=0; j<par->kclusters; j++) { if ((size != 0) && (assignments[i][j] != 0)){ // n_ij / n_i double r = double(assignments[i][j]) / double(par->size_cluster[j]); // n_ij / n_j double p = double(assignments[i][j]) / double(size); // F-value for class j and cluster i f[i][j] = (double(b*b)+1.0)*p*r / (double(b*b)*p + r); } else f[i][j] = 0; } } for (int j=0; j<par->kclusters; j++) { // find max{F(i,j)} double maxf = 0.0; for (int i=0; i<clust->num; i++) { maxf = max(maxf, f[i][j]); } totalf += double(par->size_cluster[j]) / double(par->binsize) * maxf; } for (int i=0; i<clust->num; i++) { delete [] assignments[i]; delete [] f[i]; } delete [] assignments; delete [] f; return totalf;} double evaluation::randindex() { int a = 0; int b = 0; int c = 0; int d = 0; double r = 0.0; for (int i=0; i<par->binsize; i++) { int p1 = clust->partition[i]; int q1 = (*bin)[i].cluster; for (int j=0; j<par->binsize; j++) { int p2 = clust->partition[j]; int q2 = (*bin)[j].cluster; if ((p1 == p2) && (q1 == q2)) a++; if ((p1 == p2) && !(q1 == q2)) b++; if (!(p1 == p2) && (q1 == q2)) c++; if (!(p1 == p2) && !(q1 == q2)) d++; } } r = double(a+d)/double(a+b+c+d); return r;}double evaluation::randindex(clustering * c1, clustering * c2, int size) { int a = 0; int b = 0; int c = 0; int d = 0; double r = 0.0; for (int i=0; i<size; i++) { int p1 = c1->partition[i]; int q1 = c2->partition[i]; for (int j=0; j<size; j++) { int p2 = c1->partition[j]; int q2 = c2->partition[j]; if ((p1 == p2) && (q1 == q2)) a++; if ((p1 == p2) && !(q1 == q2)) b++; if (!(p1 == p2) && (q1 == q2)) c++; if (!(p1 == p2) && !(q1 == q2)) d++; } } r = double(a+d)/double(a+b+c+d); return r;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -