📄 cluster3.c
字号:
/*---------------------------------------------------------------------- File : cluster3.c Contents: cluster validity measures Author : Christian Borgelt History : 17.05.2003 file created----------------------------------------------------------------------*/#include <stdlib.h>#include <limits.h>#include <float.h>#include <math.h>#include <assert.h>#include "cluster.h"/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define LN_2 0.69314718055994530942 /* ln(2) *//*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef double CLSVALFN (CLSET *clset);/*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static double _intexp (double x, int n){ /* --- fast integer exponentiation */ double r = (n & 1) ? x : 1; /* result */ for (n >>= 1; n > 0; n >>= 1) { x *= x; /* traverse the powers of 2 and */ if (n & 1) r *= x; /* if the corr. exponent bit is set */ } /* multiply them into the result */ return r; /* return the result */} /* _intexp() *//*--------------------------------------------------------------------*/static double _dist (CLSET *clset, CLUSTER *c1, CLUSTER *c2){ /* --- distance of clusters */ int i; /* loop variable */ double *b; /* center difference vector */ for (b = clset->buf, i = clset->incnt; --i >= 0; ) b[i] = c1->ctr[i] -c2->ctr[i]; if (clset->type & CLS_COVARS) /* if to use covariances */ return 0.5 *(mat_mulvmv(c1->inv, b) +mat_mulvmv(c2->inv, b)); if (clset->type & CLS_VARS) /* if to use only variances */ return 0.5 *(mat_mulvdv(c1->inv, b) +mat_mulvdv(c2->inv, b)); if (clset->type & CLS_SIZE) /* if to use only isotropic variance */ return vec_sqrlen(b, clset->incnt) / (0.5 *(c1->var +c2->var)); return vec_sqrlen(b, clset->incnt);} /* _dist() */ /* if to use no size *//*---------------------------------------------------------------------- Cluster Validity Functions----------------------------------------------------------------------*/static double _objfn (CLSET *clset){ /* --- objective function */ int i; /* loop variable */ CLUSTER *c; /* to traverse the clusters */ double r = 0; /* result */ for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) r += (--c)->val[1]; /* sum the weighted distance sums */ return r; /* and return this sum */} /* _objfn() *//*--------------------------------------------------------------------*/static double _coverage (CLSET *clset){ /* --- coverage of data points */ return clset->eval.sum; /* return the sum of membership degs. */} /* _coverage() *//*--------------------------------------------------------------------*/static double _fdbidx (CLSET *clset){ /* --- fuzzy Davies-Bouldin index */ int i, k; /* loop variables */ CLUSTER *c1, *c2; /* to traverse the clusters */ double max, t; /* maximum of values, buffer */ double r = 0; /* result */ for (c1 = clset->cls +(i = clset->clscnt); --i >= 0; ) { --c1; /* traverse the clusters */ c1->val[3] = (c1->val[0] > 0) ? c1->val[1]/c1->val[0] : 0; } /* compute the average distances */ for (c1 = clset->cls +(i = clset->clscnt); --i >= 0; ) { --c1; max = 0; /* traverse the clusters */ for (c2 = clset->cls +(k = clset->clscnt); --k >= 0; ) { if (--c2 == c1) continue; /* traverse the other clusters */ t = _dist(clset, c1, c2); /* compute the cluster distance */ t = (t > 0) ? (c1->val[3] +c2->val[3]) /t : 0; if (t > max) max = t; /* determine the maximum */ } /* of the size to distance ratio */ r += max; /* and sum these maxima */ } return r /clset->clscnt; /* return the average over clusters */} /* _fdbidx() *//*--------------------------------------------------------------------*/static double _pcoeff (CLSET *clset){ /* --- partition coefficient */ return clset->eval.sqr /clset->eval.cnt;} /* _pcoeff() *//*--------------------------------------------------------------------*/static double _pcnorm (CLSET *clset){ /* --- norm. partition coefficient */ if ((clset->clscnt <= 1) || (clset->eval.cnt <= 0)) return 0; /* check for valid denominators */ return 1 -(clset->clscnt /(clset->clscnt -1)) *(1 -clset->eval.sqr /clset->eval.cnt);} /* _pcnorm() *//*--------------------------------------------------------------------*/static double _pentropy (CLSET *clset){ /* --- partition entropy */ return (clset->eval.ent < 0) ? -clset->eval.ent /(clset->clscnt*LN_2) : 0;} /* _pentropy() *//*--------------------------------------------------------------------*/static double _fhypvol (CLSET *clset){ /* --- fuzzy hyper-volume */ int i; /* loop variable */ CLUSTER *c; /* to traverse the clusters */ double r = 0; /* result */ for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) r += pow((--c)->var, 0.5*clset->incnt); return r; /* sum the roots of the determinants */} /* _fhypvol() */ /* and return this sum *//*--------------------------------------------------------------------*/static double _prpexp (CLSET *clset){ /* --- proportion exponent */ return clset->eval.pxp /LN_2;} /* _prpexp() *//*--------------------------------------------------------------------*/static double _sepdeg (CLSET *clset){ /* --- separation degree */ int i, k; /* loop variables */ CLUSTER *c1, *c2; /* to traverse the clusters */ double min = DBL_MAX; /* minimum of cluster distances */ double r = 0, t; /* result, temporary buffer */ if (clset->clscnt <= 1) return 0; for (c1 = clset->cls +(i = clset->clscnt); --i >= 0; ) { r += (--c1)->val[1]; /* compute the objective function */ for (c2 = clset->cls +(k = clset->clscnt); --k >= 0; ) { if (--c2 == c1) continue; /* traverse the other clusters */ t = _dist(clset, c1, c2); /* compute the cluster distance */ if (t < min) min = t; /* and determine the minimum */ } /* of these distances */ } min *= clset->eval.cnt; /* compute the denominator and */ return (min > 0) ? r/min : 0; /* return the separation degree */} /* _sepdeg() *//*--------------------------------------------------------------------*/static double _partdens (CLSET *clset){ /* --- partition density */ int i; /* loop variable */ CLUSTER *c; /* to traverse the clusters */ double r = 0, d = 0; /* result and denominator */ for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) { r += (--c)->val[2]; /* sum the assigned points sum */ d += pow(c->var, 0.5*clset->incnt); } /* sum the roots of the determinants */ return (d > 0) ? r/d : 0; /* return the partition density */} /* _partdens() *//*--------------------------------------------------------------------*/static double _pdavg (CLSET *clset){ /* --- average partition density */ int i; /* loop variable */ CLUSTER *c; /* to traverse the clusters */ double r = 0, t; /* result, temporary buffer */ for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) { t = pow((--c)->var, 0.5*clset->incnt); if (t > 0) r += c->val[2] /t; } /* cluster-spec. partition density */ return r / clset->clscnt; /* compute and return average */} /* _pdavg() *//*--------------------------------------------------------------------*/static double _noise (CLSET *clset){ /* --- sum of m.s. to noise cluster */ return clset->eval.noise;} /* _noise() *//*--------------------------------------------------------------------*/static CLSVALFN *_clsvalfn[] = { /* CLS_OBJFN 0 */ _objfn, /* CLS_COVERAGE 1 */ _coverage, /* CLS_FDBIDX 2 */ _fdbidx, /* CLS_PCOEFF 3 */ _pcoeff, /* CLS_PCNORM 4 */ _pcnorm, /* CLS_PENTROPY 5 */ _pentropy, /* CLS_FHYPVOL 6 */ _fhypvol, /* CLS_PRPEXP 7 */ _prpexp, /* CLS_SEPDEG 8 */ _sepdeg, /* CLS_PARTDENS 9 */ _partdens, /* CLS_PDAVG 10 */ _pdavg, /* CLS_NOISE 11 */ _noise,};/*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/void cls_evcfg (CLSET *clset, int measure, double param){ /* --- configure cluster evaluation */ int i; /* loop variable */ CLUSTER *c; /* to traverse the clusters */ CLSEVAL *e = &clset->eval; /* evaluation variables */ assert(clset); /* check the function argument */ if (measure < 0) { /* if no measure is given, */ e->flags = 0; /* clear all measure flags */ for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) c->val[0] = c->val[1] = c->val[2] = 0; e->cnt = e->sum = e->sqr = e->ent = e->pxp = e->noise = 0; } else { /* if a measure is given */ e->flags |= 1 << measure; /* set the measure flag */ if (measure == CLS_PRPEXP) /* note the parameter */ e->rad = param; /* for the proportion exponent */ } /* (minimal membership degree) */} /* cls_evcfg() *//*--------------------------------------------------------------------*/void cls_evaggr (CLSET *clset, const double *vec, double weight){ /* --- aggregate for evaluation */ int i, k, n; /* loop variable */ CLUSTER *c; /* to traverse the clusters */ CLSEVAL *e; /* evaluation variables */ double max = 0; /* maximum membership degree */ double t, x, y; /* temporary buffer */ assert(clset); /* check the function argument */ cls_exec(clset, vec, NULL); /* execute the cluster set */ e = &clset->eval; /* get the evaluation variables */ e->cnt += weight; /* sum the pattern weight */ x = fabs(clset->msexp); /* get the adaptation exponent */ if (x <= 0) x = 1; /* and adapt it */ y = (e->flags & CLS_PENTROPY) ? 0 : DBL_MAX; for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) { t = (--c)->msd; /* traverse the clusters */ e->sum += t*weight; /* sum the membership degrees */ e->sqr += t*t*weight; /* and their squares */ if (t > max) max = t; /* determine their maximum */ if (t > y) e->ent += t *log(t); if (c->d2 < e->rad) c->val[2] += t; if (x == 2) t *= 2; /* use adaptation exponent */ else if (x != 1) t = pow(t, x); c->val[0] += t *= weight; /* sum the membership degrees */ c->val[1] += t *c->d2; /* and the weighted distances */ } /* for each cluster */ e->noise += clset->msd[1]; /* sum memberships to noise cluster */ if (e->flags & CLS_PRPEXP) { /* if to compute proportion exponent */ x = 0; y = -1; i = clset->clscnt; n = (int)floor(1/max); /* compute number of clusters */ if (n > clset->clscnt << 6) n = clset->clscnt << 6; for (k = 1; k <= n; k++) { /* and traverse them */ y *= (k -i -1) / k; /* compute next t choose k */ x += y *_intexp(1 -k *max, i -1); } /* add next term */ e->pxp -= log(x); /* sum the logarithms */ } /* of cluster-specific terms */} /* cls_evaggr() *//*--------------------------------------------------------------------*/double cls_eval (CLSET *clset, int measure){ /* --- compute evaluation measure */ if ((measure < 0) || (measure >= CLS_VMCNT)) return 0; /* check the measure range */ return _clsvalfn[measure](clset);} /* cls_eval() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -