⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster3.c

📁 聚类算法全集以及内附数据集
💻 C
字号:
/*----------------------------------------------------------------------  File    : cluster3.c  Contents: cluster validity measures  Author  : Christian Borgelt  History : 17.05.2003 file created----------------------------------------------------------------------*/#include <stdlib.h>#include <limits.h>#include <float.h>#include <math.h>#include <assert.h>#include "cluster.h"/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define LN_2         0.69314718055994530942  /* ln(2) *//*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef double CLSVALFN (CLSET *clset);/*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/static double _intexp (double x, int n){                               /* --- fast integer exponentiation */  double r = (n & 1) ? x : 1;   /* result */  for (n >>= 1; n > 0; n >>= 1) {    x *= x;                     /* traverse the powers of 2 and */    if (n & 1) r *= x;          /* if the corr. exponent bit is set */  }                             /* multiply them into the result */  return r;                     /* return the result */}  /* _intexp() *//*--------------------------------------------------------------------*/static double _dist (CLSET *clset, CLUSTER *c1, CLUSTER *c2){                               /* --- distance of clusters */  int    i;                     /* loop variable */  double *b;                    /* center difference vector */  for (b = clset->buf, i = clset->incnt; --i >= 0; )    b[i] = c1->ctr[i] -c2->ctr[i];  if (clset->type & CLS_COVARS) /* if to use covariances */    return 0.5 *(mat_mulvmv(c1->inv, b) +mat_mulvmv(c2->inv, b));  if (clset->type & CLS_VARS)   /* if to use only variances */    return 0.5 *(mat_mulvdv(c1->inv, b) +mat_mulvdv(c2->inv, b));  if (clset->type & CLS_SIZE)   /* if to use only isotropic variance */    return vec_sqrlen(b, clset->incnt) / (0.5 *(c1->var +c2->var));  return vec_sqrlen(b, clset->incnt);}  /* _dist() */                /* if to use no size *//*----------------------------------------------------------------------  Cluster Validity Functions----------------------------------------------------------------------*/static double _objfn (CLSET *clset){                               /* --- objective function */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  r = 0;                /* result */  for (c = clset->cls +(i = clset->clscnt); --i >= 0; )    r += (--c)->val[1];         /* sum the weighted distance sums */  return r;                     /* and return this sum */}  /* _objfn() *//*--------------------------------------------------------------------*/static double _coverage (CLSET *clset){                               /* --- coverage of data points */  return clset->eval.sum;       /* return the sum of membership degs. */}  /* _coverage() *//*--------------------------------------------------------------------*/static double _fdbidx (CLSET *clset){                               /* --- fuzzy Davies-Bouldin index */  int     i, k;                 /* loop variables */  CLUSTER *c1, *c2;             /* to traverse the clusters */  double  max, t;               /* maximum of values, buffer */  double  r = 0;                /* result */  for (c1 = clset->cls +(i = clset->clscnt); --i >= 0; ) {    --c1;                       /* traverse the clusters */    c1->val[3] = (c1->val[0] > 0) ? c1->val[1]/c1->val[0] : 0;  }                             /* compute the average distances */  for (c1 = clset->cls +(i = clset->clscnt); --i >= 0; ) {    --c1; max = 0;              /* traverse the clusters */    for (c2 = clset->cls +(k = clset->clscnt); --k >= 0; ) {      if (--c2 == c1) continue; /* traverse the other clusters */      t = _dist(clset, c1, c2); /* compute the cluster distance */      t = (t > 0) ? (c1->val[3] +c2->val[3]) /t : 0;      if (t > max) max = t;     /* determine the maximum */    }                           /* of the size to distance ratio */    r += max;                   /* and sum these maxima */  }  return r /clset->clscnt;      /* return the average over clusters */}  /* _fdbidx() *//*--------------------------------------------------------------------*/static double _pcoeff (CLSET *clset){                               /* --- partition coefficient */  return clset->eval.sqr /clset->eval.cnt;}  /* _pcoeff() *//*--------------------------------------------------------------------*/static double _pcnorm (CLSET *clset){                               /* --- norm. partition coefficient */  if ((clset->clscnt <= 1) || (clset->eval.cnt <= 0))    return 0;                   /* check for valid denominators */  return 1 -(clset->clscnt /(clset->clscnt -1))           *(1 -clset->eval.sqr /clset->eval.cnt);}  /* _pcnorm() *//*--------------------------------------------------------------------*/static double _pentropy (CLSET *clset){                               /* --- partition entropy */  return (clset->eval.ent < 0)       ? -clset->eval.ent /(clset->clscnt*LN_2) : 0;}  /* _pentropy() *//*--------------------------------------------------------------------*/static double _fhypvol (CLSET *clset){                               /* --- fuzzy hyper-volume */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  r = 0;                /* result */  for (c = clset->cls +(i = clset->clscnt); --i >= 0; )    r += pow((--c)->var, 0.5*clset->incnt);  return r;                     /* sum the roots of the determinants */}  /* _fhypvol() */             /* and return this sum *//*--------------------------------------------------------------------*/static double _prpexp (CLSET *clset){                               /* --- proportion exponent */  return clset->eval.pxp /LN_2;}  /* _prpexp() *//*--------------------------------------------------------------------*/static double _sepdeg (CLSET *clset){                               /* --- separation degree */  int     i, k;                 /* loop variables */  CLUSTER *c1, *c2;             /* to traverse the clusters */  double  min = DBL_MAX;        /* minimum of cluster distances */  double  r   = 0, t;           /* result, temporary buffer */  if (clset->clscnt <= 1) return 0;  for (c1 = clset->cls +(i = clset->clscnt); --i >= 0; ) {    r += (--c1)->val[1];        /* compute the objective function */    for (c2 = clset->cls +(k = clset->clscnt); --k >= 0; ) {      if (--c2 == c1) continue; /* traverse the other clusters */      t = _dist(clset, c1, c2); /* compute the cluster distance */      if (t < min) min = t;     /* and determine the minimum */    }                           /* of these distances */  }  min *= clset->eval.cnt;       /* compute the denominator and */  return (min > 0) ? r/min : 0; /* return the separation degree */}  /* _sepdeg() *//*--------------------------------------------------------------------*/static double _partdens (CLSET *clset){                               /* --- partition density */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  r = 0, d = 0;         /* result and denominator */  for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) {    r += (--c)->val[2];         /* sum the assigned points sum */    d += pow(c->var, 0.5*clset->incnt);  }                             /* sum the roots of the determinants */  return (d > 0) ? r/d : 0;     /* return the partition density */}  /* _partdens() *//*--------------------------------------------------------------------*/static double _pdavg (CLSET *clset){                               /* --- average partition density */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  r = 0, t;             /* result, temporary buffer */  for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) {    t = pow((--c)->var, 0.5*clset->incnt);    if (t > 0) r += c->val[2] /t;  }                             /* cluster-spec. partition density */  return r / clset->clscnt;     /* compute and return average */}  /* _pdavg() *//*--------------------------------------------------------------------*/static double _noise (CLSET *clset){                               /* --- sum of m.s. to noise cluster */  return clset->eval.noise;}  /* _noise() *//*--------------------------------------------------------------------*/static CLSVALFN *_clsvalfn[] = {  /* CLS_OBJFN     0 */  _objfn,  /* CLS_COVERAGE  1 */  _coverage,  /* CLS_FDBIDX    2 */  _fdbidx,  /* CLS_PCOEFF    3 */  _pcoeff,  /* CLS_PCNORM    4 */  _pcnorm,  /* CLS_PENTROPY  5 */  _pentropy,  /* CLS_FHYPVOL   6 */  _fhypvol,  /* CLS_PRPEXP    7 */  _prpexp,  /* CLS_SEPDEG    8 */  _sepdeg,  /* CLS_PARTDENS  9 */  _partdens,  /* CLS_PDAVG    10 */  _pdavg,  /* CLS_NOISE    11 */  _noise,};/*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/void cls_evcfg (CLSET *clset, int measure, double param){                               /* --- configure cluster evaluation */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  CLSEVAL *e = &clset->eval;    /* evaluation variables */  assert(clset);                /* check the function argument */  if (measure < 0) {            /* if no measure is given, */    e->flags = 0;               /* clear all measure flags */    for (c = clset->cls +(i = clset->clscnt); --i >= 0; )      c->val[0] = c->val[1] = c->val[2] = 0;    e->cnt = e->sum = e->sqr = e->ent = e->pxp = e->noise = 0; }  else {                        /* if a measure is given */    e->flags |= 1 << measure;   /* set the measure flag */    if (measure == CLS_PRPEXP)  /* note the parameter */      e->rad = param;           /* for the proportion exponent */  }                             /* (minimal membership degree) */}  /* cls_evcfg() *//*--------------------------------------------------------------------*/void cls_evaggr (CLSET *clset, const double *vec, double weight){                               /* --- aggregate for evaluation */  int     i, k, n;              /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  CLSEVAL *e;                   /* evaluation variables */  double  max = 0;              /* maximum membership degree */  double  t, x, y;              /* temporary buffer */  assert(clset);                /* check the function argument */  cls_exec(clset, vec, NULL);   /* execute the cluster set */  e = &clset->eval;             /* get the evaluation variables */  e->cnt += weight;             /* sum the pattern weight */  x = fabs(clset->msexp);       /* get the adaptation exponent */  if (x <= 0) x = 1;            /* and adapt it */  y = (e->flags & CLS_PENTROPY) ? 0 : DBL_MAX;  for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) {    t = (--c)->msd;             /* traverse the clusters */    e->sum += t*weight;         /* sum the membership degrees */    e->sqr += t*t*weight;       /* and their squares */    if (t > max) max = t;       /* determine their maximum */    if (t > y) e->ent += t *log(t);    if (c->d2 < e->rad) c->val[2] += t;    if      (x == 2) t *= 2;    /* use adaptation exponent */    else if (x != 1) t = pow(t, x);    c->val[0] += t *= weight;   /* sum the membership degrees */    c->val[1] += t *c->d2;      /* and the weighted distances */  }                             /* for each cluster */  e->noise += clset->msd[1];    /* sum memberships to noise cluster */  if (e->flags & CLS_PRPEXP) {  /* if to compute proportion exponent */    x = 0; y = -1; i = clset->clscnt;    n = (int)floor(1/max);      /* compute number of clusters */    if (n > clset->clscnt << 6) n = clset->clscnt << 6;    for (k = 1; k <= n; k++) {  /* and traverse them */      y *= (k -i -1) / k;       /* compute next t choose k */      x += y *_intexp(1 -k *max, i -1);    }                           /* add next term */    e->pxp -= log(x);           /* sum the logarithms */  }                             /* of cluster-specific terms */}  /* cls_evaggr() *//*--------------------------------------------------------------------*/double cls_eval (CLSET *clset, int measure){                               /* --- compute evaluation measure */  if ((measure < 0) || (measure >= CLS_VMCNT))    return 0;                   /* check the measure range */  return _clsvalfn[measure](clset);}  /* cls_eval() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -