⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster2.c

📁 This is the source code of a data mining algorithm.
💻 C
📖 第 1 页 / 共 4 页
字号:
      if (!(t > 0)) continue;   /* and check against maximum ratio */      a = t /(s -1);            /* compute the regularization value */    }                           /* to ensure the maximum ratio */    assert(a >= 0);             /* check the (computed) shift value */    mat_diaadds(c->cov, a);     /* and add it to the diagonal */    if (clset->type & CLS_COVARS) { /* if full covariance matrix */      for (t = 1, k = n; --k >= 0; )        t *= c->dif[k] += a;    /* update the eigenvalues */      c->var = ((t >= MINDET) && (t <= MAXDET))             ? pow(t, 1.0/n) : exp(vec_sumlog(c->dif, n) /n); }    else {                      /* if only variances */      t = mat_diaprod(c->cov);  /* compute the determinant */      c->var = ((t >= MINDET) && (t <= MAXDET))             ? pow(t, 1.0/n) : exp(mat_dialog(c->cov) /n);    }                           /* compute the new isotropic variance */  }}  /* _regshape() *//*--------------------------------------------------------------------*/static void _regsize (CLSET *clset){                               /* --- regularize cluster sizes */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  min, max;             /* minimum and maximum size */  double  s, t, a, b;           /* temporary buffers */  assert(clset                  /* check the function argument */     && (clset->type & CLS_SIZE) && !(clset->type & CLS_JOINT)     && ((clset->fwexp <= 0) || (clset->fwexp == 1)));  s = clset->regps[2];          /* get the scaling factor, */  a = clset->regps[1] *0.5;     /* the variance exponent, and */  b = clset->regps[0];          /* the regularization offset */  if ((a <= 0) || (s == 0)      /* check whether regularization */  || ((s == 1) && (b == 0)))    /* is actually to be done */    return;                     /* (active parameter combination) */  c = clset->cls +(i = clset->clscnt);  if (a == 1) {                 /* if to use the isotropic variance, 
*/    while (--i >= 0) { --c;     /* simply copy the isotropic variance */      c->d2 = c->var; } }       /* (most natural size measure) */  else {                        /* if to use some power instead, */    while (--i >= 0) { --c;     /* compute the requested power */      c->d2 = pow(c->var, a); } /* of the isotropic variance */  }                             /* as the cluster size measure */  if (b < -1) {                 /* if alternative version */    min = DBL_MAX; max = 0;     /* initialize the size range */    for (c += i = clset->clscnt; --i >= 0; ) {      t = (--c)->d2;            /* traverse the cluster sizes */      if (t < min) min = t;     /* and determine their minimum */      if (t > max) max = t;     /* and their maximum for the */    }                           /* size ratio computation */    t = max +b *min;            /* compute numerator of fraction, */    if (!(t > 0)) return;       /* check against the maximum ratio, */    b = t /(-1 -b);             /* and compute the necessary offset */  }                             /* to ensure the maximum ratio */  assert(b >= 0);               /* check the (computed) offset */  t  = 0;                       /* initialize the cluster size sum */  c += i = clset->clscnt;       /* and traverse the cluster array */  if (s <= 0) {                 /* if to compute simplified version */    if (b < 0) return;          /* check for a non-negative offset */    while (--i >= 0)            /* increase the cluster sizes */      (--c)->d2 += b;           /* by the given offset and */    s = -s; }                   /* get the (re)scaling factor */  else if (b < 0) {             /* if to equalize the sizes fully */    while (--i >= 0) { --c;     /* sum the cluster sizes and */      t += c->d2; c->d2 = 1; }  /* set unit cluster sizes */    s *= t /clset->clscnt; }    /* compute the (re)scaling factor */  else {                        /* if to equalize the sizes partially */    while (--i >= 0) { --c;     /* sum the 
cluster sizes and */      t += c->d2; c->d2 += b; } /* increase them by the given offset */    s *= t/(t +clset->clscnt *b);  }                             /* compute the (re)scaling factor */  a = 1/a;                      /* get the inverse variance exponent */  for (c += i = clset->clscnt; --i >= 0; ) {    t = pow(s *(--c)->d2, a);   /* compute the new cluster size */    b = t /c->var; c->var = t;  /* and the rescaling factor */    if (!(clset->type & CLS_COVARS))      mat_diamuls(c->cov, b);   /* rescale the variances */    else {                      /* if adaptable covariances */      mat_mulsx(c->cov, c->cov, b, MAT_UPPER);      if (clset->eigen) vec_muls(c->dif, clset->incnt, c->dif, b);      else mat_mulsx(c->inv, c->inv, sqrt(b), MAT_LOWER);    }                           /* rescale the eigenvalues or */  }                             /* the Cholesky decomposition */}  /* _regsize() *//*--------------------------------------------------------------------*/static void _regwgts (CLSET *clset){                               /* --- regularize cluster weights */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  min, max;             /* minimum and maximum weight */  double  a, t, w;              /* regularization parameter, buffers */  assert(clset                  /* check the function arguments */     && (clset->type & CLS_WEIGHT) && !(clset->type & CLS_JOINT));  a = clset->regps[4];          /* get the regularization parameter */  assert((a < -1) || (a > 0));  /* and check for the active range */  if (a < 0) {                  /* if alternative reg. 
version, */    min = DBL_MAX; max = 0;     /* initialize the weight range */    for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) {      w = mat_getwgt((--c)->cov);      if (w < min) min = w;     /* traverse the clusters and */      if (w > max) max = w;     /* determine the weight range */    }                           /* (minimum and maximum weight) */    t = max +a *min;            /* compute numerator of fraction, */    if (!(t > 0)) return;       /* check against the maximum ratio, */    a = t /(-1 -a);             /* and compute the necessary offset */  }  assert(a >= 0);               /* check the (computed) offset */  t = 1 /(1 +a *clset->clscnt); /* compute the normalization factor */  for (c = clset->cls +(i = clset->clscnt); --i >= 0; ) {    w = mat_getwgt((--c)->cov); /* traverse the clusters */    mat_setwgt(c->cov, t *(w + a));  }                             /* compute new cluster weights */}  /* _regwgts() *//*--------------------------------------------------------------------*/static void _regctrs (CLSET *clset){                               /* --- regularize the centers */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  assert(clset                  /* check the function argument */  &&    (clset->method & (CLS_ORIGIN|CLS_UNIT)));  c = clset->cls +(i = clset->clscnt);  if      (clset->method & CLS_ORIGIN) {    while (--i >= 0) { --c;     /* fix cluster center at origin */      vec_init(c->ctr, clset->incnt, -1); } }  else if (clset->method & CLS_UNIT) {    while (--i >= 0) { --c;     /* make center vectors of unit length */      vec_unitlen(c->ctr, c->ctr, clset->incnt); }  }                             /* (enforce center constraints) */}  /* _regctrs() *//*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/void cls_init (CLSET *clset, int mode, double range,              
 double randfn(void), const double *vec){                               /* --- initialize a set of clusters */  int     i, k, n;              /* loop variables */  CLUSTER *c;                   /* to traverse the clusters */  NSTATS  *nst;                 /* simple numerical statistics */  double  *z, *b;               /* cluster center, buffer */  double  x, d, m;              /* buffers for computations */  assert(clset && randfn);      /* check the function arguments */  clset->setup = 0;             /* cluster set needs setup */  /* --- initialize other parameters --- */  if (((mode & 0xf) != CLS_POINTS) /* if not called second time */  ||  (clset->init <= 0) || (clset->init >= clset->clscnt)) {    clset->type = CLS_CENTER;   /* only centers are initialized */    for (c = clset->cls +(k = clset->clscnt); --k >= 0; ) {      (--c)->size = 1;          /* set a uniform size of 1 */      mat_diasetx(c->cov, 1);   /* for each cluster and */    }                           /* a unit covariance matrix */  }                             /* (no shape and size parameters) */  /* --- special initialization for centers at origin --- */  if (mode & CLS_ORIGIN) {      /* if to set centers to origin, */    clset->type |= CLS_VARS;    /* random variances are needed */    clset->tnew |= CLS_VARS;    /* (otherwise there are no params.) 
*/    for (c = clset->cls +(k = clset->clscnt); --k >= 0; ) {      (--c)->size = 1;          /* set a uniform size of 1 */      for (z = c->ctr +(i = clset->incnt); --i >= 0; ) {        *--z = 0;               /* set all coordinates to zero */        mat_set(c->cov, i, i, 0.5 +0.8*randfn());      }                         /* initialize the diagonal of */    }                           /* the covariance matrix randomly */  }                             /* (will be normalized in cls_setup) */  /* --- initialize cluster centers --- */  nst = clset->nst;             /* get the numerical statistics */  switch (mode & 0xf) {         /* evaluate the initialization mode */    case CLS_CENTER:            /* -- center of the data space */      z = clset->cls->ctr;      /* compute the center */      nst_center(nst, z);       /* of the data space */      for (c = clset->cls +(k = clset->clscnt); --k > 0; )        vec_copy((--c)->ctr, z, clset->incnt);      break;                    /* copy the center to all clusters */    case CLS_DIAG:              /* -- diagonal of the data space */    case CLS_LATIN:             /* -- latin hypercube sampling */      for (i = clset->incnt; --i >= 0; ) {        m = nst_max(nst, i);    /* compute value decrement */        d = (m -nst_min(nst, i)) / clset->clscnt;        x = m -0.5*d;           /* compute last value */        for (c = clset->cls +(k = clset->clscnt); --k >= 0; ) {          (--c)->ctr[i] = x; x -= d; }      }                         /* set equally spaced values */      if (mode == CLS_DIAG)     /* if only to set the diagonal, */        break;                  /* there is nothing else to be done */    /* case CLS_LATIN: */       /* -- latin hypercube sampling */      c = clset->cls;           /* shuffle elements of the centers */      for (n = clset->clscnt; --n > 0; ) {        for (i = clset->incnt; --i >= 0; ) {          k = (int)((n+1) *randfn());          if      (k > n) k = n;   /* compute a random index in */          else 
if (k < 0) k = 0;   /* the remaining set of centers */          x           = c[k].ctr[i];          c[k].ctr[i] = c[n].ctr[i];          c[n].ctr[i] = x;      /* exchange the i-th elements of the */	}                       /* k-th and the n-th cluster center */      } break;                  /* (shuffle dimensions independently) */    case CLS_POINTS:            /* -- given points in the data space */      if (vec) nst_norm(nst, vec, clset->vec);      vec = clset->vec;         /* scale given vector to the buffer */      if (clset->init >= clset->clscnt)        clset->init = 0;        /* if all clusters are init., restart */      c = clset->cls +clset->init++;      vec_copy(c->ctr, vec, clset->incnt);      break;                    /* copy the given vector */    case CLS_UNIFORM:           /* -- uniformly distributed */    default:                    /* (this is also the default) */      nst_spans(nst, b = clset->vec);  /* get the value spans */      for (c = clset->cls +(k = clset->clscnt); --k >= 0; )        for (z = (--c)->ctr, i = clset->incnt; --i >= 0; )          z[i] = nst_min(nst, i) +b[i] *randfn();      break;                    /* set cluster centers to random */  }                             /* points in the data space */  /* --- scale the coordinates --- */  if ((mode & 0xf) != CLS_POINTS)    for (c = clset->cls +(k = clset->clscnt); --k >= 0; ) {      --c; nst_norm(nst, c->ctr, c->ctr); }  if (((mode & 0xf) != CLS_POINTS)  ||  (clset->init >= clset->clscnt)) {    /* --- add a random offset --- */    if (range > 0) {            /* if a range for offsets is given */      for (c = clset->cls +(k = clset->clscnt); --k >= 0; )        for (z = (--c)->ctr +(i = clset->incnt); --i >= 0; )          *--z += (2 *randfn() -1) *range;    }                           /* add a random offset to all values */    /* --- normalize the centers --- */    if (mode & CLS_UNIT)        /* if centers on the unit sphere */      _regctrs(clset);          /* normalize the cluster 
centers */  }}  /* cls_init() *//*--------------------------------------------------------------------*/void cls_method (CLSET *clset, int method){                               /* --- set parameter update method */  int     i;                    /* loop variable */  CLUSTER *c;                   /* to traverse the clusters */  double  t;                    /* initialization value */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -