⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ptree3.c

📁 数据挖掘中的一算法 ines算法 c下实现的。适合初学习数据挖掘者借鉴
💻 C
📖 第 1 页 / 共 3 页
字号:
/*--------------------------------------------------------------------*/static double _quad (PTNODE *node, PTLVL *lvl,                     int measure, double *params){                               /* --- quadratic information measures */  int    i, j;                  /* loop variables */  PTNODE **p;                   /* to traverse the child vector */  float  *c;                    /* to traverse the buffers/counters */  double s_i, s_j, s_ij, s;     /* sums of squared frequencies */  double quad, t;               /* quadratic information gain, buffer */  assert(node && lvl);          /* check the function arguments */  s_i = s_j = s_ij = 0;         /* initialize sums */  for (c = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) {    t = *--c; s_i += t*t; }     /* compute sum_i N_i^2 (child dist.) */  for (c = lvl[0].buf +(j = lvl[0].cnt); --j >= 0; ) {    t = *--c; s_j += t*t; }     /* compute sum_j N_j^2 (parent dist.) */  for (p = node->children +(j = lvl[0].cnt); --j >= 0; ) {    if (!*--p) continue;        /* traverse the existing children */    s = 0;                      /* and the counters of each child */    for (c = ((PTLEAF*)*p)->cnts +(i = lvl[1].cnt); --i >= 0; ) {      t = *--c; s += t*t; }     /* compute sum_ij N_ij^2 */    s_ij += s;                  /* (joint distribution) */  }  t = lvl[0].total; t *= t;     /* compute N^2 only once */  s_i  = t -s_i;                /* N^2/2 H^2_i  = N^2 -sum_i  N_i^2  */  s_j  = t -s_j;                /* N^2/2 H^2_j  = N^2 -sum_j  N_j^2  */  s_ij = t -s_ij;               /* N^2/2 H^2_ij = N^2 -sum_ij N_ij^2 */  quad = s_i +s_j -s_ij;        /* compute quad. info. gain *N^2/2 */  switch (measure) {            /* evaluate measure code */    case PT_QISGR1: if (s_ij     <= 0) return 0;                    quad /= s_ij;      break;    case PT_QISGR2: if (s_i +s_j <= 0) return 0;                    quad /= s_i +s_j;  break;    default       : quad /= 0.5 *t;    break;  }                             /* form requested entropy ratio */  return (quad < 0) ? 0 : quad; /* return the information measure */}  /* _quad() *//*--------------------------------------------------------------------*/static double _gini (PTNODE *node, PTLVL *lvl,                     int measure, double *params){                               /* --- symmetric Gini index */  int    i, j;                  /* loop variables */  PTNODE **p;                   /* to traverse the child vector */  float  *bp, *bc, *bx, *c;     /* to traverse the buffers/counters */  double s_i, s_j, s;           /* sums of squared frequencies */  double w_ij, w_ji;            /* weighted sums of squared freqs. */  double gini, n, t;            /* symmetric Gini index, buffer */  assert(node && lvl);          /* check the function arguments */  s_i = s_j = w_ij = 0;         /* initialize sums */  for (bc = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) {    t = *--bc; s_i += t*t; }    /* compute sum_i N_i^2 (child dist.) */  for (bp = lvl[0].buf +(j = lvl[0].cnt); --j >= 0; ) {    t = *--bp; s_j += t*t; }    /* compute sum_j N_j^2 (parent dist.) */  for (bx = bc +lvl[1].cnt +(i = lvl[1].cnt); --i >= 0; )    *--bx = 0;                  /* clear the extended buffer */  for (p = node->children  +(j = lvl[0].cnt); --j >= 0; ) {    if (!*--p) continue;        /* traverse the existing children */    s = 0;                      /* and the counters of each child */    for (c = ((PTLEAF*)*p)->cnts +(i = lvl[1].cnt); --i >= 0; ) {      t = *--c; s += t *= t;    /* compute N_ij^2 and */      bx[i] += (float)t;        /* sum N_ij^2 over i and */    }                           /* (in buffer bx) over j */    if (bp[j] > 0) w_ij += s /bp[j];  }                             /* compute weighted sum over j */  w_ji = 0;                     /* and afterwards also over i */  for (i = lvl[1].cnt; --i >= 0; )    if (bc[i] > 0) w_ji += bx[i] /bc[i];  n    = lvl[0].total;          /* get the total number of cases */  gini = n *(w_ij +w_ji) -(t = s_i -s_j);  t    = 2 *n *n -t;            /* compute numerator and denominator */  return ((gini <= 0) || (t <= 0)) ? 0 : gini/t;}  /* _gini() */                /* return the symmetric Gini index *//*--------------------------------------------------------------------*/static double _wdiff (PTNODE *node, PTLVL *lvl,                      int measure, double *params){                               /* --- weighted absolute difference */  int    i, j, e;               /* loop variables, exponent */  PTNODE **p;                   /* to traverse the child vector */  float  bp, *bc, *c;           /* to traverse the buffers/counters */  double wdiff = 0, n, t;       /* weighted abs. diff., buffers */  assert(node && lvl && params);/* check the function arguments */  if      (*params == 1) e = 1; /* note simple exponents */  else if (*params == 2) e = 2; /* as an integer value */  else                   e = 0; /* for faster checks */  n = lvl[0].total;             /* get the total frequency */  for (p = node->children +(j = lvl[0].cnt); --j >= 0; ) {    if (!*--p) continue;        /* traverse the existing child nodes */    bp = lvl[0].buf[j];         /* note the parent frequency */    bc = lvl[1].buf +(i = lvl[1].cnt);    c  = ((PTLEAF*)*p)->cnts+i; /* traverse the counters */    if      (e == 1)            /* if exponent for difference is 1, */      while (--i >= 0) {        /* sum weighted absolute differences */        t = *--bc *bp -*--c *n; wdiff += *c *fabs(t); }    else if (e == 2)            /* if exponent for difference is 2, */      while (--i >= 0) {        /* sum weighted squared differences */        t = *--bc *bp -*--c *n; wdiff += *c *t *t; }    else                        /* if any other exponent, */      while (--i >= 0) {        /* raise differences to given power */        t = *--bc *bp -*--c *n; wdiff += *c *pow(fabs(t), *params); }  }  t = n*n;                      /* compute the denominator */  if      (e == 2) t = t*t;     /* of the normalization factor */  else if (e != 1) t = pow(t, *params);  return wdiff / (n*t);         /* return the norm. weighted diff. */}  /* _wdiff() *//*--------------------------------------------------------------------*/static double _chi2 (PTNODE *node, PTLVL *lvl,                     int measure, double *params){                               /* --- chi^2 measure */  int    i, j;                  /* loop variables */  PTNODE **p;                   /* to traverse the child vector */  float  bp, *bc, *c;           /* to traverse the buffers/counters */  double chi2 = 0, n, x, y;     /* chi^2 measure, buffers */  assert(node && lvl);          /* check the function arguments */  n = lvl[0].total;             /* get the total frequency */  for (p = node->children +(j = lvl[0].cnt); --j >= 0; ) {    if (!*--p) continue;        /* traverse the existing children */    bp = lvl[0].buf[j];         /* get the parent frequency */    bc = lvl[1].buf +lvl[1].cnt;    for (c = ((PTLEAF*)*p)->cnts +(i = lvl[1].cnt); --i >= 0; ) {      --c; y = *--bc *bp;       /* traverse the child frequencies */      if (y <= 0) continue;     /* and check the denominator */      x = y - *c *n;            /* compute the numerator and */      chi2 += x*x /y;           /* from it the next term */    }                           /* of the chi^2 measure */  }  if (measure == PT_CHI2NRM) {  /* if normalized version requested */    x = (lvl[0].cnt -1) *(lvl[1].cnt -1);    if (x > 0) chi2 /= x;       /* normalize the chi12 measure by */  }                             /* dividing by the degrees of freedom */  return chi2 / (n*n);          /* return the norm. chi^2 measure */}  /* _chi2() *//*----------------------------------------------------------------------  Possibilistic Conditional Dependence Measures----------------------------------------------------------------------*/static double _spec (PTNODE *node, PTLVL *lvl,                     int measure, double *params){                               /* --- specificity measures */  int    i, j;                  /* loop variables */  PTNODE **p;                   /* to traverse the child vector */  float  *buf, *b, *c;          /* to traverse the buffers/counters */  double s_i, s_j, s_ij;        /* nonspecificity of distributions */  double spec;                  /* specificity gain (ratio) */  assert(node && lvl);          /* check the function arguments */  buf = b = (float*)malloc(lvl[0].cnt *lvl[1].cnt *sizeof(float));  if (!buf) return PT_ERROR;    /* allocate workspace */  for (p = node->children +(j = lvl[0].cnt); --j >= 0; ) {    if (!*--p) continue;        /* traverse the existing children */    for (c = ((PTLEAF*)*p)->cnts +(i = lvl[1].cnt); --i >= 0; )      if (*--c > 0) *b++ = *c;  /* copy the non-zero counters */  }                             /* of the joint distribution */  s_j  = pt_nsp(lvl[0].buf, lvl[0].cnt);  s_i  = pt_nsp(lvl[1].buf, lvl[1].cnt);  s_ij = pt_nsp(buf, (int)(b -buf));  free(buf);                    /* compute the different specs. and */  spec = s_i +s_j -s_ij;        /* from them the specificity gain */  switch (measure) {            /* evaluate the measure code */    case PT_SPCSGR1: if (s_ij     <= 0) return 0;                     spec /= s_ij;               break;    case PT_SPCSGR2: if (s_i +s_j <= 0) return 0;                     spec /= s_i +s_j;           break;    default        : spec /= LN_2 *lvl[1].total; break;  }                             /* form requested specificity ratio */  return spec;                  /* return specificity measure */}  /* _spec() *//*--------------------------------------------------------------------*/static double _pdiff (PTNODE *node, PTLVL *lvl,                      int measure, double *params){                               /* --- poss. weighted abs. difference */  int    i, j, e;               /* loop variables, exponent */  PTNODE **p;                   /* to traverse the child vector */  float  bp, *bc, *c;           /* to traverse the buffers/counters */  double pdiff = 0, t;          /* weighted abs. diff., buffers */  assert(node && lvl);          /* check the function arguments */  if      (*params == 1) e = 1; /* note simple exponents */  else if (*params == 2) e = 2; /* as an integer value */  else                   e = 0; /* for faster checks */  for (p = node->children +(j = lvl[0].cnt); --j >= 0; ) {    if (!*--p) continue;        /* traverse the existing children */    bp = lvl[0].buf[j];         /* get the parent frequency */    bc = lvl[1].buf +(i = lvl[1].cnt);    c  = ((PTLEAF*)*p)->cnts+i; /* traverse the counters */    if      (e == 1)            /* if exponent for difference is 1, */      while (--i >= 0) {        /* sum weighted absolute differences */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -