⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ptree4.c

📁 数据挖掘中的一种算法——ines 算法的 C 语言实现,适合数据挖掘初学者借鉴。
💻 C
📖 第 1 页 / 共 3 页
字号:
{ double *r = pt->res1;         /* --- symmetric Gini index */  double t  = 2 *pt->total *pt->total -r[0] -r[1];  return (t > 0) ? ((r[3] +r[4]) *pt->total -r[0] -r[1]) /t : 0;}  /* _ginisym() */             /* (N w_ji +N w_ij -s_i -s_j) */                                /* / (2 N^2 -s_i -s_j) */static double _ginimod (PTREE *pt){ double *r = pt->res1;         /* --- modified Gini index */  return (r[1] > 0) ? (r[2] /r[1] -r[0] /pt->total) : 0;}  /* _ginimod() */             /* s_ij /s_j - s_i /N */static double _relief (PTREE *pt){ double *r = pt->res1;         /* --- relief measure */  double t  = pt->total *pt->total -r[0];  return ((r[0] > 0) && (t > 0)) ? (r[2] /r[0] -(r[1] -r[2]) /t) : 0;}  /* _relief() */              /* s_ij /s_i -(s_j -s_ij) /(N^2 -s_i) *//*----------------------------------------------------------------------  Bayesian Measures----------------------------------------------------------------------*/static int _bdm (PTREE *pt){                               /* --- Bayesian-Dirichlet metric */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *c;                    /* to traverse the buffer/counters */  double p, q, s, a;            /* (sum of) prior(s), sensitivity */  double bdm, t;                /* Bayesian-Dirichlet metric, buffer */  int    x = 0;                 /* flag for multiple paths */  assert(pt && (pt->total > 0));/* check the function arguments */  lvl = pt->levels +pt->concnt; /* get the leaf level description */  a   = (pt->params[0] > -1) ? pt->params[0]+1 : 1;  a  *= a;                      /* get the sensitivity parameter */  p   = (pt->params[1] != 0) ? pt->params[1]   : 1;  if (p < 0) p /= -lvl->cnt;    /* get the prior/equiv. 
sample size */  for (t = 0, c = lvl->buf +(i = lvl->cnt); --i >= 0; )    t += logGa(*--c *a +p);     /* process the marginal distribution */  s  =  lvl->cnt *p;            /* and compute the reference value */  t += -lvl->cnt *logGa(p) +(logGa(s) -logGa(pt->total *a +s));  pt->res0[0] = t;              /* note the reference result */  if (pt->params[1] < 0) {      /* adapt the prior for the */    p /= pt->fullcnt; x = 1; }  /* likelihood equivalent version */  bdm = 0;                      /* process the joint distribution */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue;   /* traverse the */    q = (x) ? p *leaf->pathcnt : p;   /* nonempty leaves */    for (t = 0, c = leaf->cnts +(i = lvl->cnt); --i >= 0; )      t += logGa(*--c *a +q);   /* process the cond. distribution */    s  =  lvl->cnt *q;          /* and compute a leaf term */    t += -lvl->cnt *logGa(q) +(logGa(s) -logGa(leaf->total *a +s));    bdm += leaf->eval = t;      /* note the result in the leaf */  }                             /* and sum the leaf terms */  pt->res0[1] = bdm;            /* note the Bayesian-Dirichlet metric */  return 0;                     /* return 'ok' */}  /* _bdm() *//*--------------------------------------------------------------------*/static int _bdmod (PTREE *pt){                               /* --- modified BD / K2 metric */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *c;                    /* to traverse the buffer/counters */  double p, q, a;               /* prior and sensitivity */  double bdm, s, t;             /* Bayesian-Dirichlet metric, buffers */  int    x = 0;                 /* flag for multiple paths */  assert(pt && (pt->total > 0));/* check the function arguments */  lvl = pt->levels +pt->concnt; /* get the leaf level */  a   = (pt->params[0] >= 0) ? 
pt->params[0] : 0;  if (a < 0) a = 0;             /* get the sensitivity parameter */  p   = (pt->params[1] != 0) ? pt->params[1] : 1;  if (p < 0) p /= -lvl->cnt;    /* get the prior/equiv. sample size */  for (t = 0, c = lvl->buf +(i = lvl->cnt); --i >= 0; ) {    s = *--c *a +p; t += logGa(s +*c) -logGa(s); }  s  = pt->total*a +lvl->cnt*p; /* process the marginal distribution */  t += logGa(s) -logGa(s +pt->total);  pt->res0[0] = t;              /* note the reference result */  if (pt->params[1] < 0) {      /* adapt the prior for the */    p /= pt->fullcnt; x = 1; }  /* likelihood equivalent version */  bdm = 0;                      /* process the joint distribution */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse the nonempty leaves */    q = (x) ? p *leaf->pathcnt : p; /* process the cond. distribs. */    for (t = 0, c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) {      s = *--c *a +q; t += logGa(s +*c) -logGa(s); }    s  = leaf->total *a +lvl->cnt *q;    t += logGa(s) -logGa(s +leaf->total);    bdm += leaf->eval = t;      /* note the result in the leaf */  }                             /* and sum the leaf terms */  pt->res0[1] = bdm;            /* note the Bayesian-Dirichlet metric */  return 0;                     /* return 'ok' */}  /* _bdmod() *//*--------------------------------------------------------------------*/static double _bfactor (PTREE *pt){                               /* --- Bayes factor computation */  return (pt->res1[1] -pt->res1[0]) /(M_LN2 *pt->total);}  /* _bfactor() */             /* compute log_2(bdm(G) /bdm(G_0)) *//*----------------------------------------------------------------------  Description Length Measures----------------------------------------------------------------------*/static int _dlrel (PTREE *pt){                               /* --- desc. length (rel. freq.) 
*/  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *c;                    /* to traverse the buffer/counters */  double dat, mod, lGc, s;      /* description lengths, buffers */  assert(pt && (pt->total > 0));/* check the function arguments */  lvl = pt->levels +pt->concnt; /* get the leaf level */  lGc = logGa(lvl->cnt);        /* note ln(gamma(number of values)) */  s   = 0;                      /* process the marginal distribution */  for (c = lvl->buf +(i = lvl->cnt); --i >= 0; )    if (*--c > 0) s += *c *log(*c);  pt->res0[0] = pt->total *log(pt->total) -s;  pt->res0[1] = logGa(pt->total +lvl->cnt) -logGa(pt->total +1) -lGc;  dat = mod = 0;                /* traverse the cond. distributions */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse nonempty leaves */    s = 0;                      /* process a conditional distribution */    for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; )      if (*--c > 0) s += *c *log(*c);    dat += leaf->eval = leaf->total *log(leaf->total) -s;    mod += logGa(leaf->total +lvl->cnt) -logGa(leaf->total +1) -lGc;  }                             /* note and sum the results */  pt->res0[2] = dat;            /* posterior data  coding length */  pt->res0[3] = mod;            /* posterior model coding length */  return 0;                     /* return 'ok' */}  /* _dlrel() *//*--------------------------------------------------------------------*/static int _dlabs (PTREE *pt){                               /* --- desc. length (abs. freq.) 
*/  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *b, *c;                /* to traverse the buffer/counters */  double dat, mod, lGc, s, t;   /* description lengths, buffers */  assert(pt && (pt->total > 0));/* check the function arguments */  lvl = pt->levels +pt->concnt; /* get the leaf level and */  lGc = logGa(lvl->cnt);        /* compute ln(gamma(num. values)) */  s   = 0;                      /* process the marginal distribution */  for (b = lvl->buf +(i = lvl->cnt); --i >= 0; )    s += logGa(*--b +1);        /* compute bits for code book page */  pt->res0[0] = (t = logGa(pt->total +1)) -s;  pt->res0[1] = logGa(pt->total +lvl->cnt) -t -lGc;  dat = mod = 0;                /* process the cond. distributions */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse the nonempty leaves */    s = 0;                      /* process a conditional distribution */    for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; )      s += logGa(*--c +1);      /* compute bits for code book page */    dat += leaf->eval = (t = logGa(leaf->total +1)) -s;    mod += logGa(leaf->total +lvl->cnt) -t -lGc;  }                             /* note and sum the results */  pt->res0[2] = dat;            /* data  description length */  pt->res0[3] = mod;            /* model description length */  return 0;                     /* return 'ok' */}  /* _dlabs() *//*--------------------------------------------------------------------*/static double _rdlen (PTREE *pt){ double *r = pt->res1;         /* --- reduction of desc. length */  double t = (pt->params[0]+1 > 0) ? 
pt->params[0]+1 : 1;  return (r[0] -r[2] +(r[1] -r[3])/t) /(M_LN2 *pt->total);}  /* _rdlen() */               /* (data +model/(a+1)) / (ln(2) *N) *//*--------------------------------------------------------------------*/static int _scinit (PTREE *pt){                               /* --- stochastic complexity */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  double stc;                   /* temporary buffer */  assert(pt && (pt->total > 0));/* check the function arguments */  _info(pt);                    /* initialize information measures */  stc = log(0.5 *pt->total);    /* compute log(N/2) */  lvl = pt->levels +pt->concnt; /* get the leaf level */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ)    if (leaf->total > 0) stc -= log(0.5 *leaf->total);  pt->res0[3] = stc *0.5 *(lvl->cnt -1);  pt->res0[4] = (1 -pt->leafcnt) *log(pow(M_PI, 0.5 *lvl->cnt))              - logGa(0.5 *lvl->cnt);  return 0;                     /* compute the stochastic complexity */}  /* _scinit() */              /* and return 'ok' *//*--------------------------------------------------------------------*/static double _stoco (PTREE *pt){ double *r = pt->res1;         /* --- stochastic complexity */  return (r[0] +r[1] -r[2] +r[3] +r[4]) /(M_LN2 *pt->total);}  /* _stoco() */               /* information gain +penalty term *//*----------------------------------------------------------------------  Probabilistic Leaf Functions----------------------------------------------------------------------*/static double _l_wdiff (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){                               /* --- weighted differences */  int    i;                     /* loop variable */  float  *b, *c;                /* to traverse the buffer/counters */  double sum = 0, t;            /* sum of difference terms, buffer */  assert(pt && lvl && leaf);    /* check the function arguments */  b = lvl->buf +(i = lvl->cnt); /* traverse 
marginal distribution */  c = leaf->cnts +i;            /* and joint distribution */  if      (pt->params[0] == 1) {    while (--i >= 0) {          /* if the exponent is 1 */      t = *--b *leaf->total - *--c *pt->total;      sum += *c *fabs(t);       /* sum the weighted absolute */    } }                         /* differences */  else if (pt->params[0] == 2) {    while (--i >= 0) {          /* if the exponent is 2 */      t = *--b *leaf->total - *--c *pt->total;      sum += *c *t *t;          /* sum the weighted squared */    } }                         /* differences */  else {                        /* if the exponent is anything */    while (--i >= 0) {          /* other than 1 or 2 */      t = *--b *leaf->total - *--c *pt->total;      sum += *c *pow(fabs(t), pt->params[0]);    }                           /* sum the weighted differences */  }                             /* raised to the given power */  return sum;                   /* return the computed sum */}  /* _l_wdiff() *//*--------------------------------------------------------------------*/static double _l_chi2 (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){                               /* --- chi^2 measure */  int    i;                     /* loop variable */  float  *b, *c;                /* to traverse the buffer/counters */  double sum = 0, t, p;         /* sum of difference terms, buffer */  assert(pt && lvl && leaf);    /* check the function arguments */  b = lvl->buf +lvl->cnt;       /* traverse marginal and joint dist. 
*/  for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) {    --c; p = *--b *leaf->total; /* compute product of marginals */    if (p <= 0) continue;       /* skip if product is zero */    t = p - *c *pt->total;      /* compute difference to joint */    sum += t *t /p;             /* sum the chi^2 terms */  }                             /* (modified squared differences) */  return sum;                   /* and return this sum */}  /* _l_chi2() *//*--------------------------------------------------------------------*/static double _l_evid (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){                               /* --- weight of evidence */  int    i;                     /* loop variable */  float  *b, *c;                /* to traverse the buffer/counters */  double sum = 0, x, y, z;      /* sum of terms, buffers */  assert(pt && lvl && leaf);    /* check the function arguments */  b = lvl->buf +lvl->cnt;       /* traverse marginal and joint dist. */  for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) {    z = (double)*--c * *--b;    /* compute common term only once */    x = (double)*c *pt->total   -z; if (x == 0) continue;    y = (double)*b *leaf->total -z; if (y == 0) continue;    sum += *b *fabs(log(x/y));  /* sum the odds fractions weighted */  }                             /* with the marginal probability */  return sum *leaf->total;      /* and return this sum */}  /* _l_evid() *//*--------------------------------------------------------------------*/static double _l_relev (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){                               /* --- relevance */  int    i;                     /* loop variable */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -