📄 ptree4.c
字号:
{ double *r = pt->res1; /* --- symmetric Gini index */ double t = 2 *pt->total *pt->total -r[0] -r[1]; return (t > 0) ? ((r[3] +r[4]) *pt->total -r[0] -r[1]) /t : 0;} /* _ginisym() */ /* (N w_ji +N w_ij -s_i -s_j) */ /* / (2 N^2 -s_i -s_j) */static double _ginimod (PTREE *pt){ double *r = pt->res1; /* --- modified Gini index */ return (r[1] > 0) ? (r[2] /r[1] -r[0] /pt->total) : 0;} /* _ginimod() */ /* s_ij /s_j - s_i /N */static double _relief (PTREE *pt){ double *r = pt->res1; /* --- relief measure */ double t = pt->total *pt->total -r[0]; return ((r[0] > 0) && (t > 0)) ? (r[2] /r[0] -(r[1] -r[2]) /t) : 0;} /* _relief() */ /* s_ij /s_i -(s_j -s_ij) /(N^2 -s_i) *//*---------------------------------------------------------------------- Bayesian Measures----------------------------------------------------------------------*/static int _bdm (PTREE *pt){ /* --- Bayesian-Dirichlet metric */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *c; /* to traverse the buffer/counters */ double p, q, s, a; /* (sum of) prior(s), sensitivity */ double bdm, t; /* Bayesian-Dirichlet metric, buffer */ int x = 0; /* flag for multiple paths */ assert(pt && (pt->total > 0));/* check the function arguments */ lvl = pt->levels +pt->concnt; /* get the leaf level description */ a = (pt->params[0] > -1) ? pt->params[0]+1 : 1; a *= a; /* get the sensitivity parameter */ p = (pt->params[1] != 0) ? pt->params[1] : 1; if (p < 0) p /= -lvl->cnt; /* get the prior/equiv. sample size */ for (t = 0, c = lvl->buf +(i = lvl->cnt); --i >= 0; ) t += logGa(*--c *a +p); /* process the marginal distribution */ s = lvl->cnt *p; /* and compute the reference value */ t += -lvl->cnt *logGa(p) +(logGa(s) -logGa(pt->total *a +s)); pt->res0[0] = t; /* note the reference result */ if (pt->params[1] < 0) { /* adapt the prior for the */ p /= pt->fullcnt; x = 1; } /* likelihood equivalent version */ bdm = 0; /* process the joint distribution */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse the */ q = (x) ? p *leaf->pathcnt : p; /* nonempty leaves */ for (t = 0, c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) t += logGa(*--c *a +q); /* process the cond. distribution */ s = lvl->cnt *q; /* and compute a leaf term */ t += -lvl->cnt *logGa(q) +(logGa(s) -logGa(leaf->total *a +s)); bdm += leaf->eval = t; /* note the result in the leaf */ } /* and sum the leaf terms */ pt->res0[1] = bdm; /* note the Bayesian-Dirichlet metric */ return 0; /* return 'ok' */} /* _bdm() *//*--------------------------------------------------------------------*/static int _bdmod (PTREE *pt){ /* --- modified BD / K2 metric */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *c; /* to traverse the buffer/counters */ double p, q, a; /* prior and sensitivity */ double bdm, s, t; /* Bayesian-Dirichlet metric, buffers */ int x = 0; /* flag for multiple paths */ assert(pt && (pt->total > 0));/* check the function arguments */ lvl = pt->levels +pt->concnt; /* get the leaf level */ a = (pt->params[0] >= 0) ? pt->params[0] : 0; if (a < 0) a = 0; /* get the sensitivity parameter */ p = (pt->params[1] != 0) ? pt->params[1] : 1; if (p < 0) p /= -lvl->cnt; /* get the prior/equiv. sample size */ for (t = 0, c = lvl->buf +(i = lvl->cnt); --i >= 0; ) { s = *--c *a +p; t += logGa(s +*c) -logGa(s); } s = pt->total*a +lvl->cnt*p; /* process the marginal distribution */ t += logGa(s) -logGa(s +pt->total); pt->res0[0] = t; /* note the reference result */ if (pt->params[1] < 0) { /* adapt the prior for the */ p /= pt->fullcnt; x = 1; } /* likelihood equivalent version */ bdm = 0; /* process the joint distribution */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse the nonempty leaves */ q = (x) ? p *leaf->pathcnt : p; /* process the cond. distribs. */ for (t = 0, c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) { s = *--c *a +q; t += logGa(s +*c) -logGa(s); } s = leaf->total *a +lvl->cnt *q; t += logGa(s) -logGa(s +leaf->total); bdm += leaf->eval = t; /* note the result in the leaf */ } /* and sum the leaf terms */ pt->res0[1] = bdm; /* note the Bayesian-Dirichlet metric */ return 0; /* return 'ok' */} /* _bdmod() *//*--------------------------------------------------------------------*/static double _bfactor (PTREE *pt){ /* --- Bayes factor computation */ return (pt->res1[1] -pt->res1[0]) /(M_LN2 *pt->total);} /* _bfactor() */ /* compute log_2(bdm(G) /bdm(G_0)) *//*---------------------------------------------------------------------- Description Length Measures----------------------------------------------------------------------*/static int _dlrel (PTREE *pt){ /* --- desc. length (rel. freq.) */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *c; /* to traverse the buffer/counters */ double dat, mod, lGc, s; /* description lengths, buffers */ assert(pt && (pt->total > 0));/* check the function arguments */ lvl = pt->levels +pt->concnt; /* get the leaf level */ lGc = logGa(lvl->cnt); /* note ln(gamma(number of values)) */ s = 0; /* process the marginal distribution */ for (c = lvl->buf +(i = lvl->cnt); --i >= 0; ) if (*--c > 0) s += *c *log(*c); pt->res0[0] = pt->total *log(pt->total) -s; pt->res0[1] = logGa(pt->total +lvl->cnt) -logGa(pt->total +1) -lGc; dat = mod = 0; /* traverse the cond. distributions */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse nonempty leaves */ s = 0; /* process a conditional distribution */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) if (*--c > 0) s += *c *log(*c); dat += leaf->eval = leaf->total *log(leaf->total) -s; mod += logGa(leaf->total +lvl->cnt) -logGa(leaf->total +1) -lGc; } /* note and sum the results */ pt->res0[2] = dat; /* posterior data coding length */ pt->res0[3] = mod; /* posterior model coding length */ return 0; /* return 'ok' */} /* _dlrel() *//*--------------------------------------------------------------------*/static int _dlabs (PTREE *pt){ /* --- desc. length (abs. freq.) */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *b, *c; /* to traverse the buffer/counters */ double dat, mod, lGc, s, t; /* description lengths, buffers */ assert(pt && (pt->total > 0));/* check the function arguments */ lvl = pt->levels +pt->concnt; /* get the leaf level and */ lGc = logGa(lvl->cnt); /* compute ln(gamma(num. values)) */ s = 0; /* process the marginal distribution */ for (b = lvl->buf +(i = lvl->cnt); --i >= 0; ) s += logGa(*--b +1); /* compute bits for code book page */ pt->res0[0] = (t = logGa(pt->total +1)) -s; pt->res0[1] = logGa(pt->total +lvl->cnt) -t -lGc; dat = mod = 0; /* process the cond. distributions */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse the nonempty leaves */ s = 0; /* process a conditional distribution */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) s += logGa(*--c +1); /* compute bits for code book page */ dat += leaf->eval = (t = logGa(leaf->total +1)) -s; mod += logGa(leaf->total +lvl->cnt) -t -lGc; } /* note and sum the results */ pt->res0[2] = dat; /* data description length */ pt->res0[3] = mod; /* model description length */ return 0; /* return 'ok' */} /* _dlabs() *//*--------------------------------------------------------------------*/static double _rdlen (PTREE *pt){ double *r = pt->res1; /* --- reduction of desc. length */ double t = (pt->params[0]+1 > 0) ? pt->params[0]+1 : 1; return (r[0] -r[2] +(r[1] -r[3])/t) /(M_LN2 *pt->total);} /* _rdlen() */ /* (data +model/(a+1)) / (ln(2) *N) *//*--------------------------------------------------------------------*/static int _scinit (PTREE *pt){ /* --- stochastic complexity */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ double stc; /* temporary buffer */ assert(pt && (pt->total > 0));/* check the function arguments */ _info(pt); /* initialize information measures */ stc = log(0.5 *pt->total); /* compute log(N/2) */ lvl = pt->levels +pt->concnt; /* get the leaf level */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) if (leaf->total > 0) stc -= log(0.5 *leaf->total); pt->res0[3] = stc *0.5 *(lvl->cnt -1); pt->res0[4] = (1 -pt->leafcnt) *log(pow(M_PI, 0.5 *lvl->cnt)) - logGa(0.5 *lvl->cnt); return 0; /* compute the stochastic complexity */} /* _scinit() */ /* and return 'ok' *//*--------------------------------------------------------------------*/static double _stoco (PTREE *pt){ double *r = pt->res1; /* --- stochastic complexity */ return (r[0] +r[1] -r[2] +r[3] +r[4]) /(M_LN2 *pt->total);} /* _stoco() */ /* information gain +penalty term *//*---------------------------------------------------------------------- Probabilistic Leaf Functions----------------------------------------------------------------------*/static double _l_wdiff (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){ /* --- weighted differences */ int i; /* loop variable */ float *b, *c; /* to traverse the buffer/counters */ double sum = 0, t; /* sum of difference terms, buffer */ assert(pt && lvl && leaf); /* check the function arguments */ b = lvl->buf +(i = lvl->cnt); /* traverse marginal distribution */ c = leaf->cnts +i; /* and joint distribution */ if (pt->params[0] == 1) { while (--i >= 0) { /* if the exponent is 1 */ t = *--b *leaf->total - *--c *pt->total; sum += *c *fabs(t); /* sum the weighted absolute */ } } /* differences */ else if (pt->params[0] == 2) { while (--i >= 0) { /* if the exponent is 2 */ t = *--b *leaf->total - *--c *pt->total; sum += *c *t *t; /* sum the weighted squared */ } } /* differences */ else { /* if the exponent is anything */ while (--i >= 0) { /* other than 1 or 2 */ t = *--b *leaf->total - *--c *pt->total; sum += *c *pow(fabs(t), pt->params[0]); } /* sum the weighted differences */ } /* raised to the given power */ return sum; /* return the computed sum */} /* _l_wdiff() *//*--------------------------------------------------------------------*/static double _l_chi2 (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){ /* --- chi^2 measure */ int i; /* loop variable */ float *b, *c; /* to traverse the buffer/counters */ double sum = 0, t, p; /* sum of difference terms, buffer */ assert(pt && lvl && leaf); /* check the function arguments */ b = lvl->buf +lvl->cnt; /* traverse marginal and joint dist. */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) { --c; p = *--b *leaf->total; /* compute product of marginals */ if (p <= 0) continue; /* skip if product is zero */ t = p - *c *pt->total; /* compute difference to joint */ sum += t *t /p; /* sum the chi^2 terms */ } /* (modified squared differences) */ return sum; /* and return this sum */} /* _l_chi2() *//*--------------------------------------------------------------------*/static double _l_evid (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){ /* --- weight of evidence */ int i; /* loop variable */ float *b, *c; /* to traverse the buffer/counters */ double sum = 0, x, y, z; /* sum of terms, buffers */ assert(pt && lvl && leaf); /* check the function arguments */ b = lvl->buf +lvl->cnt; /* traverse marginal and joint dist. */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) { z = (double)*--c * *--b; /* compute common term only once */ x = (double)*c *pt->total -z; if (x == 0) continue; y = (double)*b *leaf->total -z; if (y == 0) continue; sum += *b *fabs(log(x/y)); /* sum the odds fractions weighted */ } /* with the marginal probability */ return sum *leaf->total; /* and return this sum */} /* _l_evid() *//*--------------------------------------------------------------------*/static double _l_relev (PTREE *pt, PTLVL *lvl, PTLEAF *leaf){ /* --- relevance */ int i; /* loop variable */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -