📄 ptree5.c
字号:
/*---------------------------------------------------------------------- File : ptree5.c Contents: probability/possibility tree management local structure learning functions Author : Christian Borgelt History : 22.10.1995 file created as ctree.c 12.01.2002 local structure learning moved to separate file 04.07.2002 function logGa made external 17.07.2002 function tables redesigned, _nsp moved to ptree3 18.07.2002 optimizations for independent gains added 17.01.2003 normalization of possibilistic measures added 18.01.2003 bug in computation of balanced spc. gain fixed----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "gamma.h"#include "ptree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define M_PI 3.14159265358979323846 /* \pi */#define M_LN2 0.69314718055994530942 /* ln(2) *//* --- flags for leaf merger evaluation --- */#define MRG_INDEP 0 /* gain is independent of mergers */#define MRG_LOCAL 1 /* local recomputation necessary */#define MRG_GLOBAL 2 /* global recomputation necessary *//*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef double EVALFN (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2);typedef struct { /* --- function table element */ EVALFN *evalfn; /* leaf merger evaluation function */ int type; /* evaluation type, e.g. MRG_INDEP */} FTE; /* (function table element) */typedef struct { /* --- merge table --- */ EVALFN *evalfn; /* leaf merger evaluation function */ double minimp; /* minimal improvement */ int leafcnt; /* number of leaves in the tree */ int rowcnt; /* current number of table rows */ int *bids; /* indices of best mergers per row */ double **evals; /* evaluations of mergers */ PTLEAF *leaves[1]; /* list of leaf nodes */} MRGTAB; /* (merge table) *//*---------------------------------------------------------------------- Evaluation Functions----------------------------------------------------------------------*/static double _info (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- Shannon information change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ float *c1, *c2, c, n1, n2; /* to traverse the counters */ double s, t; /* sum of entropy terms */ assert(pt && leaf1 && leaf2 /* check the function arguments */ && (leaf1->total > 0) && (leaf2->total > 0)); pt->res1[0] = pt->res0[0]; /* simply copy N H_i (no change) */ n1 = leaf1->total; /* get the two leaf totals, sum them, */ n2 = leaf2->total; c = n1+n2; /* and update N H_j (marginal dist.) */ t = n1*log(n1) +n2*log(n2) -c*log(c); pt->res1[1] = pt->res0[1] +((t < 0) ? t : 0); lvl = pt->levels +pt->concnt; /* get the leaf level description */ c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* traverse the counters */ for (s = 0; --i >= 0; ) { /* of the two leaves and */ c = *--c1 + *--c2; /* compute the entropy term */ if (c > 0) s += c*log(c); /* of a merged leaf, then */ } /* update N H_ij (joint distribution) */ t = leaf1->eval +leaf2->eval -s; pt->res1[2] = pt->res0[2] +((t < 0) ? t : 0); pt->res1[3] = pt->res0[3]; /* note the normalization factors */ i = pt->leafcnt -1; /* (for one leaf less) */ pt->res1[4] = (i > 1) ? 1/(pt->total *log(i)) : 0; return s; /* return the leaf evaluation */} /* _info() *//*--------------------------------------------------------------------*/static double _quad (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- quadratic information change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ float *c1, *c2, c, n1, n2; /* to traverse the counters */ double s, t; /* sum of entropy terms */ assert(pt && leaf1 && leaf2); /* check the function arguments */ pt->res1[0] = pt->res0[0]; /* simply copy N^2/2 H^2_i */ n1 = leaf1->total; /* get the leaf totals, */ n2 = leaf2->total; c = n1+n2; /* sum them (total of merged leaf), */ t = n1*n1 +n2*n2 -c*c; /* and update N^2/2 H^2_j */ pt->res1[1] = pt->res0[1] +((t < 0) ? t : 0); lvl = pt->levels +pt->concnt; /* get the leaf level description */ c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* traverse the leaf counters */ for (s = 0; --i >= 0; ) { /* of the two leaves and */ c = *--c1 + *--c2; /* compute the entropy term */ s += c *c; /* of a merged leaf, */ } /* then update N^2/2 H^2_ij */ t = leaf1->eval +leaf2->eval -s; pt->res1[2] = pt->res0[2] +((t < 0) ? t : 0); pt->res1[3] = pt->res0[3]; /* note the normalization factors */ i = pt->leafcnt -1; /* (for one leaf less) */ pt->res0[4] = (i > 0) ? 1/(s *(1 -1/i)) : 0; return s; /* return the leaf evaluation */} /* _quad() *//*--------------------------------------------------------------------*/static double _spec (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- specificity change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *b, *c1, *c2; /* to traverse the buffers/counters */ assert(pt && (pt->total > 0));/* check the function argument */ pt->res1[0] = pt->res0[0]; /* simply copy S_i = nsp(child) */ lvl = pt->levels +pt->concnt; /* get the leaf level and */ b = pt->buf; /* process distribution on conditions */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) if ((leaf != leaf1) && (leaf != leaf2) && (leaf->total > 0)) *b++ = leaf->total; /* collect the leaf totals */ *b++ = leaf1->total +leaf2->total; pt->res1[1] = pt_nsp(pt->buf, (int)(b -pt->buf)); b = pt->buf; /* process the joint distribution */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if ((leaf == leaf1) || (leaf == leaf2) || (leaf->total <= 0)) continue; /* traverse nonempty leaves */ for (c1 = leaf->cnts +(i = lvl->cnt); --i >= 0; ) if (*--c1 > 0) *b++ = *c1;/* copy nonzero counters to buffer */ } /* and compute S_ij from them */ c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* add the counters for the merger */ while (--i >= 0) *b++ = *--c1 + *--c2; pt->res1[2] = pt_nsp(pt->buf, (int)(b -pt->buf)); pt->res1[3] = pt->res0[3]; /* note normalization factors */ i = pt->leafcnt -1; /* (for one leaf less) */ pt->res1[4] = (i > 1) ? M_LN2/log(i) : 1; return 0; /* return a dummy evaluation */} /* _spec() *//*--------------------------------------------------------------------*/static double _gini1 (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- Gini index 1 change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ float *c1, *c2, c; /* to traverse the counters */ float n1, n2, n; /* leaf counter totals */ double s, t; /* sum of squared frequencies */ assert(pt && leaf1 && leaf2 /* check the function arguments */ && (leaf1->total > 0) && (leaf2->total > 0)); pt->res1[0] = pt->res0[0]; /* simply copy sum_i N_i^2 */ n1 = leaf1->total; /* get the leaf totals, */ n2 = leaf2->total; n = n1+n2; /* sum them (total of merged leaf), */ t = n*n -n1*n1 -n2*n2; /* and update sum_j N_j^2 */ pt->res1[1] = pt->res0[1] +((t > 0) ? t : 0); lvl = pt->levels +pt->concnt; /* get the leaf level description */ c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* traverse the leaf counters */ for (s = 0; --i >= 0; ) { /* of the two leaves and */ c = *--c1 + *--c2; /* compute the squared frequencies */ s += c *c; /* of a merged leaf, */ } /* then update sum_ij N_ij^2 */ t = s -leaf1->eval -leaf2->eval; pt->res1[2] = pt->res0[2] +((t > 0) ? t : 0); pt->res1[3] = pt->res0[3] +(s/n -leaf1->eval/n1 -leaf2->eval/n2); return s; /* return the leaf evaluation */} /* _gini1() *//*--------------------------------------------------------------------*/static double _gini2 (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- Gini index 2 change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ float *c1, *c2, c, *b; /* to traverse the counters */ float n1, n2, n; /* leaf counter totals */ double s, t; /* sum of squared frequencies */ assert(pt && leaf1 && leaf2 /* check the function arguments */ && (leaf1->total > 0) && (leaf2->total > 0)); pt->res1[0] = pt->res0[0]; /* simply copy sum_i N_i^2 */ n1 = leaf1->total; /* get the leaf totals, */ n2 = leaf2->total; n = n1+n2; /* sum them (total of merged leaf), */ t = n*n -n1*n1 -n2*n2; /* and update sum_j N_j^2 */ pt->res1[1] = pt->res0[1] +((t > 0) ? t : 0); lvl = pt->levels +pt->concnt; /* get the leaf level description */ c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* traverse the leaf counters and */ b = lvl->buf +i +i; /* the second evaluation buffer */ for (s = 0; --i >= 0; ) { /* compute the squared frequencies */ c = *--c1 + *--c2; /* of a merged leaf and update */ s += c *= c; /* the buffered squared frequencies */ *--b += c -*c1**c1 -*c2**c2; } /* update the sums s_ij and w_ij */ t = s -leaf1->eval -leaf2->eval; pt->res1[2] = pt->res0[2] +((t > 0) ? t : 0); pt->res1[3] = pt->res0[3] +(s/n -leaf1->eval/n1 -leaf2->eval/n2); for (n = 0, c1 = b +(i = lvl->cnt); --i >= 0; ) n += *--c1 / *--b; /* completely recompute */ pt->res1[4] = n; /* w_ji = sum_i (1/N_i) sum_j N_ij^2 */ return s; /* return the leaf evaluation */} /* _gini2() *//*--------------------------------------------------------------------*/static double _bdm (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- Bayesian-Dirichlet change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ float *c1, *c2; /* to traverse the counters */ double s, t; /* sum of BD metric terms, buffer */ double p, a; /* prior and sensitivity */ assert(pt && leaf1 && leaf2 /* check the function arguments */ && (leaf1->total > 0) && (leaf2->total > 0)); pt->res1[0] = pt->res0[0]; /* copy the reference evaluation */ lvl = pt->levels +pt->concnt; /* get the leaf level description */ a = (pt->params[0] > -1) ? pt->params[0]+1 : 1; a *= a; /* get the sensitivity parameter */ p = (pt->params[1] != 0) ? pt->params[1] : 1; if (p < 0) /* get the prior/equiv. sample size */ p = (-p /lvl->cnt /pt->fullcnt) *(leaf1->pathcnt +leaf2->pathcnt); c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* traverse the leaf counters and */ for (s = 0; --i >= 0; ) /* sum the BD terms for the leaf */ s += logGa((*--c1 +*--c2) *a +p); t = lvl->cnt *p; /* compute the leaf term and update */ s += logGa(t) -lvl->cnt *logGa(p) /* the BD metric */ - logGa((leaf1->total +leaf2->total) *a +t); pt->res1[1] = pt->res0[1] +(s -leaf1->eval -leaf2->eval); return s; /* return the leaf evaluation */} /* _bdm() *//*--------------------------------------------------------------------*/static double _bdmod (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){ /* --- mod. Bayesian-Dirichlet change */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ float *c1, *c2, c, n; /* to traverse the counters */ double t, s; /* sum of BD metric terms, buffer */ double p, a; /* prior and sensitivity */ assert(pt && leaf1 && leaf2 /* check the function arguments */ && (leaf1->total > 0) && (leaf2->total > 0)); pt->res1[0] = pt->res0[0]; /* copy the reference evaluation */ lvl = pt->levels +pt->concnt; /* get the leaf level description */ a = (pt->params[0] > -1) ? pt->params[0]+1 : 1; if (a < 0) a = 0; /* get the sensitivity parameter */ p = (pt->params[1] != 0) ? pt->params[1] : 1; if (p < 0) /* get the prior/equiv. sample size */ p = (-p /lvl->cnt /pt->fullcnt) *(leaf1->pathcnt +leaf2->pathcnt); c1 = leaf1->cnts +(i = lvl->cnt); c2 = leaf2->cnts + i; /* traverse the leaf counters and */ for (s = 0; --i >= 0; ) { /* sum the BD terms for the counters */ c = *--c1 +*--c2; t = c *a +p; s += logGa(t +c) -logGa(t); } n = leaf1->total +leaf2->total; t = n *a +lvl->cnt *p; /* compute the leaf term and */ s += logGa(t) -logGa(t +n); /* update the modified BD metric */ pt->res1[1] = pt->res0[1] +(s -leaf1->eval -leaf2->eval);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -