📄 ptree4.c
字号:
/*---------------------------------------------------------------------- File : ptree4.c Contents: probability/possibility tree management evaluation functions Author : Christian Borgelt History : 22.10.1995 file created as ctree.c 03.11.1995 first version of pt_eval programmed 11.12.1995 information gain (ratio) added 16.01.1996 ln(K2 metric) replaced by log_2(K2 metric)/N 05.02.1996 some evaluation measures added 07.02.1996 evaluation measure 'PT_GINI' added 23.02.1996 functions pt_mname and pt_minfo added 23.06.1996 comp. of specificity gain (ratio) redesigned 15.11.1996 sym. information/specificity gain ratio added 06.03.1997 some evaluation measures added 07.03.1997 functions _chi2 and _muts improved 10.03.1997 second symmetric info./spec. gain ratio added 15.02.1998 marginalization moved to function pt_eval 20.02.1998 Bayesian Dirichlet l.e. uniform metric added 24.02.1998 bug in function _woevid fixed 23.02.1999 changed to Bayes factor computation (K2,BDeu) 30.04.1999 several assertions added 13.11.1999 evaluation measure 'PT_WDIFF' added 26.05.2001 computation of ln(\Gamma(n)) improved 02.02.2002 major redesign completed 04.02.2002 quadratic information measures added 05.03.2002 bug in function _ginit1 fixed 01.07.2002 evaluation adapted to number of paths to a leaf 02.07.2002 functions _bdm and _bdmod redesigned 04.07.2002 function logGa made external 17.07.2002 function _spec redesigned, _nsp moved to ptree3.c 19.07.2002 function _sort made safe w.r.t. links 17.01.2003 normalization of possibilistic measures added 18.01.2003 bug in computation of balanced spc. gain fixed----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <math.h>#include <float.h>#include <assert.h>#include "gamma.h"#include "ptree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define M_PI 3.14159265358979323846 /* \pi */#define M_LN2 0.69314718055994530942 /* ln(2) *//*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef int INITFN (PTREE *pt);typedef double EVALFN (PTREE *pt);typedef double LEAFFN (PTREE *pt, PTLVL *lvl, PTLEAF *leaf);typedef struct { /* --- function table element --- */ INITFN *initfn; /* initialization function */ EVALFN *evalfn; /* evaluation function */ LEAFFN *leaffn; /* leaf evaluation function */} FTE; /* (function table element) *//*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static void _sort (PTLVL *lvl, PTNODE *node){ /* --- sort the leaves */ int i; /* loop variable */ PTNODE *child; /* to traverse the child nodes */ assert(lvl && node); /* check the function arguments */ if (lvl->index < 0) { /* if on the leaf level, */ node->succ = lvl->list; /* add the current leaf node */ lvl->list = node; } /* at the head of the level list */ else { /* if at an inner node */ for (i = (lvl++)->cnt; --i >= 0; ) { child = node->children[i];/* traverse the children */ if (child && (child->parent == node) && (child->index == i)) _sort(lvl, child); /* if the child node exists and */ } /* it is a genuine child (no link), */ } /* sort the nodes recursively */} /* _sort() *//*--------------------------------------------------------------------*/static void _sumsqr (PTREE *pt){ /* --- sum squared frequencies */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *c; /* to traverse the buffer/counters */ double s_i, s_j, s_ij, s; /* sums of squared frequencies */ assert(pt && (pt->total > 0));/* check the function arguments */ s_i = s_j = s_ij = 0; /* initialize the sums */ lvl = pt->levels +pt->concnt; /* get the leaf level */ for (c = lvl->buf +(i = lvl->cnt); --i >= 0; ) { --c; s_i += *c * *c; } /* sum squared marginals */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse nonempty leaves */ s_j += leaf->total *leaf->total; s = 0; /* traverse the counter vector */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) { --c; s += *c * *c; } /* sum squared joint distrib. terms */ s_ij += leaf->eval = s; /* note and sum the leaf terms */ } pt->res0[0] = s_i; /* s_i = sum_i N_i^2 */ pt->res0[1] = s_j; /* s_j = sum_j N_j^2 */ pt->res0[2] = s_ij; /* s_ij = sum_ij N_ij^2 */} /* _sumsqr() *//*---------------------------------------------------------------------- Information/Specificity Functions----------------------------------------------------------------------*/static int _info (PTREE *pt){ /* --- Shannon information init. */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *c; /* to traverse the buffers/counters */ double s_i, s_j, s_ij, n, t; /* sums of entropy terms, buffers */ assert(pt && (pt->total > 0));/* check the function arguments */ s_ij = s_i = s_j = 0; /* and initialize the sums */ lvl = pt->levels +pt->concnt; /* traverse the leaf level buffer */ for (c = lvl->buf +(i = lvl->cnt); --i >= 0; ) if (*--c > 0) s_i += *c *log(*c); for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse nonempty leaves */ s_j += leaf->total *log(leaf->total); t = 0; /* traverse the counter vector */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) if (*--c > 0) t += *c *log(*c); s_ij += leaf->eval = t; /* note and sum the leaf terms */ } n = pt->total; t = n *log(n); /* compute N *log(N) only once */ pt->res0[0] = t -s_i; /* N H_i = -N sum_i p_i *log(p_i) */ pt->res0[1] = t -s_j; /* N H_j = -N sum_j p_j *log(p_j) */ pt->res0[2] = t -s_ij; /* N H_ij = -N sum_ij p_ij *log(p_ij) */ pt->res0[3] = 1/(M_LN2 *n); /* note normalization factors */ pt->res0[4] = (pt->leafcnt > 1) ? 1/(n *log(pt->leafcnt)) : 0; return 0; /* return 'ok' */} /* _info() *//*--------------------------------------------------------------------*/static int _quad (PTREE *pt){ /* --- quadratic information init. */ double t; /* squared total frequency */ assert(pt && (pt->total > 0));/* check the function argument */ _sumsqr(pt); /* sum squares of frequencies */ t = pt->total; t *= t; /* compute N^2 only once */ pt->res0[0] = t -pt->res0[0]; /* N^2/2 H^2_i = N^2 -sum_i N_i^2 */ pt->res0[1] = t -pt->res0[1]; /* N^2/2 H^2_j = N^2 -sum_j N_j^2 */ pt->res0[2] = t -pt->res0[2]; /* N^2/2 H^2_ij = N^2 -sum_ij N_ij^2 */ pt->res0[3] = 2/t; /* note normalization factors */ pt->res0[4] = (pt->leafcnt > 0) ? 1/(t *(1 -1/pt->leafcnt)) : 0; return 0; /* return 'ok' */} /* _quad() *//*--------------------------------------------------------------------*/static int _spec (PTREE *pt){ /* --- specificity measures init. */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *b, *c; /* to traverse the buffers/counters */ assert(pt && (pt->total > 0));/* check the function argument */ lvl = pt->levels +pt->concnt; /* get the leaf level and compute */ pt->res0[0] = pt_nsp(lvl->buf, lvl->cnt); /* nsp(child) */ if (!pt->buf) { /* if there is no evaluation buffer */ pt->buf = (float*)malloc(lvl->cnt *pt->leafcnt *sizeof(float)); if (!pt->buf) return -1; /* allocate an evaluation buffer */ } /* for the nonspecificity comps. */ b = pt->buf; /* process distribution on conditions */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) if (leaf->total > 0) *b++ = leaf->total; pt->res0[1] = pt_nsp(pt->buf, (int)(b -pt->buf)); b = pt->buf; /* process the joint distribution */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse nonempty leaves */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) if (*--c > 0) *b++ = *c; /* copy nonzero counters to buffer */ } /* and compute S_ij from them */ pt->res0[2] = pt_nsp(pt->buf, (int)(b -pt->buf)); pt->res0[3] = 1/M_LN2/((pt->tplcnt > 0) ? pt->tplcnt : 1); pt->res0[4] = (pt->leafcnt > 1) ? M_LN2/log(pt->leafcnt) : 1; return 0; /* return 'ok' */} /* _spec() *//*--------------------------------------------------------------------*/static double _gain (PTREE *pt){ double *r = pt->res1; /* --- information/specificity gain */ return (r[0] +r[1] -r[2]) *r[3];} /* _gain() */ /* H_i +H_j -H_ij */static double _gbal (PTREE *pt){ double *r = pt->res1; /* --- balanced info./spec. gain */ return (r[0] +r[1] -r[2]) *r[3] *r[4];} /* _gbal() */ /* (H_i +H_j -H_ij) /r_j */static double _gratio (PTREE *pt){ double *r = pt->res1; /* --- info./spec. gain ratio */ return (r[1] > 0) ? (r[0] +r[1] -r[2]) /r[1] : 0;} /* _grat() */ /* (H_i +H_j -H_ij) /H_j */static double _sgr1 (PTREE *pt){ double *r = pt->res1; /* --- sym. info./spec. gain ratio 1 */ return (r[2] > 0) ? (r[0] +r[1] -r[2]) /r[2] : 0;} /* _sgr1() */ /* (H_i +H_j -H_ij) /H_ij */static double _sgr2 (PTREE *pt){ double *r = pt->res1; /* --- sym. info./spec. gain ratio 2 */ return (r[0] +r[1] > 0) ? (r[0] +r[1] -r[2]) /(r[0] +r[1]) : 0;} /* _sgr2() */ /* (H_i +H_j -H_ij) /(H_i +H_j) *//*---------------------------------------------------------------------- Gini Index and Related Measures----------------------------------------------------------------------*/static int _ginit1 (PTREE *pt){ /* --- Gini index initialization */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ double w_ij; /* weighted sum of squared freqs. */ assert(pt && (pt->total > 0));/* check the function arguments */ _sumsqr(pt); w_ij = 0; /* compute sums of squared freqs. */ lvl = pt->levels +pt->concnt; /* get the leaf level */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) if (leaf->total > 0) w_ij += leaf->eval /leaf->total; pt->res0[3] = w_ij; /* w_ij = sum_j (1/N_j) sum_i N_ij^2 */ return 0; /* return 'ok' */} /* _ginit1() */ /*--------------------------------------------------------------------*/static int _ginit2 (PTREE *pt){ /* --- symmetric Gini index init. */ int i; /* loop variable */ PTLVL *lvl; /* leaf level description */ PTLEAF *leaf; /* to traverse the leaves */ float *b, *c; /* to traverse the buffer/counters */ double w_ij, w_ji; /* weighted sums of squared freqs. */ assert(pt && (pt->total > 0));/* check the function arguments */ _sumsqr(pt); w_ij = w_ji = 0; /* compute sums of squared freqs. */ lvl = pt->levels +pt->concnt; /* get the leaf level and */ b = lvl->buf +lvl->cnt; /* the second evaluation buffer */ for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) { if (leaf->total <= 0) continue; /* traverse nonempty leaves */ w_ij += leaf->eval / leaf->total; b += lvl->cnt; /* traverse the counter vector */ for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) { --c; *--b += *c * *c; } /* sum the squared counters */ } /* in the evaluation buffer */ for (c = b +(i = lvl->cnt); --i >= 0; ) w_ji += *--c / *--b; /* weight and sum the terms */ pt->res0[3] = w_ij; /* w_ij = sum_j (1/N_j) sum_i N_ij^2 */ pt->res0[4] = w_ji; /* w_ji = sum_i (1/N_i) sum_j N_ij^2 */ return 0; /* return 'ok' */} /* _ginit2() */ /*--------------------------------------------------------------------*/static double _gini (PTREE *pt){ double *r = pt->res1; /* --- normal Gini index */ return (r[3] -r[0] /pt->total) /pt->total;} /* _gini() */ /* w_ij /N - s_i /N^2 */static double _ginisym (PTREE *pt)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -