📄 ptree3.c
字号:
/*---------------------------------------------------------------------- File : ptree3.c Contents: probability/possibility tree management (conditional) dependence measurement functions Author : Christian Borgelt History : 22.10.1995 file created as ctree.c 25.03.1997 function pt_depend added 02.01.2002 switched to sorting function from vecops 08.01.2002 symmetric Gini index added 12.01.2002 normalized chi^2 measure added 22.01.2002 adapted to redesigned node structure 04.02.2002 quadratic information measures added 17.01.2003 normalization of possibilistic measures added----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "vecops.h"#include "ptree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define LN_2 0.693147180559945 /* ln(2) *//*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef struct { /* --- measure information --- */ char *name; /* name of the measure */ int flags; /* flags, e.g. PT_SYM */} MINFO; /* (measure information) */typedef float MARGFN (PTNODE *node, PTLVL *lvl);typedef double DEPFN (PTNODE *node, PTLVL *lvl, int measure, double *params);/*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/static const MINFO mi_prob[PT_UNKNOWN+1] = { /* PT_NONE 0 */ { "no measure", 1 }, /* PT_INFGAIN 1 */ { "information gain", PT_SYM }, /* PT_INFGBAL 2 */ { "balanced information gain", 1 }, /* PT_INFGR 3 */ { "information gain ratio", 1 }, /* PT_INFSGR1 4 */ { "symmetric information gain ratio 1", PT_SYM }, /* PT_INFSGR2 5 */ { "symmetric information gain ratio 2", PT_SYM }, /* PT_QIGAIN 6 */ { "quadratic information gain", PT_SYM }, /* PT_QIGBAL 7 */ { "balanced quadratic information gain", 1 }, /* PT_QIGR 8 */ { "quadratic information gain ratio", 1 }, /* PT_QISGR1 9 */ { "symmetric quad. info. gain ratio 1", PT_SYM }, /* PT_QISGR2 10 */ { "symmetric quad. info. gain ratio 2", PT_SYM }, /* PT_GINI 11 */ { "Gini index", 1 }, /* PT_GINISYM 12 */ { "symmetric Gini index", PT_SYM }, /* PT_GINIMOD 13 */ { "modified Gini index", 1 }, /* PT_RELIEF 14 */ { "relief measure", 1 }, /* PT_WDIFF 15 */ { "sum of weighted differences", PT_SYM }, /* PT_CHI2 16 */ { "chi^2 measure", PT_SYM }, /* PT_CHI2NRM 17 */ { "normalized chi^2 measure", PT_SYM }, /* PT_WEVID 18 */ { "weight of evidence", 1 }, /* PT_RELEV 19 */ { "relevance", 1 }, /* PT_BDM 20 */ { "Bayesian-Dirichlet / K2 metric", 1 }, /* PT_BDMOD 21 */ { "modified Bayesian-Dirichlet / K2 metric", 1 }, /* PT_RDLREL 22 */ { "reduction of description length " "(rel. freq.)", 1 }, /* PT_RDLABS 23 */ { "reduction of description length " "(abs. freq.)", 1 }, /* PT_STOCO 24 */ { "stochastic complexity", 1 }, /* PT_UNKNOWN 25 */ { "<unknown>", 0 }}; /* measures for possibility trees */static const MINFO mi_poss[PT_UNKNOWN+1] = { /* PT_NONE 0 */ { "no measure", 1 }, /* PT_SPCGAIN 1 */ { "specificity gain", PT_SYM }, /* PT_SPCGBAL 2 */ { "balanced specificity gain", 1 }, /* PT_SPCGR 3 */ { "specificity gain ratio", 1 }, /* PT_SPCSGR1 4 */ { "symmetric specificity gain ratio 1", PT_SYM }, /* PT_SPCSGR2 5 */ { "symmetric specificity gain ratio 2", PT_SYM }, /* 6 */ { "<unknown>", 0 }, /* 7 */ { "<unknown>", 0 }, /* 8 */ { "<unknown>", 0 }, /* 9 */ { "<unknown>", 0 }, /* 10 */ { "<unknown>", 0 }, /* 11 */ { "<unknown>", 0 }, /* 12 */ { "<unknown>", 0 }, /* 13 */ { "<unknown>", 0 }, /* 14 */ { "<unknown>", 0 }, /* PT_WDIFF 15 */ { "sum of weighted differences", PT_SYM }, /* PT_CHI2 16 */ { "possibilistic chi^2 measure", PT_SYM }, /* 17 */ { "<unknown>", 0 }, /* 18 */ { "<unknown>", 0 }, /* 19 */ { "<unknown>", 0 }, /* PT_MUTSPC 20 */ { "mutual specificity", PT_SYM }, /* 21 */ { "<unknown>", 0 }, /* 22 */ { "<unknown>", 0 }, /* 23 */ { "<unknown>", 0 }, /* 24 */ { "<unknown>", 0 }, /* PT_UNKNOWN 25 */ { "<unknown>", 0 }}; /* measures for probability trees *//*---------------------------------------------------------------------- Functions to Compute Marginals----------------------------------------------------------------------*/static float _marg_sum (PTNODE *node, PTLVL *lvl){ /* --- marginalize by summing */ int i, j; /* loop variables */ PTNODE **p; /* to traverse the child vector */ float *bp, *bc, *c; /* to traverse the buffers/counters */ float sum = 0; /* sum of frequencies */ assert(node && lvl /* check the function arguments */ && (lvl[1].index < 0)); /* (next level must be leaves) */ for (bc = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) *--bc = 0; /* clear child marginals */ p = node->children +lvl->cnt; /* traverse the child vector */ for (bp = lvl[0].buf +(j = lvl[0].cnt); --j >= 0; ) { *--bp = 0; /* clear parent marginals */ if (!*--p) continue; /* if there is no child, skip value */ c = ((PTLEAF*)*p)->cnts +lvl[1].cnt; for (bc = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) { * bp += *--c; /* aggregate the values */ *--bc += * c; /* of the joint distribution */ } /* in the parent/child buffers */ sum += *bp; /* compute the global total */ } return lvl[0].total = sum; /* store the total frequency */} /* _marg_sum() *//*--------------------------------------------------------------------*/static float _marg_max (PTNODE *node, PTLVL *lvl){ /* --- marginalize by maximizing */ int i, j; /* loop variables */ PTNODE **p; /* to traverse the child vector */ float *bp, *bc, *c; /* to traverse the buffers/counters */ float max = 0; /* maximum of frequencies */ assert(node && lvl /* check the function arguments */ && (lvl[1].index < 0)); /* (next level must be leaves) */ for (bc = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) *--bc = 0; /* clear child marginals */ p = node->children +lvl->cnt; /* traverse the child vector */ for (bp = lvl[0].buf +(j = lvl[0].cnt); --j >= 0; ) { *--bp = 0; /* clear parent marginals */ if (!*--p) continue; /* if there is no child, skip value */ c = ((PTLEAF*)*p)->cnts +lvl[1].cnt; for (bc = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) { if (*--c > *bp) *bp = *c; /* aggregate the values */ if (*c > *--bc) *bc = *c; /* of the joint distribution */ } /* in the parent/child buffers */ if (*bp > max) max = *bp; /* compute the global maximum */ } return lvl[0].total = max; /* store the global maximum */} /* _marg_max() *//*---------------------------------------------------------------------- Probabilistic Conditional Dependence Measures----------------------------------------------------------------------*/static double _info (PTNODE *node, PTLVL *lvl, int measure, double *params){ /* --- Shannon information measures */ int i, j; /* loop variables */ PTNODE **p; /* to traverse the child vector */ float *c; /* to traverse the buffers/counters */ double s_i, s_j, s_ij; /* sums for entropy computation */ double info, t; /* information gain (ratio), buffer */ assert(node && lvl); /* check the function arguments */ s_i = s_j = s_ij = 0; /* and initialize the sums */ for (c = lvl[1].buf +(i = lvl[1].cnt); --i >= 0; ) if (*--c > 0) s_i += *c *log(*c); for (c = lvl[0].buf +(j = lvl[0].cnt); --j >= 0; ) if (*--c > 0) s_j += *c *log(*c); for (p = node->children +(j = lvl[0].cnt); --j >= 0; ) { if (!*--p) continue; /* traverse the existing children */ t = 0; /* process the joint distribution */ for (c = ((PTLEAF*)*p)->cnts +(i = lvl[1].cnt); --i >= 0; ) if (*--c > 0) t += *c *log(*c); s_ij += t; /* process leaves individually and */ } /* sum the results (higher accuracy) */ t = lvl[0].total; t *= log(t);/* compute N *log(N) only once */ s_i = t -s_i; /* N H_i = -N sum_i p_i *log(p_i) */ s_j = t -s_j; /* N H_j = -N sum_j p_j *log(p_j) */ s_ij = t -s_ij; /* N H_ij = -N sum_ij p_ij *log(p_ij) */ info = s_i +s_j -s_ij; /* compute information gain *N *ln(2) */ switch (measure) { /* evaluate measure code */ case PT_INFSGR1: if (s_ij <= 0) return 0; info /= s_ij; break; case PT_INFSGR2: if (s_i +s_j <= 0) return 0; info /= s_i +s_j; break; default : info /= LN_2 *lvl[0].total; break; } /* form requested entropy ratio */ return (info < 0) ? 0 : info; /* return the information measure */} /* _info() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -