⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ptree4.c

📁 数据挖掘中的一算法 ines算法 c下实现的。适合初学习数据挖掘者借鉴
💻 C
📖 第 1 页 / 共 3 页
字号:
/*----------------------------------------------------------------------  File    : ptree4.c  Contents: probability/possibility tree management            evaluation functions  Author  : Christian Borgelt  History : 22.10.1995 file created as ctree.c            03.11.1995 first version of pt_eval programmed            11.12.1995 information gain (ratio) added            16.01.1996 ln(K2 metric) replaced by log_2(K2 metric)/N            05.02.1996 some evaluation measures added            07.02.1996 evaluation measure 'PT_GINI' added            23.02.1996 functions pt_mname and pt_minfo added            23.06.1996 comp. of specificity gain (ratio) redesigned            15.11.1996 sym. information/specificity gain ratio added            06.03.1997 some evaluation measures added            07.03.1997 functions _chi2 and _muts improved            10.03.1997 second symmetric info./spec. gain ratio added            15.02.1998 marginalization moved to function pt_eval            20.02.1998 Bayesian Dirichlet l.e. uniform metric added            24.02.1998 bug in function _woevid fixed            23.02.1999 changed to Bayes factor computation (K2,BDeu)            30.04.1999 several assertions added            13.11.1999 evaluation measure 'PT_WDIFF' added            26.05.2001 computation of ln(\Gamma(n)) improved            02.02.2002 major redesign completed            04.02.2002 quadratic information measures added            05.03.2002 bug in function _ginit1 fixed            01.07.2002 evaluation adapted to number of paths to a leaf            02.07.2002 functions _bdm and _bdmod redesigned            04.07.2002 function logGa made external            17.07.2002 function _spec redesigned, _nsp moved to ptree3.c            19.07.2002 function _sort made safe w.r.t. links            17.01.2003 normalization of possibilistic measures added            18.01.2003 bug in computation of balanced spc. gain fixed----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <math.h>#include <float.h>#include <assert.h>#include "gamma.h"#include "ptree.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define	M_PI   3.14159265358979323846  /* \pi   */#define M_LN2  0.69314718055994530942  /* ln(2) *//*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef int    INITFN (PTREE *pt);typedef double EVALFN (PTREE *pt);typedef double LEAFFN (PTREE *pt, PTLVL *lvl, PTLEAF *leaf);typedef struct {                /* --- function table element --- */  INITFN *initfn;               /* initialization function */  EVALFN *evalfn;               /* evaluation function */  LEAFFN *leaffn;               /* leaf evaluation function */} FTE;                          /* (function table element) *//*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/static void _sort (PTLVL *lvl, PTNODE *node){                               /* --- sort the leaves */  int    i;                     /* loop variable */  PTNODE *child;                /* to traverse the child nodes */  assert(lvl && node);          /* check the function arguments */  if (lvl->index < 0) {         /* if on the leaf level, */    node->succ = lvl->list;     /* add the current leaf node */    lvl->list = node; }         /* at the head of the level list */  else {                        /* if at an inner node */    for (i = (lvl++)->cnt; --i >= 0; ) {      child = node->children[i];/* traverse the children */      if (child && (child->parent == node) && (child->index == i))        _sort(lvl, child);      /* if the child node exists and */    }                           /* it is a genuine child (no link), */  }                             /* sort the nodes recursively */}  /* _sort() *//*--------------------------------------------------------------------*/static void _sumsqr (PTREE *pt){                               /* --- sum squared frequencies */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *c;                    /* to traverse the buffer/counters */  double s_i, s_j, s_ij, s;     /* sums of squared frequencies */  assert(pt && (pt->total > 0));/* check the function arguments */  s_i = s_j = s_ij = 0;         /* initialize the sums */  lvl = pt->levels +pt->concnt; /* get the leaf level */  for (c = lvl->buf +(i = lvl->cnt); --i >= 0; ) {    --c; s_i += *c * *c; }      /* sum squared marginals */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse nonempty leaves */    s_j += leaf->total *leaf->total;    s = 0;                      /* traverse the counter vector */    for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) {      --c; s += *c * *c; }      /* sum squared joint distrib. terms */    s_ij += leaf->eval = s;     /* note and sum the leaf terms */  }  pt->res0[0] = s_i;            /* s_i  = sum_i  N_i^2  */  pt->res0[1] = s_j;            /* s_j  = sum_j  N_j^2  */  pt->res0[2] = s_ij;           /* s_ij = sum_ij N_ij^2 */}  /* _sumsqr() *//*----------------------------------------------------------------------  Information/Specificity Functions----------------------------------------------------------------------*/static int _info (PTREE *pt){                               /* --- Shannon information init. */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *c;                    /* to traverse the buffers/counters */  double s_i, s_j, s_ij, n, t;  /* sums of entropy terms, buffers */  assert(pt && (pt->total > 0));/* check the function arguments */  s_ij = s_i = s_j = 0;         /* and initialize the sums */  lvl = pt->levels +pt->concnt; /* traverse the leaf level buffer */  for (c = lvl->buf +(i = lvl->cnt); --i >= 0; )    if (*--c > 0) s_i += *c *log(*c);  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse nonempty leaves */    s_j += leaf->total *log(leaf->total);    t = 0;                      /* traverse the counter vector */    for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; )      if (*--c > 0) t += *c *log(*c);    s_ij += leaf->eval = t;     /* note and sum the leaf terms */  }  n = pt->total; t = n *log(n); /* compute N *log(N) only once */  pt->res0[0] = t -s_i;         /* N H_i  = -N sum_i  p_i  *log(p_i)  */  pt->res0[1] = t -s_j;         /* N H_j  = -N sum_j  p_j  *log(p_j)  */  pt->res0[2] = t -s_ij;        /* N H_ij = -N sum_ij p_ij *log(p_ij) */  pt->res0[3] = 1/(M_LN2 *n);   /* note normalization factors */  pt->res0[4] = (pt->leafcnt > 1) ? 1/(n *log(pt->leafcnt)) : 0;  return 0;                     /* return 'ok' */}  /* _info() *//*--------------------------------------------------------------------*/static int _quad (PTREE *pt){                               /* --- quadratic information init. */  double t;                     /* squared total frequency */  assert(pt && (pt->total > 0));/* check the function argument */  _sumsqr(pt);                  /* sum squares of frequencies */  t = pt->total; t *= t;        /* compute N^2 only once */  pt->res0[0] = t -pt->res0[0]; /* N^2/2 H^2_i  = N^2 -sum_i  N_i^2  */  pt->res0[1] = t -pt->res0[1]; /* N^2/2 H^2_j  = N^2 -sum_j  N_j^2  */  pt->res0[2] = t -pt->res0[2]; /* N^2/2 H^2_ij = N^2 -sum_ij N_ij^2 */  pt->res0[3] = 2/t;            /* note normalization factors */  pt->res0[4] = (pt->leafcnt > 0) ? 1/(t *(1 -1/pt->leafcnt)) : 0;  return 0;                     /* return 'ok' */}  /* _quad() *//*--------------------------------------------------------------------*/static int _spec (PTREE *pt){                               /* --- specificity measures init. */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *b, *c;                /* to traverse the buffers/counters */  assert(pt && (pt->total > 0));/* check the function argument */  lvl = pt->levels +pt->concnt; /* get the leaf level and compute */  pt->res0[0] = pt_nsp(lvl->buf, lvl->cnt);         /* nsp(child) */  if (!pt->buf) {               /* if there is no evaluation buffer */    pt->buf = (float*)malloc(lvl->cnt *pt->leafcnt *sizeof(float));    if (!pt->buf) return -1;    /* allocate an evaluation buffer */  }                             /* for the nonspecificity comps. */  b = pt->buf;                  /* process distribution on conditions */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ)    if (leaf->total > 0) *b++ = leaf->total;  pt->res0[1] = pt_nsp(pt->buf, (int)(b -pt->buf));  b = pt->buf;                  /* process the joint distribution */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse nonempty leaves */    for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; )      if (*--c > 0) *b++ = *c;  /* copy nonzero counters to buffer */  }                             /* and compute S_ij from them */  pt->res0[2] = pt_nsp(pt->buf, (int)(b -pt->buf));  pt->res0[3] = 1/M_LN2/((pt->tplcnt > 0) ? pt->tplcnt : 1);  pt->res0[4] = (pt->leafcnt > 1) ? M_LN2/log(pt->leafcnt) : 1;  return 0;                     /* return 'ok' */}  /* _spec() *//*--------------------------------------------------------------------*/static double _gain (PTREE *pt){ double *r = pt->res1;         /* --- information/specificity gain */  return (r[0] +r[1] -r[2]) *r[3];}  /* _gain() */                /* H_i +H_j -H_ij */static double _gbal (PTREE *pt){ double *r = pt->res1;         /* --- balanced info./spec. gain */  return (r[0] +r[1] -r[2]) *r[3] *r[4];}  /* _gbal() */                /* (H_i +H_j -H_ij) /r_j */static double _gratio (PTREE *pt){ double *r = pt->res1;         /* --- info./spec. gain ratio */  return (r[1] > 0) ? (r[0] +r[1] -r[2]) /r[1] : 0;}  /* _grat() */                /* (H_i +H_j -H_ij) /H_j */static double _sgr1 (PTREE *pt){ double *r = pt->res1;         /* --- sym. info./spec. gain ratio 1 */  return (r[2] > 0) ? (r[0] +r[1] -r[2]) /r[2] : 0;}  /* _sgr1() */                /* (H_i +H_j -H_ij) /H_ij */static double _sgr2 (PTREE *pt){ double *r = pt->res1;         /* --- sym. info./spec. gain ratio 2 */  return (r[0] +r[1] > 0) ? (r[0] +r[1] -r[2]) /(r[0] +r[1]) : 0;}  /* _sgr2() */                /* (H_i +H_j -H_ij) /(H_i +H_j) *//*----------------------------------------------------------------------  Gini Index and Related Measures----------------------------------------------------------------------*/static int _ginit1 (PTREE *pt){                               /* --- Gini index initialization */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  double w_ij;                  /* weighted sum of squared freqs. */  assert(pt && (pt->total > 0));/* check the function arguments */  _sumsqr(pt); w_ij = 0;        /* compute sums of squared freqs. */  lvl = pt->levels +pt->concnt; /* get the leaf level */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ)    if (leaf->total > 0) w_ij += leaf->eval /leaf->total;  pt->res0[3] = w_ij;           /* w_ij = sum_j (1/N_j) sum_i N_ij^2 */  return 0;                     /* return 'ok' */}  /* _ginit1() */ /*--------------------------------------------------------------------*/static int _ginit2 (PTREE *pt){                               /* --- symmetric Gini index init. */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *b, *c;                /* to traverse the buffer/counters */  double w_ij, w_ji;            /* weighted sums of squared freqs. */  assert(pt && (pt->total > 0));/* check the function arguments */  _sumsqr(pt); w_ij = w_ji = 0; /* compute sums of squared freqs. */  lvl = pt->levels +pt->concnt; /* get the leaf level and */  b   = lvl->buf +lvl->cnt;     /* the second evaluation buffer */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if (leaf->total <= 0) continue; /* traverse nonempty leaves */    w_ij += leaf->eval / leaf->total;    b    += lvl->cnt;           /* traverse the counter vector */    for (c = leaf->cnts +(i = lvl->cnt); --i >= 0; ) {      --c; *--b += *c * *c; }   /* sum the squared counters */  }                             /* in the evaluation buffer */  for (c = b +(i = lvl->cnt); --i >= 0; )    w_ji += *--c / *--b;        /* weight and sum the terms */  pt->res0[3] = w_ij;           /* w_ij = sum_j (1/N_j) sum_i N_ij^2 */  pt->res0[4] = w_ji;           /* w_ji = sum_i (1/N_i) sum_j N_ij^2 */  return 0;                     /* return 'ok' */}  /* _ginit2() */ /*--------------------------------------------------------------------*/static double _gini (PTREE *pt){ double *r = pt->res1;         /* --- normal Gini index */  return (r[3] -r[0] /pt->total) /pt->total;}  /* _gini() */                /* w_ij /N - s_i /N^2 */static double _ginisym (PTREE *pt)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -