📄 ptree5.c

📁 数据挖掘中的一算法 ines算法 c下实现的。适合初学习数据挖掘者借鉴
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*----------------------------------------------------------------------  File    : ptree5.c  Contents: probability/possibility tree management            local structure learning functions  Author  : Christian Borgelt  History : 22.10.1995 file created as ctree.c            12.01.2002 local structure learning moved to separate file            04.07.2002 function logGa made external            17.07.2002 function tables redesigned, _nsp moved to ptree3            18.07.2002 optimizations for independent gains added            17.01.2003 normalization of possibilistic measures added            18.01.2003 bug in computation of balanced spc. gain fixed----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "gamma.h"#include "ptree.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define	M_PI   3.14159265358979323846  /* \pi   */#define M_LN2  0.69314718055994530942  /* ln(2) *//* --- flags for leaf merger evaluation --- */#define MRG_INDEP     0         /* gain is independent of mergers */#define MRG_LOCAL     1         /* local  recomputation necessary */#define MRG_GLOBAL    2         /* global recomputation necessary *//*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef double EVALFN (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2);typedef struct {                /* --- function table element */  EVALFN *evalfn;               /* leaf merger evaluation function */  int    type;                  /* evaluation type, e.g. MRG_INDEP */} FTE;                          /* (function table element) */typedef struct {                /* --- merge table --- */  EVALFN *evalfn;               /* leaf merger evaluation function */  double minimp;                /* minimal improvement */  int    leafcnt;               /* number of leaves in the tree */  int    rowcnt;                /* current number of table rows */  int    *bids;                 /* indices of best mergers per row */   double **evals;               /* evaluations of mergers */  PTLEAF *leaves[1];            /* list of leaf nodes */} MRGTAB;                       /* (merge table) *//*----------------------------------------------------------------------  Evaluation Functions----------------------------------------------------------------------*/static double _info (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- Shannon information change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  float  *c1, *c2, c, n1, n2;   /* to traverse the counters */  double s, t;                  /* sum of entropy terms */  assert(pt && leaf1 && leaf2   /* check the function arguments */     && (leaf1->total > 0) && (leaf2->total > 0));  pt->res1[0] = pt->res0[0];    /* simply copy N H_i (no change) */  n1 = leaf1->total;            /* get the two leaf totals, sum them, */  n2 = leaf2->total; c = n1+n2; /* and update N H_j (marginal dist.) */  t  = n1*log(n1) +n2*log(n2) -c*log(c);  pt->res1[1] = pt->res0[1] +((t < 0) ? t : 0);  lvl = pt->levels +pt->concnt; /* get the leaf level description */  c1  = leaf1->cnts +(i = lvl->cnt);  c2  = leaf2->cnts + i;        /* traverse the counters */  for (s = 0; --i >= 0; ) {     /* of the two leaves and */    c = *--c1 + *--c2;          /* compute the entropy term */    if (c > 0) s += c*log(c);   /* of a merged leaf, then */  }                             /* update N H_ij (joint distribution) */  t = leaf1->eval +leaf2->eval -s;  pt->res1[2] = pt->res0[2] +((t < 0) ? t : 0);  pt->res1[3] = pt->res0[3];    /* note the normalization factors */  i = pt->leafcnt -1;           /* (for one leaf less) */  pt->res1[4] = (i > 1) ? 1/(pt->total *log(i)) : 0;  return s;                     /* return the leaf evaluation */}  /* _info() *//*--------------------------------------------------------------------*/static double _quad (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- quadratic information change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  float  *c1, *c2, c, n1, n2;   /* to traverse the counters */  double s, t;                  /* sum of entropy terms */  assert(pt && leaf1 && leaf2); /* check the function arguments */  pt->res1[0] = pt->res0[0];    /* simply copy N^2/2 H^2_i */  n1 = leaf1->total;            /* get the leaf totals, */  n2 = leaf2->total; c = n1+n2; /* sum them (total of merged leaf), */  t  = n1*n1 +n2*n2 -c*c;       /* and update N^2/2 H^2_j */  pt->res1[1] = pt->res0[1] +((t < 0) ? t : 0);  lvl = pt->levels +pt->concnt; /* get the leaf level description */  c1  = leaf1->cnts +(i = lvl->cnt);  c2  = leaf2->cnts + i;        /* traverse the leaf counters */  for (s = 0; --i >= 0; ) {     /* of the two leaves and */    c = *--c1 + *--c2;          /* compute the entropy term */    s += c *c;                  /* of a merged leaf, */  }                             /* then update N^2/2 H^2_ij */  t = leaf1->eval +leaf2->eval -s;  pt->res1[2] = pt->res0[2] +((t < 0) ? t : 0);  pt->res1[3] = pt->res0[3];    /* note the normalization factors */  i = pt->leafcnt -1;           /* (for one leaf less) */  pt->res0[4] = (i > 0) ? 1/(s *(1 -1/i)) : 0;  return s;                     /* return the leaf evaluation */}  /* _quad() *//*--------------------------------------------------------------------*/static double _spec (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- specificity change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  PTLEAF *leaf;                 /* to traverse the leaves */  float  *b, *c1, *c2;          /* to traverse the buffers/counters */  assert(pt && (pt->total > 0));/* check the function argument */  pt->res1[0] = pt->res0[0];    /* simply copy S_i = nsp(child) */  lvl = pt->levels +pt->concnt; /* get the leaf level and */  b = pt->buf;                  /* process distribution on conditions */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ)    if ((leaf != leaf1) && (leaf != leaf2) && (leaf->total > 0))      *b++ = leaf->total;       /* collect the leaf totals */  *b++ = leaf1->total +leaf2->total;  pt->res1[1] = pt_nsp(pt->buf, (int)(b -pt->buf));  b = pt->buf;                  /* process the joint distribution */  for (leaf = (PTLEAF*)lvl->list; leaf; leaf = leaf->succ) {    if ((leaf == leaf1) || (leaf == leaf2) || (leaf->total <= 0))      continue;                 /* traverse nonempty leaves */    for (c1 = leaf->cnts +(i = lvl->cnt); --i >= 0; )      if (*--c1 > 0) *b++ = *c1;/* copy nonzero counters to buffer */  }                             /* and compute S_ij from them */  c1 = leaf1->cnts +(i = lvl->cnt);  c2 = leaf2->cnts + i;         /* add the counters for the merger */  while (--i >= 0) *b++ = *--c1 + *--c2;  pt->res1[2] = pt_nsp(pt->buf, (int)(b -pt->buf));  pt->res1[3] = pt->res0[3];    /* note normalization factors */  i = pt->leafcnt -1;           /* (for one leaf less) */  pt->res1[4] = (i > 1) ? M_LN2/log(i) : 1;  return 0;                     /* return a dummy evaluation */}  /* _spec() *//*--------------------------------------------------------------------*/static double _gini1 (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- Gini index 1 change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  float  *c1, *c2, c;           /* to traverse the counters */  float  n1,  n2,  n;           /* leaf counter totals */  double s, t;                  /* sum of squared frequencies */  assert(pt && leaf1 && leaf2   /* check the function arguments */     && (leaf1->total > 0) && (leaf2->total > 0));  pt->res1[0] = pt->res0[0];    /* simply copy sum_i N_i^2 */  n1 = leaf1->total;            /* get the leaf totals, */  n2 = leaf2->total; n = n1+n2; /* sum them (total of merged leaf), */  t  = n*n -n1*n1 -n2*n2;       /* and update sum_j N_j^2 */  pt->res1[1] = pt->res0[1] +((t > 0) ? t : 0);  lvl = pt->levels +pt->concnt; /* get the leaf level description */  c1  = leaf1->cnts +(i = lvl->cnt);  c2  = leaf2->cnts + i;        /* traverse the leaf counters */  for (s = 0; --i >= 0; ) {     /* of the two leaves and */    c = *--c1 + *--c2;          /* compute the squared frequencies */    s += c *c;                  /* of a merged leaf, */  }                             /* then update sum_ij N_ij^2 */  t = s -leaf1->eval -leaf2->eval;  pt->res1[2] = pt->res0[2] +((t > 0) ? t : 0);  pt->res1[3] = pt->res0[3] +(s/n -leaf1->eval/n1 -leaf2->eval/n2);  return s;                     /* return the leaf evaluation */}  /* _gini1() *//*--------------------------------------------------------------------*/static double _gini2 (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- Gini index 2 change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  float  *c1, *c2, c, *b;       /* to traverse the counters */  float  n1,  n2,  n;           /* leaf counter totals */  double s, t;                  /* sum of squared frequencies */  assert(pt && leaf1 && leaf2   /* check the function arguments */     && (leaf1->total > 0) && (leaf2->total > 0));  pt->res1[0] = pt->res0[0];    /* simply copy sum_i N_i^2 */  n1 = leaf1->total;            /* get the leaf totals, */  n2 = leaf2->total; n = n1+n2; /* sum them (total of merged leaf), */  t  = n*n -n1*n1 -n2*n2;       /* and update sum_j N_j^2 */  pt->res1[1] = pt->res0[1] +((t > 0) ? t : 0);  lvl = pt->levels +pt->concnt; /* get the leaf level description */  c1  = leaf1->cnts +(i = lvl->cnt);  c2  = leaf2->cnts + i;        /* traverse the leaf counters and */  b   = lvl->buf +i +i;         /* the second evaluation buffer */  for (s = 0; --i >= 0; ) {     /* compute the squared frequencies */    c = *--c1 + *--c2;          /* of a merged leaf and update */    s    += c *= c;             /* the buffered squared frequencies */    *--b += c -*c1**c1 -*c2**c2;  }                             /* update the sums s_ij and w_ij */  t = s -leaf1->eval -leaf2->eval;  pt->res1[2] = pt->res0[2] +((t > 0) ? t : 0);  pt->res1[3] = pt->res0[3] +(s/n -leaf1->eval/n1 -leaf2->eval/n2);  for (n = 0, c1 = b +(i = lvl->cnt); --i >= 0; )    n += *--c1 / *--b;          /* completely recompute */  pt->res1[4] = n;              /* w_ji = sum_i (1/N_i) sum_j N_ij^2 */  return s;                     /* return the leaf evaluation */}  /* _gini2() *//*--------------------------------------------------------------------*/static double _bdm (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- Bayesian-Dirichlet change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  float  *c1, *c2;              /* to traverse the counters */  double s, t;                  /* sum of BD metric terms, buffer */  double p, a;                  /* prior and sensitivity */  assert(pt && leaf1 && leaf2   /* check the function arguments */     && (leaf1->total > 0) && (leaf2->total > 0));  pt->res1[0] = pt->res0[0];    /* copy the reference evaluation */  lvl = pt->levels +pt->concnt; /* get the leaf level description */  a   = (pt->params[0] > -1) ? pt->params[0]+1 : 1;  a  *= a;                      /* get the sensitivity parameter */  p   = (pt->params[1] != 0) ? pt->params[1]   : 1;  if (p < 0)                    /* get the prior/equiv. sample size */    p = (-p /lvl->cnt /pt->fullcnt) *(leaf1->pathcnt +leaf2->pathcnt);  c1  = leaf1->cnts +(i = lvl->cnt);  c2  = leaf2->cnts + i;        /* traverse the leaf counters and */  for (s = 0; --i >= 0; )       /* sum the BD terms for the leaf */    s += logGa((*--c1 +*--c2) *a +p);  t  = lvl->cnt *p;             /* compute the leaf term and update */  s += logGa(t) -lvl->cnt *logGa(p)               /* the BD metric */     - logGa((leaf1->total +leaf2->total) *a +t);  pt->res1[1] = pt->res0[1] +(s -leaf1->eval -leaf2->eval);  return s;                     /* return the leaf evaluation */}  /* _bdm() *//*--------------------------------------------------------------------*/static double _bdmod (PTREE *pt, PTLEAF *leaf1, PTLEAF *leaf2){                               /* --- mod. Bayesian-Dirichlet change */  int    i;                     /* loop variable */  PTLVL  *lvl;                  /* leaf level description */  float  *c1, *c2, c, n;        /* to traverse the counters */  double t, s;                  /* sum of BD metric terms, buffer */  double p, a;                  /* prior and sensitivity */  assert(pt && leaf1 && leaf2   /* check the function arguments */     && (leaf1->total > 0) && (leaf2->total > 0));  pt->res1[0] = pt->res0[0];    /* copy the reference evaluation */  lvl = pt->levels +pt->concnt; /* get the leaf level description */  a   = (pt->params[0] > -1) ? pt->params[0]+1 : 1;  if (a < 0) a = 0;             /* get the sensitivity parameter */  p   = (pt->params[1] != 0) ? pt->params[1]   : 1;  if (p < 0)                    /* get the prior/equiv. sample size */    p = (-p /lvl->cnt /pt->fullcnt) *(leaf1->pathcnt +leaf2->pathcnt);  c1  = leaf1->cnts +(i = lvl->cnt);  c2  = leaf2->cnts + i;        /* traverse the leaf counters and */  for (s = 0; --i >= 0; ) {     /* sum the BD terms for the counters */    c = *--c1 +*--c2; t = c *a +p; s += logGa(t +c) -logGa(t); }  n  = leaf1->total +leaf2->total;  t  = n *a +lvl->cnt *p;       /* compute the leaf term and */  s += logGa(t) -logGa(t +n);   /* update the modified BD metric */  pt->res1[1] = pt->res0[1] +(s -leaf1->eval -leaf2->eval);
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -