⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 frqtab.c

📁 dTree是一个运行在WinCE上的文件管理软件。类似文件管理器,功能强大
💻 C
📖 第 1 页 / 共 4 页
字号:
/*----------------------------------------------------------------------  File    : frqtab.c  Contents: frequency table management  Author  : Christian Borgelt  History : 26.06.1997 file created            29.07.1997 first version completed            11.08.1997 some functions changed to #define            25.08.1997 functions ft_comb, ft_uncomb, and ft_dest added            18.09.1997 bug in measure evaluation removed            24.09.1997 function ft_alldst added            29.09.1997 bug in function ft_comb removed            30.09.1997 bug in evaluation with combined columns removed            09.02.1998 order of evaluation measures changed            24.02.1998 bug in function _wevid fixed            23.03.1998 parameters added to evaluation functions            20.03.1999 all 'float' fields/variables changed to 'double'            25.10.1999 evaluation function _wdiff added            15.09.2000 some assertions added            02.12.2000 memory alloc. improved, function ft_copy added            03.12.2000 table access optimized (concerning index -1)            02.03.2001 evaluation measure FEM_INFGBAL added            26.05.2001 computation of ln(\Gamma(n)) improved            26.09.2001 bug in clean up in ft_create removed            02.01.2002 measure FEM_SPCGBAL added, FEM_INFGBAL corrected            06.01.2002 switched to sorting functions from vecops            11.01.2002 measure FEM_CHI2NRM added            22.01.2002 computations in _wdiff improved            31.01.2002 computation of Gini index measures improved            02.02.2002 BD and description length measures reprogrammed            04.02.2002 quadratic information measures added            04.07.2002 bug in function _bdm fixed (equiv. sample size)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "vecops.h"#include "gamma.h"#include "frqtab.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#ifdef FT_EVAL#define	M_PI         3.14159265358979323846  /* \pi   */#define LN_2         0.69314718055994530942  /* ln(2) */#define EPSILON      1e-12      /* to handle roundoff errors *//*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef double EVALFN (FRQTAB* ftab, int measure, double *params);/*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/static const char* mnames[FEM_UNKNOWN+1] = {  /* FEM_NONE      0 */ "no measure",  /* FEM_INFGAIN   1 */ "information gain",  /* FEM_INFGBAL   2 */ "balanced information gain",  /* FEM_INFGR     3 */ "information gain ratio",  /* FEM_INFSGR1   4 */ "symmetric information gain ratio 1",  /* FEM_INFSGR2   5 */ "symmetric information gain ratio 2",  /* FEM_QIGAIN    1 */ "quadratic information gain",  /* FEM_QIGBAL    2 */ "balanced quadratic information gain",  /* FEM_QIGR      3 */ "quadratic information gain ratio",  /* FEM_QISGR1    4 */ "symmetric quadratic information gain ratio 1",  /* FEM_QISGR2    5 */ "symmetric quadratic information gain ratio 2",  /* FEM_GINI      6 */ "Gini index",  /* FEM_GINISYM   7 */ "symmetric Gini index",  /* FEM_GINIMOD   8 */ "modified Gini index",  /* FEM_RELIEF    9 */ "relief measure",  /* FEM_WDIFF    10 */ "sum of weighted differences",  /* FEM_CHI2     11 */ "chi^2 measure",  /* FEM_CHI2NRM  12 */ "normalized chi^2 measure",  /* FEM_WEVID    13 */ "weight of evidence",  /* FEM_RELEV    14 */ "relevance",  /* FEM_BDM      15 */ "Bayesian-Dirichlet / K2 metric",  /* FEM_BDMOD    16 */ "modified Bayesian-Dirichlet / K2 metric",  /* FEM_RDLREL   17 */ "reduction of description length (rel. freq.)",  /* FEM_RDLABS   18 */ "reduction of description length (abs. freq.)",  /* FEM_STOCO    19 */ "stochastic complexity",  /* FEM_SPCGAIN  20 */ "specificity gain",  /* FEM_SPCGAIN  21 */ "balanced specificity gain",  /* FEM_SPCGR    22 */ "specificity gain ratio",  /* FEM_SPCSGR1  23 */ "symmetric specificity gain ratio 1",  /* FEM_SPCSGR2  24 */ "symmetric specificity gain ratio 2",  /* FEM_UNKNOWN  25 */ "<unknown measure>",};                              /* names of evaluation measures *//*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/static double _nsp (double *dist, int cnt){                               /* --- compute nonspecificity */  double nsp  = 0;              /* nonspecificity */  double prec = 0;              /* preceding frequency */  double t;                     /* temporary buffer */  assert(dist && (cnt >= 0));   /* check the function arguments */  v_dblsort(dist, cnt);         /* sort the frequencies */  for ( ; cnt > 1; cnt--) {     /* and then traverse them */    t = *dist -prec; prec = *dist++;    if (t > 0) nsp += t *log(cnt);  }                             /* calculate and return the */  return nsp;                   /* nonspecificity of the distribution */}  /* _nsp() *//*----------------------------------------------------------------------  Evaluation Functions----------------------------------------------------------------------*/static double _info (FRQTAB *ftab, int measure, double *params){                               /* --- Shannon information measures */  int    x, y;                  /* loop variables */  double **c;                   /* to traverse the table columns */  double *fx, *fy, *fxy;        /* to traverse the frequencies */  double s_x, s_y, s_xy;        /* sums for entropy computation */  double info, t;               /* information gain (ratio), buffer */  assert(ftab);                 /* check the function argument */  if (ftab->known < EPSILON) return 0;  s_x = s_y = s_xy = 0;         /* process the row distribution */  for (fy = ftab->frq_y +(y = ftab->ycnt); --y >= 0; )    if (*--fy > 0) s_y += *fy *log(*fy);  c = ftab->frq_xy +ftab->xcnt; /* process the column distribution */  for (fx = ftab->frq_x +(x = ftab->xcnt); --x >= 0; --c) {    if (*--fx <= 0) continue;   /* skip empty and combined columns */    s_x += *fx *log(*fx);       /* process the column distribution */    t = 0;                      /* and a conditional distribution */    for (fxy = *c +(y = ftab->ycnt); --y >= 0; )      if (*--fxy > 0) t += *fxy *log(*fxy);    s_xy += t;                  /* process columns individually and */  }                             /* sum the results (higher accuracy) */  t = ftab->known; t *= log(t); /* compute N *log(N) only once */  s_x  = t -s_x;                /* N H_x  = -N sum_x  p_x  *log(p_x)  */  s_y  = t -s_y;                /* N H_y  = -N sum_y  p_y  *log(p_y)  */  s_xy = t -s_xy;               /* N H_xy = -N sum_xy p_xy *log(p_xy) */  info = s_x +s_y -s_xy;        /* compute information gain *N *ln(2) */  switch (measure & 0xff) {     /* evaluate the measure code */    case FEM_INFGBAL: info /= log(ftab->xcnt) *ftab->known; break;    case FEM_INFGR  : if (s_x      <= 0) return 0;                      info /= s_x;                          break;    case FEM_INFSGR1: if (s_xy     <= 0) return 0;                      info /= s_xy;                         break;    case FEM_INFSGR2: if (s_x +s_y <= 0) return 0;                      info /= s_x +s_y;                     break;    default:          info /= LN_2 *ftab->known;            break;  }                             /* form requested entropy ratio */  if (measure & FEF_WGTD) return info *(ftab->known/ftab->frq);  return info;                  /* return the information measure */}  /* _info() *//*--------------------------------------------------------------------*/static double _quad (FRQTAB *ftab, int measure, double *params){                               /* --- quadratic information measures */  int    x, y;                  /* loop variables */  double **c;                   /* to traverse the table columns */  double *fx, *fy, *fxy;        /* to traverse the frequencies */  double s_x, s_y, s_xy;        /* sum of squared frequencies */  double quad, t;               /* information gain (ratio), buffer */  assert(ftab);                 /* check the function argument */  if (ftab->known < EPSILON) return 0;  s_y = s_x = s_xy = 0;         /* process the row distribution */  for (fy = ftab->frq_y +(y = ftab->ycnt); --y >= 0; ) {    --fy; s_y += *fy * *fy; }   /* compute sum_y N(y)^2 */  c = ftab->frq_xy +ftab->xcnt; /* process the joint distribution */  for (fx = ftab->frq_x +(x = ftab->xcnt); --x >= 0; --c) {    if (*--fx <= 0) continue;   /* skip empty and combined columns */    s_x += *fx * *fx;           /* process the column distribution */    t = 0;                      /* and a conditional distribution */    for (fxy = *c +(y = ftab->ycnt); --y >= 0; ) {      --fxy; t += *fxy * *fxy; }    s_xy += t;                  /* compute sum_xy N(x,y)^2 */  }  t = ftab->known; t *= t;      /* compute N^2 only once */  s_x  = t -s_x;                /* N^2/2 H^2_i  = N^2 -sum_i  N_i^2  */  s_y  = t -s_y;                /* N^2/2 H^2_j  = N^2 -sum_j  N_j^2  */  s_xy = t -s_xy;               /* N^2/2 H^2_ij = N^2 -sum_ij N_ij^2 */  quad = s_x +s_y -s_xy;        /* compute information gain *N *ln(2) */  switch (measure & 0xff) {     /* evaluate the measure code */    case FEM_QIGBAL: quad /= t *(1 -1/ftab->xcnt); break;    case FEM_QIGR  : if (s_x      <= 0) return 0;                     quad /= s_x;                  break;    case FEM_QISGR1: if (s_xy     <= 0) return 0;                     quad /= s_xy;                 break;    case FEM_QISGR2: if (s_x +s_y <= 0) return 0;                     quad /= s_x +s_y;             break;    default:         quad /= 0.5 *t;               break;  }                             /* form requested entropy ratio */  if (measure & FEF_WGTD) return quad *(ftab->known/ftab->frq);  return quad;                  /* return the information measure */}  /* _quad() *//*--------------------------------------------------------------------*/static double _gini (FRQTAB *ftab, int measure, double *params){                               /* --- Gini index/relief measure */  int    x, y;                  /* loop variables */  double **c;                   /* to traverse the table columns */  double *fx, *fy, *fxy;        /* to traverse the frequencies */  double s_x, s_y, s_xy;        /* sum of squared frequencies */  double w_xy, w_yx;            /* weighted sum of squared freq. */  double gini, t;               /* Gini index / relief measure */  assert(ftab);                 /* check the function argument */  if (ftab->known < EPSILON) return 0;  s_y = s_xy = w_yx = 0;        /* process the row distribution */  for (fy = ftab->frq_y +(y = ftab->ycnt); --y >= 0; ) {    --fy; s_y += *fy * *fy; }   /* compute sum_y N(y)^2 */  c = ftab->frq_xy +ftab->xcnt; /* process the joint distribution */  for (fx = ftab->frq_x +(x = ftab->xcnt); --x >= 0; --c) {    if (*--fx <= 0) continue;   /* skip empty and combined columns */    t = 0;                      /* process a conditional distribution */    for (fxy = *c +(y = ftab->ycnt); --y >= 0; ) {      --fxy; t += *fxy * *fxy; }    s_xy += t;                  /* compute sum_xy N(x,y)^2 and */    w_yx += t / *fx;            /* sum_x 1/N(x) sum_y N(x,y)^2 */  }  if ((measure & 0xff) == FEM_GINI) {    return (w_yx -s_y /ftab->known)         / ((measure & FEF_WGTD) ? ftab->frq : ftab->known);  }                             /* compute and return the Gini index */  s_x = w_xy = 0;               /* process the column distribution */  for (fx = ftab->frq_x +(x = ftab->xcnt); --x >= 0; ) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -