⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nbayes.c

📁 数据挖掘中的bayes算法,很好的代码
💻 C
📖 第 1 页 / 共 5 页
字号:
/*----------------------------------------------------------------------  File    : nbayes.c  Contents: Naive Bayes classifier management  Author  : Christian Borgelt  History : 1998.12.07 file created            1998.12.08 nbc_create, nbc_clone, nbc_delete, nbc_add prog.            1998.12.10 function nbc_desc completed            1998.12.11 function nbc_exec completed            1998.12.12 function nbc_parse completed            1998.12.16 all functions debugged            1999.02.13 tuple parameters added to nbc_add and nbc_exec            1999.01.10 execution for one att. made a separate function            1999.03.11 function nbc_induce added            1999.03.25 distrib. of tuple weight for null values added            1999.03.27 functions nbc_exp und nbc_var added            1999.05.15 automatic frequency vector resizing added            2000.11.10 function nbc_exec adapted            2000.11.18 function nbc_setup added, nbc_exec adapted            2000.11.21 redesign completed            2001.02.11 bug in function nbc_mark (> instead of >=) fixed            2001.07.15 parser improved (global variables removed)            2001.07.16 adapted to modified module scan            2001.07.17 parser improved (conditional look ahead)            2003.04.26 function nbc_rand added            2004.04.15 zero variances replaced by EPSILON            2004.08.12 adapted to new module parse            2007.02.13 adapted to modified module attset            2007.03.21 function nbc_exec extended (posterior probs.)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "nbayes.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define	M_PI        3.14159265358979323846  /* \pi */#define EPSILON     1e-12       /* to handle roundoff errors */#define BLKSIZE     16          /* block size for vectors *//*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef struct {                /* --- selectable attribute --- */  int    attid;                 /* attribute identifier */  double errs;                  /* number of misclassifications */} SELATT;                       /* (selectable attribute) *//*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/#ifdef NBC_INDUCEstatic int _clsrsz (NBC *nbc, int clscnt){                               /* --- resize class dependent vectors */  int    i, k, n;               /* loop variables, buffer */  int    clsvsz;                /* size of the class dep. vectors */  DVEC   *dvec;                 /* to traverse the distrib. vectors */  NORMD  *normd;                /* to traverse the normal   distribs. */  DISCD  *discd;                /* to traverse the discrete distribs. */  double *frq;                  /* to traverse the frequency vectors */  assert(nbc && (clscnt >= 0)); /* check the function arguments */  /* --- resize the class dependent vectors --- */  clsvsz = nbc->clsvsz;         /* get the class dep. vector size */  if (clscnt >= clsvsz) {       /* if the vectors are too small */    clsvsz += (clsvsz > BLKSIZE) ? clsvsz >> 1 : BLKSIZE;    if (clscnt >= clsvsz) clsvsz = clscnt;    frq = (double*)realloc(nbc->frqs, clsvsz *4 *sizeof(double));    if (!frq) return -1;        /* resize the frequencies vector */    nbc->frqs   = frq;          /* and set the new vector */    nbc->priors = nbc->frqs   +clsvsz;  /* organize the rest */    nbc->posts  = nbc->priors +clsvsz;  /* of the allocated */    nbc->cond   = nbc->posts  +clsvsz;  /* memory block */    n = clsvsz -nbc->clsvsz;    /* calc. number of new vector fields */    for (frq += clsvsz, k = n; --k >= 0; )      *--frq = 0;               /* clear the new vector fields */    for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) {      if ((--dvec)->type == 0)  /* traverse all attributes */        continue;               /* except the class attribute */      if (dvec->type == AT_NOM){/* if the attribute is nominal */        discd = (DISCD*)realloc(dvec->discds, clsvsz *sizeof(DISCD));        if (!discd) return -1;  /* resize the discrete dists. vector */        dvec->discds = discd;   /* and set the new vector */        for (discd += clsvsz, k = n; --k >= 0; ) {          (--discd)->cnt = 0; discd->frqs = NULL;        } }                     /* clear the new vector fields */      else {                    /* if the attribute is numeric */        normd = (NORMD*)realloc(dvec->normds, clsvsz *sizeof(NORMD));        if (!normd) return -1;  /* resize the normal dists. vector */        dvec->normds = normd;   /* and set the new vector */        for (normd += clsvsz, k = n; --k >= 0; ) {          (--normd)->cnt = 0; normd->sv = normd->sv2 = 0; }      }                         /* clear the new vector fields */    }  /* for (dvec = ... */    nbc->clsvsz = clsvsz;       /* set new size of the class vectors */  }  /* if (clscnt >= clsvsz) ... */  /* --- create new value frequency vectors --- */  for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) {    if ((--dvec)->type != AT_NOM)      continue;                 /* traverse all nominal attributes */    discd = dvec->discds +clscnt;    for (k = clscnt -nbc->clscnt; --k >= 0; ) {      (--discd)->frqs =         /* allocate a value frequency vector */      frq = (double*)malloc(dvec->valvsz *2 *sizeof(double));      if (!frq) break;          /* set the probabilities vector */      discd->probs = frq +dvec->valvsz;      for (frq += n = dvec->valvsz; --n >= 0; )        *--frq = 0;             /* traverse the frequency vectors */    }                           /* and init. the value frequencies */    if (k >= 0) break;          /* on error abort the loop */  }  if (i >= 0) {                 /* if an error occurred */    for (i = nbc->attcnt -i; --i >= 0; dvec++) {      if ((--dvec)->type != AT_NOM) continue;      discd = dvec->discds +clscnt;      for (k = clscnt -nbc->clscnt; --k >= 0; )        if ((--discd)->frqs) { free(discd->frqs); discd->frqs = NULL; }    }                           /* delete the newly created value */    return -1;                  /* frequency vectors of the nominal */  }                             /* attributes and abort the function */  nbc->clscnt = clscnt;         /* set the new number of classes */  return 0;                     /* return 'ok' */}  /* _clsrsz() *//*--------------------------------------------------------------------*/static int _valrsz (DVEC *dvec, int clscnt, int valcnt){                               /* --- resize the value freq. vectors */  int    i, k, n;               /* loop variables, num. of new elems. */  int    valvsz;                /* size of the value freq. vectors */  int    bsz;                   /* size of vector in bytes */  DISCD  *discd;                /* to traverse the discrete distribs. */  double *frq;                  /* to traverse the frequency vectors */  assert(dvec                   /* check the function argument */     && (dvec->type == AT_NOM) && (clscnt >= 0) && (valcnt >= 0));  valvsz = dvec->valvsz;        /* get the value freq. vector size */  if (valcnt > valvsz) {        /* if the vectors are too small */    valvsz += (valvsz > BLKSIZE) ? valvsz >> 1 : BLKSIZE;    if (valcnt > valvsz) valvsz = valcnt;    n   = valvsz -dvec->valcnt; /* get the number of new elements */    bsz = valvsz *2 *sizeof(double);                         for (discd = dvec->discds +(i = clscnt); --i >= 0; ) {      --discd;                  /* traverse the discrete distribs. */      frq = (double*)realloc(discd->frqs, bsz);      if (!frq) break;          /* resize the value freq. vector */      discd->frqs  = frq;       /* and the probabilities vector */      discd->probs = frq +valvsz;    /* and set the new vectors */      for (frq += valvsz, k = n; --k >= 0; )        *--frq = 0;             /* clear the new vector elements */    }    if (i < 0) {                /* if an error occurred */      bsz = dvec->valvsz *2 *sizeof(double);      for (i = clscnt -i -1; --i >= 0; ) {        ++discd;                /* traverse the processed distribs. */        discd->frqs  = (double*)realloc(discd->frqs, bsz);        discd->probs = discd->frqs +dvec->valvsz;      }                         /* shrink all value freq. vectors */      return -1;                /* to their old size */    }                           /* and then abort */  }  dvec->valcnt = valcnt;        /* set the new number of values */  return 0;                     /* return 'ok' */}  /* _valrsz() */#endif/*--------------------------------------------------------------------*/static int _exec (const NBC *nbc, int attid, const INST *inst){                               /* --- execute for one attribute */  int         i, k;             /* loop variable, buffer */  const DVEC  *dvec;            /* to traverse the distrib. vectors */  const NORMD *normd;           /* to traverse the normal   distribs. */  const DISCD *discd;           /* to traverse the discrete distribs. */  double      *prob;            /* to traverse the class probs. */  double      v, d, s;          /* temporary buffers */  assert(nbc && inst            /* check the function arguments */      && (attid >= 0) && (attid < nbc->attcnt));  dvec = nbc->dvecs +attid;     /* get the distribution vector */  assert(dvec->type != 0);      /* and check the attribute type */  if (dvec->type == AT_NOM) {   /* --- if the attribute is nominal */    k = inst->i;                /* get and check the attribute value */    if ((k < 0) || (k >= dvec->valcnt)) return -1;    discd = dvec->discds +nbc->clscnt;    prob  = nbc->cond    +nbc->clscnt;    for (i = nbc->clscnt; --i >= 0; ) {      d = (--discd)->probs[k];  /* traverse the discrete distribs. */      *--prob = (d > 0) ? d : EPSILON;    } }                         /* copy the class probabilities */  else {                        /* --- if the attribute is numeric */    if (dvec->type == AT_REAL){ /* if the attribute is real valued */      if (inst->f <= NV_REAL) return -1;      v = (double)inst->f; }    /* check and get the attribute value */    else {                      /* if the attribute is integer valued */      if (inst->i <= NV_INT)  return -1;      v = (double)inst->i;      /* check and get the attribute value */    }                           /* (convert it to double) */    normd = dvec->normds +nbc->clscnt;    prob  = nbc->cond    +nbc->clscnt;    for (i = nbc->clscnt; --i >= 0; ) {      d = v -(--normd)->exp;    /* traverse the normal distributions */      s = 2 *normd->var;        /* and get their parameters */      if (s < EPSILON) s = EPSILON;      *--prob = exp(-d*d/s) /sqrt(M_PI*s);    }                           /* compute the probability density */  }                             /* at the value of the attribute */  return 0;                     /* return 'ok' */}  /* _exec() *//*--------------------------------------------------------------------*/static double _normd (double drand (void)){                               /* --- compute N(0,1) distrib. number */  static double b;              /* buffer for random number */  double x, y, r;               /* coordinates and radius */  if (b != 0.0) {               /* if the buffer is full, */    x = b; b = 0; return x; }   /* return the buffered number */  do {                          /* pick a random point */    x = 2.0*drand()-1.0;        /* in the unit square [-1,1]^2 */    y = 2.0*drand()-1.0;        /* and check whether it lies */    r = x*x +y*y;               /* inside the unit circle */  } while ((r > 1) || (r == 0));  r = sqrt(-2*log(r)/r);        /* factor for Box-Muller transform */  b = x *r;                     /* save one of the random numbers */  return y *r;                  /* and return the other */}  /* _normd() *//*--------------------------------------------------------------------*/#ifdef NBC_INDUCEstatic int _eval (NBC *nbc, TABLE *table, int mode,                  SELATT *savec, int cnt){                               /* --- evaluate selectable attributes */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -