⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nbayes.c

📁 数据挖掘中的bayes算法,很好的代码
💻 C
📖 第 1 页 / 共 5 页
字号:
  n   = nbc->clscnt;            /* get the number of classes and */  prb = nbc->priors +n;         /* traverse the class probabilities */  if (cnt <= 0)                 /* if the denominator is invalid, */    while (--n >= 0) *--prb = 0;       /* clear all probabilities */  else {                        /* if the denominator is valid, */    frq = nbc->frqs +n;         /* traverse the class frequencies */    while (--n >= 0) *--prb = (*--frq +lcorr) /cnt;  }                             /* estimate the class probabilities */  /* --- estimate conditional probabilities --- */  for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) {    if ((--dvec)->type == 0) {  /* traverse all attributes */      dvec->mark = 0; continue;}/* except the class attribute */    if      (mode & NBC_ALL)    /* if to use all attributes, */      dvec->mark = 1;           /* mark attribute as used */    else if (mode & NBC_MARKED) /* if to use only marked atts. */      dvec->mark = (att_getmark(as_att(nbc->attset, i)) >= 0)                 ? 1 : -1;      /* get the attribute mark */    if (dvec->mark < 0)         /* otherwise keep the */      continue;                 /* selection of attributes */    if (dvec->type == AT_NOM) { /* -- if the attribute is nominal */      if (dvec->valcnt <= 0) {  /* if the attribute has no values, */        dvec->mark = -1; continue; }     /* there is nothing to do */      sp = dvec->valcnt *lcorr; /* compute the sum of the priors */      for (discd = dvec->discds +(k = nbc->clscnt); --k >= 0; ) {        --discd;                /* traverse the distributions */        n   = dvec->valcnt;     /* get the number of att. values */        prb = discd->probs +n;  /* and the probability vector */        add = (mode & NBC_DWNULL) ? nbc->frqs[k] : discd->cnt;        cnt = sp +add;          /* compute denominator of estimator */        if (cnt <= 0)           /* if the estimator is invalid, */          while (--n >= 0) *--prb = 0;      /* clear all probs. */        else {                  /* if the estimator is valid */          add = lcorr +(add -discd->cnt) /n;          for (frq = discd->frqs +n; --n >= 0; )            *--prb = (*--frq +add) /cnt;        }                       /* traverse the value frequencies */      } }                       /* and estimate the probabilities */    else {                      /* -- if the attribute is numeric */      for (normd = dvec->normds +(k = nbc->clscnt); --k >= 0; ) {        cnt = (--normd)->cnt;   /* traverse the distributions */        normd->exp = (cnt > 0) ? normd->sv /cnt : 0;        if (!(mode & NBC_MAXLLH)) cnt -= 1;        normd->var = (cnt > 0)                   ? (normd->sv2 -normd->exp *normd->sv) /cnt : 0;      }                         /* estimate the expected value and */    }                           /* the variance (either with unbiased */  }                             /* or with max. likelihood estimator) */  nbc->dvecs[nbc->clsid].mark = 1;  /* mark the class attribute */}  /* nbc_setup() *//*----------------------------------------------------------------------Point estimator for the expected value of a normal distribution:  \hat{\mu} = \frac{1}{n} \sum_{i=1}^n x_i(unbiased, consistent, and efficient)Point estimator for the variance of a normal distribution:  \hat{\sigma}^2 = \frac{1}{n-1} \sum_{i=1}^n (x_i - \hat{\mu})^2                 = \frac{1}{n-1} (\sum_{i=1}^n x_i^2 - n\hat{\mu}^2)(unbiased and consistent)Maximum likelihood estimator for the variance of a normal distribution:  \hat{\sigma}^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \hat{\mu})^2                 = \frac{1}{n} (\sum_{i=1}^n x_i^2 - n\hat{\mu}^2)(consistent)Source: R.L. Larsen and M.L. Marx.        An Introduction to Mathematical Statistics and Its Applications.        Prentice Hall, Englewood Cliffs, NJ, USA 1986, pp. 312 & 314----------------------------------------------------------------------*/int nbc_exec (NBC *nbc, const TUPLE *tpl, double *conf){                               /* --- execute a naive Bayes class. */  int        i, k;              /* loop variables */  DVEC       *dvec;             /* to traverse the distrib. vectors */  const INST *inst;             /* to traverse the instances */  double     *s, *d;            /* to traverse the probabilities */  double     sum;               /* sum of class probabilities */  assert(nbc);                  /* check the function argument */  /* --- initialize --- */  s = nbc->priors +nbc->clscnt; /* init. the posterior distribution */  d = nbc->posts  +nbc->clscnt; /* with  the prior     distribution */  for (k = nbc->clscnt; --k >= 0; ) *--d = *--s;  /* --- process attribute values --- */  for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) {    if (((--dvec)->type == 0)   /* traverse all attributes */    ||  (   dvec ->mark <  0))  /* except the class attribute */      continue;                 /* and all unmarked attributes */    inst = (tpl)                /* get the attribute instantiation */         ? tpl_colval(tpl, i)   /* from the tuple or the att. set */         : att_inst(as_att(nbc->attset, i));    if (_exec(nbc, i, inst) < 0)/* execute the classifier */      continue;                 /* for the current attribute */    s = nbc->cond +nbc->clscnt; /* traverse the cond. probabilities */    d = nbc->posts+nbc->clscnt; /* and the posterior distribution */    for (sum = 0, k = nbc->clscnt; --k >= 0; )      sum += *--d *= *--s;      /* multiply with cond. probabilities */    if ((sum > 1e-24) && (sum < 1e24))      continue;                 /* if the sum is ok, continue */    if (sum <= 0) break;        /* if the sum is fubar, abort */    for (d += k = nbc->clscnt; --k >= 0; )      *--d /= sum;              /* otherwise renormalize in order */  }                             /* to avoid an over- or underflow */  s = d = nbc->posts;           /* traverse the final distribution */  for (sum = *s, k = nbc->clscnt; --k > 0; ) {    if (*++s > *d) d = s;       /* find the most probable class */    sum += *s;                  /* and sum all probabilities */  }                             /* (for the normalization) */  sum = (sum > 0) ? 1/sum : 1;  /* compute normalization factor */  for (k = nbc->clscnt; --k >= 0; )    nbc->posts[k] *= sum;       /* normalize probabilities */  if (conf) *conf = *d;         /* get the confidence value and */  return (int)(d -nbc->posts);  /* return the classification result */}  /* nbc_exec() *//*--------------------------------------------------------------------*/void nbc_rand (NBC *nbc, double drand (void)){                               /* --- generate a random tuple */  int    i, k, n;               /* loop variables */  double t, sum;                /* random number, sum of probs. */  double *p;                    /* to access the probabilities */  DVEC   *dvec;                 /* to traverse the distrib. vectors */  INST   *inst;                 /* to traverse the instances */  NORMD  *nd;                   /* normal distribution */  p = nbc->priors;              /* get the prior probabilities */  t = drand();                  /* generate a random number */  for (sum = i = 0; i < nbc->clscnt; i++) {    sum += p[i]; if (sum >= t) break; }  if (i >= nbc->clscnt)         /* find the class that corresponds */    i = nbc->clscnt -1;         /* to the generated random number */  att_inst(as_att(nbc->attset, nbc->clsid))->i = i;  for (dvec = nbc->dvecs +(n = nbc->attcnt); --n >= 0; ) {    if (((--dvec)->type == 0)   /* traverse all attributes */    ||  (   dvec ->mark <  0))  /* except the class attribute */      continue;                 /* and all unmarked attributes */    inst = att_inst(as_att(nbc->attset, n));    if (dvec->type == AT_NOM) { /* --- if the attribute is nominal */      p = dvec->discds[i].probs;/* get the conditional distribution */      t = drand();              /* generate a random number */      for (sum = k = 0; k < dvec->valcnt; k++) {        sum += p[k]; if (sum >= t) break; }      if (k >= dvec->valcnt)    /* find the value that corresponds */        k = dvec->valcnt -1;    /* to the generated random number */      inst->i = k; }            /* and set the attribute instance */    else {                      /* --- if the attribute is numeric */      nd = dvec->normds +i;     /* get the conditional distribution */      t  = sqrt(nd->var) *_normd(drand) +nd->exp;      if (dvec->type == AT_REAL) inst->f = (float)t;      else                       inst->i = (int)(t +0.5);    }                           /* sample from the normal distrib. */  }                             /* and transform the result */}  /* nbc_rand() *//*--------------------------------------------------------------------*/int nbc_desc (NBC *nbc, FILE *file, int mode, int maxlen){                               /* --- describe a naive Bayes class. */  int         i, k, n;          /* loop variables */  int         pos, ind;         /* current position and indentation */  int         len, l;           /* length of class/value name/number */  const char  *clsname;         /* name of class attribute */  ATT         *att, *clsatt;    /* to traverse the attributes */  const DVEC  *dvec;            /* to traverse the distrib. vectors */  const NORMD *normd;           /* to traverse the normal   distribs. */  const DISCD *discd;           /* to traverse the discrete distribs. */  char  name[4*AS_MAXLEN+4];    /* output buffer for names */  char  num[64];                /* output buffer for numbers */  assert(nbc && file);          /* check the function arguments */  /* --- print a header (as a comment) --- */  if (mode & NBC_TITLE) {       /* if the title flag is set */    i = k = (maxlen > 0) ? maxlen -2 : 70;    fputs("/*", file); while (--i >= 0) fputc('-', file);    fputs("\n  naive Bayes classifier\n", file);    while (--k >= 0) fputc('-', file); fputs("*/\n", file);  }                             /* print a title header */  if (maxlen <= 0) maxlen = INT_MAX;  /* --- start description --- */  clsatt  = as_att(nbc->attset, nbc->clsid);  clsname = att_name(clsatt);   /* note the class attribute name */  fputs("nbc(", file);          /* (is needed repeatedly below) */  sc_format(name, clsname, 0);  /* format and print */  fputs(name, file);            /* the class attribute name */  fputs(") = {\n", file);       /* and start the description */  if ((nbc->lcorr > 0)          /* if estimation parameters */  ||   nbc->mode) {             /* differ from default values */    fprintf(file, "  params = %g", nbc->lcorr);    if (nbc->mode & NBC_DWNULL) fputs(", dwnull", file);    if (nbc->mode & NBC_MAXLLH) fputs(", maxllh", file);    fputs(";\n", file);         /* print Laplace correction */  }                             /* and estimation mode */  /* --- print the class distribution --- */  fputs("  prob(", file);       /* print a distribution indicator */  fputs(name, file);            /* and the class attribute name and */  fputs(") = {\n    ", file);   /* start the class distribution */  pos = 4;                      /* initialize the output position */  ind = att_valwd(clsatt,0) +2; /* compute position and indentation */  for (i = 0; i < nbc->clscnt; i++) {   /* traverse the classes */    if (i > 0)                  /* if this is not the first class, */      fputs(",\n    ", file);   /* start a new output line */    len = sc_format(name, att_valname(clsatt, i), 0);    fputs(name, file);          /* get and print the class name */    for (pos = len+2; pos < ind; pos++)      putc(' ', file);          /* pad with blanks to equal width */    fprintf(file, ": %g", nbc->frqs[i]);    if (mode & NBC_REL)         /* print the absolute class frequency */      fprintf(file, " (%.1f%%)", nbc->priors[i] *100);  }                             /* print the relative class frequency */  fputs(" };\n", file);         /* terminate the class distribution */  /* --- print the (conditional) distributions --- */  for (dvec = nbc->dvecs, n = 0; n < nbc->attcnt; dvec++, n++) {    if ((dvec->type == 0)       /* traverse all attributes, */    ||  ((mode & NBC_MARKED)    /* but skip the class attribute */    &&   (dvec->mark < 0)))     /* and in marked mode also */      continue;                 /* all unmarked attributes */    fputs("  prob(", file);     /* print a distribution indicator */    att = as_att(nbc->attset,n);/* and get the next attribute */    sc_format(name, att_name(att), 0);    fputs(name, file);          /* print the attribute name */    putc('|', file);            /* and the condition separator */    sc_format(name, clsname,0); /* format and print */    fputs(name, file);          /* the class attribute name */    fputs(") = {\n    ", file); /* and start the cond. distribution */    if (dvec->discds) {         /* if the attribute is nominal, */      discd = dvec->discds;     /* traverse the discrete distribs. */      for (i = 0; i < nbc->clscnt; discd++, i++) {        if (i > 0)              /* if this is not the first class, */          fputs(",\n    ", file);       /* start a new output line */        len = sc_format(name, att_valname(clsatt, i), 0);        fputs(name, file);      /* get and print the class name */        for (pos = len+2; pos < ind; pos++)          putc(' ', file);      /* pad with blanks to equal width */        fputs(":{", file);      /* start the value distribution and */        pos += 3;               /* traverse the attribute values */        for (k = 0; k < dvec->valcnt; k++) {          if (k > 0) {          /* if this is not the first value, */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -