⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nbayes.c

📁 数据挖掘中的bayes算法,很好的代码
💻 C
📖 第 1 页 / 共 5 页
字号:
  int    i, k, n;               /* loop variables, buffers */  SELATT *sa;                   /* to traverse the selectable atts. */  TUPLE  *tpl;                  /* to traverse the tuples */  double *s, *d;                /* to traverse the probabilities */  double max, tmp;              /* maximum of probabilities, buffer */  int    old, new;              /* old and new predicted class */  int    cls;                   /* actual class of a tuple */  assert(nbc && table && savec  /* check the function arguments */     && (cnt > 0) && (mode & (NBC_ADD|NBC_REMOVE)));  for (n = tab_tplcnt(table); --n >= 0; ) {    tpl = tab_tpl(table, n);    /* traverse the tuples in the table */    cls = tpl_colval(tpl, nbc->clsid)->i;    if (cls < 0) continue;      /* skip tuples with an null class */    old = nbc_exec(nbc, tpl, NULL);    for (sa = savec +(i = cnt); --i >= 0; ) {      --sa;                     /* traverse the selectable attributes */      if (_exec(nbc, sa->attid, tpl_colval(tpl, sa->attid)) != 0)        new = old;              /* evaluate the classifier and */      else {                    /* on failure use the old class */        s = nbc->cond;          /* if a probability distribution */        d = nbc->posts;         /* could be determined, traverse it */        if (mode & NBC_ADD) {   /* if to add attributes, */          max = *d * *s;        /* multiply with cond. probability */          for (new = 0, k = 1; k < nbc->clscnt; k++) {            tmp = *++d * *++s;  /* compute new probability */            if (tmp > max) { max = tmp; new = k; }          } }                   /* find the most probable class */        else {                  /* if to remove attributes, */          max = *d / *s;        /* divide by cond. probability */          for (new = 0, k = 1; k < nbc->clscnt; k++) {            tmp = *++d / *++s;  /* compute new probability */            if (tmp > max) { max = tmp; new = k; }          }                     /* find the most probable class */        }                       /* for the current tuple */      }                         /* (det. new classification result) */      if (new != cls) sa->errs += tpl_getwgt(tpl);    }                           /* count the misclassifications */  }                             /* of the modified classifier */  return 0;                     /* return 'ok' */}  /* _eval() */#endif/*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/NBC* nbc_create (ATTSET *attset, int clsid){                               /* --- create a naive Bayes class. */  int    i, k, n;               /* loop variables */  NBC    *nbc;                  /* created classifier */  ATT    *att;                  /* to traverse the attributes */  DVEC   *dvec;                 /* to traverse the distrib. vectors */  DISCD  *discd;                /* to traverse the discrete distribs. */  NORMD  *normd;                /* to traverse the normal   distribs. */  double *frq;                  /* to traverse the frequency vectors */  assert(attset && (clsid >= 0) /* check the function arguments */      && (clsid < as_attcnt(attset))      && (att_type(as_att(attset, clsid)) == AT_NOM));  /* --- create the classifier body --- */  i   = as_attcnt(attset);      /* get the number of attributes */  nbc = (NBC*)malloc(sizeof(NBC) +(i-1) *sizeof(DVEC));  if (!nbc) return NULL;        /* allocate the classifier body */  for (dvec = nbc->dvecs +(k = i); --k >= 0; ) {    (--dvec)->discds = NULL; dvec->normds = NULL;  }                             /* clear the distribution vectors */  nbc->attset = attset;         /* (for a proper clean up on error) */  nbc->attcnt = i;              /* and initialize the other fields */  nbc->clsid  = clsid;  nbc->clsvsz = att_valcnt(as_att(attset, clsid));  nbc->clscnt = nbc->clsvsz;  nbc->total  = 0;  nbc->lcorr  = 0;  nbc->mode   = 0;  /* --- initialize the class distributions --- */  if (nbc->clscnt <= 0) {       /* if there are no classes, */    nbc->frqs   =               /* no class vectors are needed */    nbc->priors = nbc->posts = nbc->cond = NULL; }  else {                        /* if there are classes, */    nbc->frqs =                 /* allocate class vectors */    frq = (double*)malloc(nbc->clsvsz *4 *sizeof(double));    if (!frq) { nbc_delete(nbc, 0); return NULL; }    nbc->priors = frq         +nbc->clsvsz;    nbc->posts  = nbc->priors +nbc->clsvsz;    nbc->cond   = nbc->posts  +nbc->clsvsz;    for (frq += k = nbc->clsvsz; --k >= 0; )      *--frq = 0;               /* traverse the frequency vector */  }                             /* and init. the class frequencies */  /* --- initialize the conditional distributions --- */  for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) {    (--dvec)->mark = -1;        /* traverse and unmark all attributes */    if (i == clsid) {           /* if this is the class attribute, */      dvec->type = 0; continue;}/* clear the type for easier recogn. */    att = as_att(attset, i);    /* get the next attribute */    dvec->type = att_type(att); /* and its type */    if (dvec->type == AT_NOM) { /* -- if the attribute is nominal */      dvec->valcnt =            /* set the number of att. values */      dvec->valvsz = att_valcnt(att);      if (nbc->clscnt <= 0)     /* if there are no classes, */        continue;               /* there is nothing else to do */      dvec->discds =            /* create a vector of discrete dists. */      discd = (DISCD*)calloc(nbc->clsvsz, sizeof(DISCD));      if (!discd) { nbc_delete(nbc, 0); return NULL; }      if (dvec->valcnt <= 0)    /* if the attribute has no values, */        continue;               /* there is nothing else to do */      for (discd += k = nbc->clscnt; --k >= 0; ) {        (--discd)->frqs =       /* create a value frequency vector */        frq = (double*)malloc(dvec->valvsz *2 *sizeof(double));        if (!frq) { nbc_delete(nbc, 0); return NULL; }        discd->probs = frq +dvec->valvsz;        for (frq += n = dvec->valvsz; --n >= 0; )          *--frq = 0;           /* traverse the frequency vectors */      } }                       /* and init. the value frequencies */    else {                      /* -- if the attribute is numeric */      dvec->valcnt = dvec->valvsz = 0;      if (nbc->clscnt <= 0)     /* if there are no classes, */        continue;               /* there is nothing else to do */      dvec->normds =            /* create a vector of normal dists. */      normd = (NORMD*)malloc(nbc->clsvsz *sizeof(NORMD));      if (!normd) { nbc_delete(nbc, 0); return NULL; }      for (normd += k = nbc->clsvsz; --k >= 0; ) {        (--normd)->cnt = 0; normd->sv = normd->sv2 = 0; }    }                           /* clear the sums from which expected */  }                             /* value and variance are computed */  return nbc;                   /* return the created classifier */}  /* nbc_create() *//*--------------------------------------------------------------------*/NBC* nbc_clone (NBC *nbc, int cloneas){                               /* --- clone a naive Bayes classifier */  NBC    *clone;                /* created classifier clone */  ATTSET *attset;               /* clone of attribute set */  int    i, k, n;               /* loop variables */  DVEC   *dv; const DVEC   *sv; /* to traverse the distrib. vectors */  NORMD  *dn; const NORMD  *sn; /* to traverse the normal   distribs. */  DISCD  *dd; const DISCD  *sd; /* to traverse the discrete distribs. */  double *df; const double *sf; /* to traverse the frequency vectors */  assert(nbc);                  /* check the function argument */  /* --- copy the classifier body --- */  attset = nbc->attset;         /* get the attribute set */  if (cloneas) {                /* if the corresp. flag is set, */    attset = as_clone(attset);  /* clone the attribute set */    if (!attset) return NULL;   /* of the original classifier, */  }                             /* and then create a classifier */  clone = (NBC*)malloc(sizeof(NBC) +(nbc->attcnt-1) *sizeof(DVEC));  if (!clone) { if (cloneas) as_delete(attset); return NULL; }  for (dv = clone->dvecs +(i = nbc->attcnt); --i >= 0; ) {    (--dv)->discds = NULL; dv->normds = NULL;  }                             /* clear the distribution vectors */  clone->attset = attset;       /* (for a proper clean up on error) */  clone->attcnt = nbc->attcnt;  /* and copy the other fields */  clone->clsid  = nbc->clsid;  clone->clsvsz = nbc->clscnt;  clone->clscnt = nbc->clscnt;  clone->total  = nbc->total;  clone->lcorr  = nbc->lcorr;  clone->mode   = nbc->mode;  /* --- copy the class distributions --- */  if (nbc->clscnt <= 0)         /* if there are no classes, */    clone->frqs   =             /* no class vectors are needed */    clone->priors = clone->posts = clone->cond = NULL;  else {                        /* if there are classes, */    clone->frqs =               /* allocate class vectors */    df = (double*)malloc(clone->clsvsz *4 *sizeof(double));    if (!df) { nbc_delete(clone, cloneas); return NULL; }    clone->priors = clone->frqs   +clone->clsvsz;    clone->posts  = clone->priors +clone->clsvsz;    clone->cond   = clone->posts  +clone->clsvsz;    sf = nbc->frqs +2 *clone->clscnt;    for (df += k = 2 *clone->clscnt; --k >= 0; )      *--df = *--sf;            /* traverse the frequency vector */  }                             /* and copy the class frequencies */  /* --- copy the conditional distributions --- */  sv = nbc->dvecs   +nbc->attcnt;  /* get pointers to the */  dv = clone->dvecs +nbc->attcnt;  /* distribution vectors */  for (i = nbc->attcnt; --i >= 0; ) {    --sv; --dv;                 /* traverse the distribution vectors */    dv->mark   = sv->mark;      /* copy the attribute mark, */    dv->type   = sv->type;      /* the attribute type, */    dv->valvsz = sv->valcnt;    /* the value vector size, and */    dv->valcnt = sv->valcnt;    /* the number of attribute values */    if ((sv->type    == 0)      /* if this is the class attribute */    ||  (nbc->clscnt <= 0))     /* or if there are no classes, */      continue;                 /* there is nothing else to do */    if (sv->type == AT_NOM) {   /* -- if the attribute is nominal */      dv->discds =              /* create a vector of discrete dists. */      dd = (DISCD*)calloc(clone->clsvsz, sizeof(DISCD));      if (!dd) { nbc_delete(clone, cloneas); return NULL; }      if (sv->valcnt <= 0)      /* if the attribute has no values, */        continue;               /* there is nothing else to do */      sd = sv->discds +nbc->clscnt;      for (dd += (k = nbc->clscnt); --k >= 0; ) {        --dd; --sd;             /* traverse the discrete distribs. */        dd->cnt  = sd->cnt;     /* copy the total frequency and */        dd->frqs =              /* create a value frequency vector */        df = (double*)malloc(dv->valvsz *2 *sizeof(double));        if (!df) { nbc_delete(clone, cloneas); return NULL; }        dd->probs = df +dv->valvsz;        sf = sd->frqs +2 *dv->valvsz;        for (df += n = 2 *dv->valvsz; --n >= 0; )          *--df = *--sf;        /* traverse the frequency vectors */      } }                       /* and copy the value frequencies */    else {                      /* -- if the attribute is numeric */      dv->normds =              /* create a vector of normal dists. */      dn = (NORMD*)malloc(clone->clsvsz *sizeof(NORMD));      if (!dn) { nbc_delete(clone, cloneas); return NULL; }      sn = sv->normds +clone->clsvsz;      for (dn += k = clone->clsvsz; --k >= 0; )        *--dn = *--sn;          /* copy the normal distributions */    }                           /* (including computed estimates) */  }  return clone;                 /* return the created clone */}  /* nbc_clone() *//*--------------------------------------------------------------------*/void nbc_delete (NBC *nbc, int delas){                               /* --- delete a naive Bayes class. */  int   i, k;                   /* loop variables */  DVEC  *dvec;                  /* to traverse the distrib. vectors */  DISCD *discd;                 /* to traverse the discrete distribs. */  assert(nbc);                  /* check the function argument */  for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) {    if ((--dvec)->discds) {     /* traverse the attributes */      for (discd = dvec->discds +(k = nbc->clscnt); --k >= 0; )        if ((--discd)->frqs) free(discd->frqs);      free(dvec->discds);       /* delete all frequency vectors */    }                           /* and the distribution vectors */    if (dvec->normds) free(dvec->normds);  }                             /* delete the normal distributions */  if (nbc->frqs) free(nbc->frqs);  if (delas)     as_delete(nbc->attset);  free(nbc);                    /* delete the classifier body */}  /* nbc_delete() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -