📄 nbayes.c
字号:
int i, k, n; /* loop variables, buffers */ SELATT *sa; /* to traverse the selectable atts. */ TUPLE *tpl; /* to traverse the tuples */ double *s, *d; /* to traverse the probabilities */ double max, tmp; /* maximum of probabilities, buffer */ int old, new; /* old and new predicted class */ int cls; /* actual class of a tuple */ assert(nbc && table && savec /* check the function arguments */ && (cnt > 0) && (mode & (NBC_ADD|NBC_REMOVE))); for (n = tab_tplcnt(table); --n >= 0; ) { tpl = tab_tpl(table, n); /* traverse the tuples in the table */ cls = tpl_colval(tpl, nbc->clsid)->i; if (cls < 0) continue; /* skip tuples with an null class */ old = nbc_exec(nbc, tpl, NULL); for (sa = savec +(i = cnt); --i >= 0; ) { --sa; /* traverse the selectable attributes */ if (_exec(nbc, sa->attid, tpl_colval(tpl, sa->attid)) != 0) new = old; /* evaluate the classifier and */ else { /* on failure use the old class */ s = nbc->cond; /* if a probability distribution */ d = nbc->posts; /* could be determined, traverse it */ if (mode & NBC_ADD) { /* if to add attributes, */ max = *d * *s; /* multiply with cond. probability */ for (new = 0, k = 1; k < nbc->clscnt; k++) { tmp = *++d * *++s; /* compute new probability */ if (tmp > max) { max = tmp; new = k; } } } /* find the most probable class */ else { /* if to remove attributes, */ max = *d / *s; /* divide by cond. probability */ for (new = 0, k = 1; k < nbc->clscnt; k++) { tmp = *++d / *++s; /* compute new probability */ if (tmp > max) { max = tmp; new = k; } } /* find the most probable class */ } /* for the current tuple */ } /* (det. new classification result) */ if (new != cls) sa->errs += tpl_getwgt(tpl); } /* count the misclassifications */ } /* of the modified classifier */ return 0; /* return 'ok' */} /* _eval() */#endif/*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/NBC* nbc_create (ATTSET *attset, int clsid){ /* --- create a naive Bayes class. */ int i, k, n; /* loop variables */ NBC *nbc; /* created classifier */ ATT *att; /* to traverse the attributes */ DVEC *dvec; /* to traverse the distrib. vectors */ DISCD *discd; /* to traverse the discrete distribs. */ NORMD *normd; /* to traverse the normal distribs. */ double *frq; /* to traverse the frequency vectors */ assert(attset && (clsid >= 0) /* check the function arguments */ && (clsid < as_attcnt(attset)) && (att_type(as_att(attset, clsid)) == AT_NOM)); /* --- create the classifier body --- */ i = as_attcnt(attset); /* get the number of attributes */ nbc = (NBC*)malloc(sizeof(NBC) +(i-1) *sizeof(DVEC)); if (!nbc) return NULL; /* allocate the classifier body */ for (dvec = nbc->dvecs +(k = i); --k >= 0; ) { (--dvec)->discds = NULL; dvec->normds = NULL; } /* clear the distribution vectors */ nbc->attset = attset; /* (for a proper clean up on error) */ nbc->attcnt = i; /* and initialize the other fields */ nbc->clsid = clsid; nbc->clsvsz = att_valcnt(as_att(attset, clsid)); nbc->clscnt = nbc->clsvsz; nbc->total = 0; nbc->lcorr = 0; nbc->mode = 0; /* --- initialize the class distributions --- */ if (nbc->clscnt <= 0) { /* if there are no classes, */ nbc->frqs = /* no class vectors are needed */ nbc->priors = nbc->posts = nbc->cond = NULL; } else { /* if there are classes, */ nbc->frqs = /* allocate class vectors */ frq = (double*)malloc(nbc->clsvsz *4 *sizeof(double)); if (!frq) { nbc_delete(nbc, 0); return NULL; } nbc->priors = frq +nbc->clsvsz; nbc->posts = nbc->priors +nbc->clsvsz; nbc->cond = nbc->posts +nbc->clsvsz; for (frq += k = nbc->clsvsz; --k >= 0; ) *--frq = 0; /* traverse the frequency vector */ } /* and init. the class frequencies */ /* --- initialize the conditional distributions --- */ for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) { (--dvec)->mark = -1; /* traverse and unmark all attributes */ if (i == clsid) { /* if this is the class attribute, */ dvec->type = 0; continue;}/* clear the type for easier recogn. */ att = as_att(attset, i); /* get the next attribute */ dvec->type = att_type(att); /* and its type */ if (dvec->type == AT_NOM) { /* -- if the attribute is nominal */ dvec->valcnt = /* set the number of att. values */ dvec->valvsz = att_valcnt(att); if (nbc->clscnt <= 0) /* if there are no classes, */ continue; /* there is nothing else to do */ dvec->discds = /* create a vector of discrete dists. */ discd = (DISCD*)calloc(nbc->clsvsz, sizeof(DISCD)); if (!discd) { nbc_delete(nbc, 0); return NULL; } if (dvec->valcnt <= 0) /* if the attribute has no values, */ continue; /* there is nothing else to do */ for (discd += k = nbc->clscnt; --k >= 0; ) { (--discd)->frqs = /* create a value frequency vector */ frq = (double*)malloc(dvec->valvsz *2 *sizeof(double)); if (!frq) { nbc_delete(nbc, 0); return NULL; } discd->probs = frq +dvec->valvsz; for (frq += n = dvec->valvsz; --n >= 0; ) *--frq = 0; /* traverse the frequency vectors */ } } /* and init. the value frequencies */ else { /* -- if the attribute is numeric */ dvec->valcnt = dvec->valvsz = 0; if (nbc->clscnt <= 0) /* if there are no classes, */ continue; /* there is nothing else to do */ dvec->normds = /* create a vector of normal dists. */ normd = (NORMD*)malloc(nbc->clsvsz *sizeof(NORMD)); if (!normd) { nbc_delete(nbc, 0); return NULL; } for (normd += k = nbc->clsvsz; --k >= 0; ) { (--normd)->cnt = 0; normd->sv = normd->sv2 = 0; } } /* clear the sums from which expected */ } /* value and variance are computed */ return nbc; /* return the created classifier */} /* nbc_create() *//*--------------------------------------------------------------------*/NBC* nbc_clone (NBC *nbc, int cloneas){ /* --- clone a naive Bayes classifier */ NBC *clone; /* created classifier clone */ ATTSET *attset; /* clone of attribute set */ int i, k, n; /* loop variables */ DVEC *dv; const DVEC *sv; /* to traverse the distrib. vectors */ NORMD *dn; const NORMD *sn; /* to traverse the normal distribs. */ DISCD *dd; const DISCD *sd; /* to traverse the discrete distribs. */ double *df; const double *sf; /* to traverse the frequency vectors */ assert(nbc); /* check the function argument */ /* --- copy the classifier body --- */ attset = nbc->attset; /* get the attribute set */ if (cloneas) { /* if the corresp. flag is set, */ attset = as_clone(attset); /* clone the attribute set */ if (!attset) return NULL; /* of the original classifier, */ } /* and then create a classifier */ clone = (NBC*)malloc(sizeof(NBC) +(nbc->attcnt-1) *sizeof(DVEC)); if (!clone) { if (cloneas) as_delete(attset); return NULL; } for (dv = clone->dvecs +(i = nbc->attcnt); --i >= 0; ) { (--dv)->discds = NULL; dv->normds = NULL; } /* clear the distribution vectors */ clone->attset = attset; /* (for a proper clean up on error) */ clone->attcnt = nbc->attcnt; /* and copy the other fields */ clone->clsid = nbc->clsid; clone->clsvsz = nbc->clscnt; clone->clscnt = nbc->clscnt; clone->total = nbc->total; clone->lcorr = nbc->lcorr; clone->mode = nbc->mode; /* --- copy the class distributions --- */ if (nbc->clscnt <= 0) /* if there are no classes, */ clone->frqs = /* no class vectors are needed */ clone->priors = clone->posts = clone->cond = NULL; else { /* if there are classes, */ clone->frqs = /* allocate class vectors */ df = (double*)malloc(clone->clsvsz *4 *sizeof(double)); if (!df) { nbc_delete(clone, cloneas); return NULL; } clone->priors = clone->frqs +clone->clsvsz; clone->posts = clone->priors +clone->clsvsz; clone->cond = clone->posts +clone->clsvsz; sf = nbc->frqs +2 *clone->clscnt; for (df += k = 2 *clone->clscnt; --k >= 0; ) *--df = *--sf; /* traverse the frequency vector */ } /* and copy the class frequencies */ /* --- copy the conditional distributions --- */ sv = nbc->dvecs +nbc->attcnt; /* get pointers to the */ dv = clone->dvecs +nbc->attcnt; /* distribution vectors */ for (i = nbc->attcnt; --i >= 0; ) { --sv; --dv; /* traverse the distribution vectors */ dv->mark = sv->mark; /* copy the attribute mark, */ dv->type = sv->type; /* the attribute type, */ dv->valvsz = sv->valcnt; /* the value vector size, and */ dv->valcnt = sv->valcnt; /* the number of attribute values */ if ((sv->type == 0) /* if this is the class attribute */ || (nbc->clscnt <= 0)) /* or if there are no classes, */ continue; /* there is nothing else to do */ if (sv->type == AT_NOM) { /* -- if the attribute is nominal */ dv->discds = /* create a vector of discrete dists. */ dd = (DISCD*)calloc(clone->clsvsz, sizeof(DISCD)); if (!dd) { nbc_delete(clone, cloneas); return NULL; } if (sv->valcnt <= 0) /* if the attribute has no values, */ continue; /* there is nothing else to do */ sd = sv->discds +nbc->clscnt; for (dd += (k = nbc->clscnt); --k >= 0; ) { --dd; --sd; /* traverse the discrete distribs. */ dd->cnt = sd->cnt; /* copy the total frequency and */ dd->frqs = /* create a value frequency vector */ df = (double*)malloc(dv->valvsz *2 *sizeof(double)); if (!df) { nbc_delete(clone, cloneas); return NULL; } dd->probs = df +dv->valvsz; sf = sd->frqs +2 *dv->valvsz; for (df += n = 2 *dv->valvsz; --n >= 0; ) *--df = *--sf; /* traverse the frequency vectors */ } } /* and copy the value frequencies */ else { /* -- if the attribute is numeric */ dv->normds = /* create a vector of normal dists. */ dn = (NORMD*)malloc(clone->clsvsz *sizeof(NORMD)); if (!dn) { nbc_delete(clone, cloneas); return NULL; } sn = sv->normds +clone->clsvsz; for (dn += k = clone->clsvsz; --k >= 0; ) *--dn = *--sn; /* copy the normal distributions */ } /* (including computed estimates) */ } return clone; /* return the created clone */} /* nbc_clone() *//*--------------------------------------------------------------------*/void nbc_delete (NBC *nbc, int delas){ /* --- delete a naive Bayes class. */ int i, k; /* loop variables */ DVEC *dvec; /* to traverse the distrib. vectors */ DISCD *discd; /* to traverse the discrete distribs. */ assert(nbc); /* check the function argument */ for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) { if ((--dvec)->discds) { /* traverse the attributes */ for (discd = dvec->discds +(k = nbc->clscnt); --k >= 0; ) if ((--discd)->frqs) free(discd->frqs); free(dvec->discds); /* delete all frequency vectors */ } /* and the distribution vectors */ if (dvec->normds) free(dvec->normds); } /* delete the normal distributions */ if (nbc->frqs) free(nbc->frqs); if (delas) as_delete(nbc->attset); free(nbc); /* delete the classifier body */} /* nbc_delete() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -