📄 nbayes.c
字号:
/*---------------------------------------------------------------------- File : nbayes.c Contents: Naive Bayes classifier management Author : Christian Borgelt History : 1998.12.07 file created 1998.12.08 nbc_create, nbc_clone, nbc_delete, nbc_add prog. 1998.12.10 function nbc_desc completed 1998.12.11 function nbc_exec completed 1998.12.12 function nbc_parse completed 1998.12.16 all functions debugged 1999.02.13 tuple parameters added to nbc_add and nbc_exec 1999.01.10 execution for one att. made a separate function 1999.03.11 function nbc_induce added 1999.03.25 distrib. of tuple weight for null values added 1999.03.27 functions nbc_exp und nbc_var added 1999.05.15 automatic frequency vector resizing added 2000.11.10 function nbc_exec adapted 2000.11.18 function nbc_setup added, nbc_exec adapted 2000.11.21 redesign completed 2001.02.11 bug in function nbc_mark (> instead of >=) fixed 2001.07.15 parser improved (global variables removed) 2001.07.16 adapted to modified module scan 2001.07.17 parser improved (conditional look ahead) 2003.04.26 function nbc_rand added 2004.04.15 zero variances replaced by EPSILON 2004.08.12 adapted to new module parse 2007.02.13 adapted to modified module attset 2007.03.21 function nbc_exec extended (posterior probs.)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "nbayes.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define M_PI 3.14159265358979323846 /* \pi */#define EPSILON 1e-12 /* to handle roundoff errors */#define BLKSIZE 16 /* block size for vectors *//*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef struct { /* --- selectable attribute --- */ int attid; /* attribute identifier */ double errs; /* number of misclassifications */} SELATT; /* (selectable attribute) *//*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/#ifdef NBC_INDUCEstatic int _clsrsz (NBC *nbc, int clscnt){ /* --- resize class dependent vectors */ int i, k, n; /* loop variables, buffer */ int clsvsz; /* size of the class dep. vectors */ DVEC *dvec; /* to traverse the distrib. vectors */ NORMD *normd; /* to traverse the normal distribs. */ DISCD *discd; /* to traverse the discrete distribs. */ double *frq; /* to traverse the frequency vectors */ assert(nbc && (clscnt >= 0)); /* check the function arguments */ /* --- resize the class dependent vectors --- */ clsvsz = nbc->clsvsz; /* get the class dep. vector size */ if (clscnt >= clsvsz) { /* if the vectors are too small */ clsvsz += (clsvsz > BLKSIZE) ? clsvsz >> 1 : BLKSIZE; if (clscnt >= clsvsz) clsvsz = clscnt; frq = (double*)realloc(nbc->frqs, clsvsz *4 *sizeof(double)); if (!frq) return -1; /* resize the frequencies vector */ nbc->frqs = frq; /* and set the new vector */ nbc->priors = nbc->frqs +clsvsz; /* organize the rest */ nbc->posts = nbc->priors +clsvsz; /* of the allocated */ nbc->cond = nbc->posts +clsvsz; /* memory block */ n = clsvsz -nbc->clsvsz; /* calc. number of new vector fields */ for (frq += clsvsz, k = n; --k >= 0; ) *--frq = 0; /* clear the new vector fields */ for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) { if ((--dvec)->type == 0) /* traverse all attributes */ continue; /* except the class attribute */ if (dvec->type == AT_NOM){/* if the attribute is nominal */ discd = (DISCD*)realloc(dvec->discds, clsvsz *sizeof(DISCD)); if (!discd) return -1; /* resize the discrete dists. vector */ dvec->discds = discd; /* and set the new vector */ for (discd += clsvsz, k = n; --k >= 0; ) { (--discd)->cnt = 0; discd->frqs = NULL; } } /* clear the new vector fields */ else { /* if the attribute is numeric */ normd = (NORMD*)realloc(dvec->normds, clsvsz *sizeof(NORMD)); if (!normd) return -1; /* resize the normal dists. vector */ dvec->normds = normd; /* and set the new vector */ for (normd += clsvsz, k = n; --k >= 0; ) { (--normd)->cnt = 0; normd->sv = normd->sv2 = 0; } } /* clear the new vector fields */ } /* for (dvec = ... */ nbc->clsvsz = clsvsz; /* set new size of the class vectors */ } /* if (clscnt >= clsvsz) ... */ /* --- create new value frequency vectors --- */ for (dvec = nbc->dvecs +(i = nbc->attcnt); --i >= 0; ) { if ((--dvec)->type != AT_NOM) continue; /* traverse all nominal attributes */ discd = dvec->discds +clscnt; for (k = clscnt -nbc->clscnt; --k >= 0; ) { (--discd)->frqs = /* allocate a value frequency vector */ frq = (double*)malloc(dvec->valvsz *2 *sizeof(double)); if (!frq) break; /* set the probabilities vector */ discd->probs = frq +dvec->valvsz; for (frq += n = dvec->valvsz; --n >= 0; ) *--frq = 0; /* traverse the frequency vectors */ } /* and init. the value frequencies */ if (k >= 0) break; /* on error abort the loop */ } if (i >= 0) { /* if an error occurred */ for (i = nbc->attcnt -i; --i >= 0; dvec++) { if ((--dvec)->type != AT_NOM) continue; discd = dvec->discds +clscnt; for (k = clscnt -nbc->clscnt; --k >= 0; ) if ((--discd)->frqs) { free(discd->frqs); discd->frqs = NULL; } } /* delete the newly created value */ return -1; /* frequency vectors of the nominal */ } /* attributes and abort the function */ nbc->clscnt = clscnt; /* set the new number of classes */ return 0; /* return 'ok' */} /* _clsrsz() *//*--------------------------------------------------------------------*/static int _valrsz (DVEC *dvec, int clscnt, int valcnt){ /* --- resize the value freq. vectors */ int i, k, n; /* loop variables, num. of new elems. */ int valvsz; /* size of the value freq. vectors */ int bsz; /* size of vector in bytes */ DISCD *discd; /* to traverse the discrete distribs. */ double *frq; /* to traverse the frequency vectors */ assert(dvec /* check the function argument */ && (dvec->type == AT_NOM) && (clscnt >= 0) && (valcnt >= 0)); valvsz = dvec->valvsz; /* get the value freq. vector size */ if (valcnt > valvsz) { /* if the vectors are too small */ valvsz += (valvsz > BLKSIZE) ? valvsz >> 1 : BLKSIZE; if (valcnt > valvsz) valvsz = valcnt; n = valvsz -dvec->valcnt; /* get the number of new elements */ bsz = valvsz *2 *sizeof(double); for (discd = dvec->discds +(i = clscnt); --i >= 0; ) { --discd; /* traverse the discrete distribs. */ frq = (double*)realloc(discd->frqs, bsz); if (!frq) break; /* resize the value freq. vector */ discd->frqs = frq; /* and the probabilities vector */ discd->probs = frq +valvsz; /* and set the new vectors */ for (frq += valvsz, k = n; --k >= 0; ) *--frq = 0; /* clear the new vector elements */ } if (i < 0) { /* if an error occurred */ bsz = dvec->valvsz *2 *sizeof(double); for (i = clscnt -i -1; --i >= 0; ) { ++discd; /* traverse the processed distribs. */ discd->frqs = (double*)realloc(discd->frqs, bsz); discd->probs = discd->frqs +dvec->valvsz; } /* shrink all value freq. vectors */ return -1; /* to their old size */ } /* and then abort */ } dvec->valcnt = valcnt; /* set the new number of values */ return 0; /* return 'ok' */} /* _valrsz() */#endif/*--------------------------------------------------------------------*/static int _exec (const NBC *nbc, int attid, const INST *inst){ /* --- execute for one attribute */ int i, k; /* loop variable, buffer */ const DVEC *dvec; /* to traverse the distrib. vectors */ const NORMD *normd; /* to traverse the normal distribs. */ const DISCD *discd; /* to traverse the discrete distribs. */ double *prob; /* to traverse the class probs. */ double v, d, s; /* temporary buffers */ assert(nbc && inst /* check the function arguments */ && (attid >= 0) && (attid < nbc->attcnt)); dvec = nbc->dvecs +attid; /* get the distribution vector */ assert(dvec->type != 0); /* and check the attribute type */ if (dvec->type == AT_NOM) { /* --- if the attribute is nominal */ k = inst->i; /* get and check the attribute value */ if ((k < 0) || (k >= dvec->valcnt)) return -1; discd = dvec->discds +nbc->clscnt; prob = nbc->cond +nbc->clscnt; for (i = nbc->clscnt; --i >= 0; ) { d = (--discd)->probs[k]; /* traverse the discrete distribs. */ *--prob = (d > 0) ? d : EPSILON; } } /* copy the class probabilities */ else { /* --- if the attribute is numeric */ if (dvec->type == AT_REAL){ /* if the attribute is real valued */ if (inst->f <= NV_REAL) return -1; v = (double)inst->f; } /* check and get the attribute value */ else { /* if the attribute is integer valued */ if (inst->i <= NV_INT) return -1; v = (double)inst->i; /* check and get the attribute value */ } /* (convert it to double) */ normd = dvec->normds +nbc->clscnt; prob = nbc->cond +nbc->clscnt; for (i = nbc->clscnt; --i >= 0; ) { d = v -(--normd)->exp; /* traverse the normal distributions */ s = 2 *normd->var; /* and get their parameters */ if (s < EPSILON) s = EPSILON; *--prob = exp(-d*d/s) /sqrt(M_PI*s); } /* compute the probability density */ } /* at the value of the attribute */ return 0; /* return 'ok' */} /* _exec() *//*--------------------------------------------------------------------*/static double _normd (double drand (void)){ /* --- compute N(0,1) distrib. number */ static double b; /* buffer for random number */ double x, y, r; /* coordinates and radius */ if (b != 0.0) { /* if the buffer is full, */ x = b; b = 0; return x; } /* return the buffered number */ do { /* pick a random point */ x = 2.0*drand()-1.0; /* in the unit square [-1,1]^2 */ y = 2.0*drand()-1.0; /* and check whether it lies */ r = x*x +y*y; /* inside the unit circle */ } while ((r > 1) || (r == 0)); r = sqrt(-2*log(r)/r); /* factor for Box-Muller transform */ b = x *r; /* save one of the random numbers */ return y *r; /* and return the other */} /* _normd() *//*--------------------------------------------------------------------*/#ifdef NBC_INDUCEstatic int _eval (NBC *nbc, TABLE *table, int mode, SELATT *savec, int cnt){ /* --- evaluate selectable attributes */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -