📄 nbayes.c
字号:
putc(',', file); pos++; } /* print a separator */ len = sc_format(name, att_valname(att, k), 0); len += l = sprintf(num, ": %g", discd->frqs[k]); if (mode & NBC_REL) /* format value frequency */ len += sprintf(num +l, " (%.1f%%)", discd->probs[k]*100); if ((pos > ind) /* if the line would get too long */ && (pos +len > maxlen -4)) { putc('\n', file); /* start a new line and indent */ for (pos = 0; pos < ind; pos++) putc(' ', file); } else { /* if there is enough space left, */ putc(' ', file); pos++; } /* only print a separator */ fputs(name, file); fputs(num, file); pos += len; /* print value and its frequency */ } /* and update the output position */ fputs(" }", file); /* terminate the value distribution */ } } else { /* if the attribute is numeric, */ normd = dvec->normds; /* traverse the normal distributions */ for (i = 0; i < nbc->clscnt; normd++, i++) { if (i > 0) /* if this is not the first class, */ fputs(",\n ", file); /* start a new output line */ len = sc_format(name, att_valname(clsatt, i), 0); fputs(name, file); /* get and print the class name */ for (pos = len+2; pos < ind; pos++) putc(' ', file); /* pad with blanks to equal width */ fprintf(file, ": N(%g, %g) [%g]", normd->exp, normd->var, normd->cnt); } /* print the normal distribution */ putc(' ', file); /* with expected value and variance */ } /* if (dvec->discds) .. else .. */ fputs("};\n", file); /* terminate the distributions */ } /* for (n = 0; .. */ fputs("};\n", file); /* terminate the classifier */ return ferror(file) ? -1 : 0; /* return the write status */} /* nbc_desc() *//*--------------------------------------------------------------------*/#ifdef NBC_PARSEstatic int _distin (SCAN *scan, ATT *att, double *frqs, double *sum){ /* --- read a distribution */ int i, cnt; /* loop variable, number of values */ double *p, f; /* to traverse the frequencies */ int t; /* buffer for token */ assert(scan && att && frqs && sum); /* check the function arguments */ GET_CHR('{'); /* consume '{' (start of distrib.) */ cnt = att_valcnt(att); /* get the number of att. values */ for (p = frqs +(i = cnt); --i >= 0; ) *--p = -1; /* clear the value frequencies */ while (1) { /* attribute value read loop */ t = sc_token(scan); /* check for a name */ if ((t != T_ID) && (t != T_NUM)) ERROR(E_VALEXP); if (t != T_NUM) t = ':'; /* if the token is no number, */ else { /* the token must be an att. value, */ GET_TOK(); /* otherwise consume the token, */ t = sc_token(scan); /* note the next token, and */ sc_back(scan); /* go back to the previous one */ } /* (look ahead one token) */ if (t != ':') /* if no ':' follows, */ i = (i+1) % cnt; /* get the cyclic successor id */ else { /* if a ':' follows */ i = att_valid(att, sc_value(scan)); if (i < 0) ERROR(E_UNKVAL); GET_TOK(); /* get and consume the value */ GET_CHR(':'); /* consume ':' */ } if (frqs[i] >= 0) /* check whether value has been read */ XERROR(E_DUPVAL, att_valname(att, i)); if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); f = atof(sc_value(scan)); /* get and check */ if (f < 0) ERROR(E_NUMBER); /* the value frequency */ frqs[i] = f; /* set the value frequency */ GET_TOK(); /* consume the value frequency */ if (sc_token(scan) == '('){ /* if a relative number follows, */ GET_TOK(); /* consume '(' */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); if (atof(sc_value(scan)) < 0) ERROR(E_NUMBER); GET_TOK(); /* consume the relative number */ GET_CHR('%'); /* consume '%' */ GET_CHR(')'); /* consume ')' */ } if (sc_token(scan) != ',') break; GET_TOK(); /* if at end of list, abort loop, */ } /* otherwise consume ',' */ GET_CHR('}'); /* consume '}' (end of distribution) */ for (f = 0, p = frqs +(i = cnt); --i >= 0; ) { if (*--p < 0) *p = 0; /* clear the unset frequencies */ else f += *p; /* and sum all other frequencies */ } /* to obtain the total frequency */ *sum = f; /* set the sum of the frequencies */ return 0; /* return 'ok' */} /* _distin() *//*--------------------------------------------------------------------*/static int _discdin (NBC *nbc, SCAN *scan, ATT *clsatt, ATT *att, DVEC *dvec){ /* --- read discrete distributions */ int i = -1, t; /* class identifier, buffer */ DISCD *discd; /* to access discrete distribution */ assert(nbc && clsatt && att && dvec); /* check function arguments */ for (discd = dvec->discds +(i = nbc->clscnt); --i >= 0; ) (--discd)->cnt = -1; /* unmark all distributions */ while (1) { /* distribution read loop */ if (sc_token(scan) == '{') /* if no class name is given, */ i = (i+1) % nbc->clscnt; /* get the cyclic successor */ else { /* if a class name is given, */ t = sc_token(scan); /* check for a name */ if ((t != T_ID) && (t != T_NUM)) ERROR(E_VALEXP); i = att_valid(clsatt, sc_value(scan)); if (i < 0) ERROR(E_UNKVAL); GET_TOK(); /* get and consume the value */ GET_CHR(':'); /* consume ':' */ } discd = dvec->discds +i; /* get and check the distribution */ if (discd->cnt >= 0) XERROR(E_DUPVAL, att_valname(clsatt, i)); discd->cnt = 0; /* clear the counter as a flag */ t = _distin(scan, att, discd->frqs, &discd->cnt); if (t) return t; /* read distribution */ if (sc_token(scan) != ',') break; GET_TOK(); /* if at end of list, abort loop */ } /* otherwise consume ',' */ for (discd = dvec->discds +(i = nbc->clscnt); --i >= 0; ) if ((--discd)->cnt < 0) discd->cnt = 0; /* clear the unset counters */ return 0; /* return 'ok' */} /* _discdin() *//*--------------------------------------------------------------------*/static int _contdin (NBC *nbc, SCAN *scan, ATT *clsatt, ATT *att, DVEC *dvec){ /* --- read continuous distributions */ int i = -1; /* class identifier, buffer */ NORMD *normd; /* to access normal distribution */ double t; /* temporary buffer */ assert(nbc && clsatt && att && dvec); /* check function arguments */ for (normd = dvec->normds +(i = nbc->clscnt); --i >= 0; ) (--normd)->cnt = -1; /* unmark all distributions */ while (1) { /* distribution read loop */ t = sc_token(scan); /* check for a name */ if ((t != T_ID) && (t != T_NUM)) ERROR(E_VALEXP); if (t == T_NUM) t = ':'; /* if the token is a number, */ else { /* the token must be a class */ GET_TOK(); /* otherwise consume the token, */ t = sc_token(scan); /* note the next token, and */ sc_back(scan); /* go back to the previous one */ } /* (look ahead one token) */ if (t != ':') /* if no class name is given, */ i = (i+1) % nbc->clscnt; /* get the cyclic successor id */ else { /* if a class name is given */ i = att_valid(clsatt, sc_value(scan)); if (i < 0) ERROR(E_UNKVAL); GET_TOK(); /* get and consume the class */ GET_CHR(':'); /* consume ':' */ } normd = dvec->normds +i; /* get the normal distribution and */ if (normd->cnt >= 0) /* check whether it is already set */ XERROR(E_DUPVAL, att_valname(clsatt, i)); normd->cnt = 0; /* clear the counter as a flag */ if ((sc_token(scan) != T_ID) || (strcmp(sc_value(scan), "N") != 0)) ERR_STR("N"); /* check for an 'N' */ GET_TOK(); /* consume 'N' */ GET_CHR('('); /* consume '(' */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); normd->exp = atof(sc_value(scan)); GET_TOK(); /* get and consume the exp. value */ GET_CHR(','); /* consume ',' */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); normd->var = atof(sc_value(scan)); if (normd->var < 0) ERROR(E_NUMBER); GET_TOK(); /* get and consume the variance */ GET_CHR(')'); /* consume ')' */ if (sc_token(scan) != '['){ /* if no number of cases follows, */ normd->cnt = nbc->frqs[i];/* get the class frequencies */ if (normd->cnt <= 1) normd->cnt = 2; } else { /* if a number of cases follows, */ GET_TOK(); /* consume '[' and */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); normd->cnt = atof(sc_value(scan)); if (normd->cnt < 0) ERROR(E_NUMBER); GET_TOK(); /* consume the number of cases */ GET_CHR(']'); /* consume ']' */ } /* then compute the sums */ normd->sv = normd->exp *(t = normd->cnt); if (!(nbc->mode & NBC_MAXLLH)) t -= 1; normd->sv2 = normd->var *t +normd->exp *normd->sv; if (sc_token(scan) != ',') break; GET_TOK(); /* if at end of list, abort loop, */ } /* otherwise consume ',' */ for (normd = dvec->normds +(i = nbc->clscnt); --i >= 0; ) if ((--normd)->cnt < 0) normd->cnt = 0; /* clear the unset counters */ return 0; /* return 'ok' */} /* _contdin() *//*--------------------------------------------------------------------*/static int _dvecsin (NBC *nbc, SCAN *scan, ATT *clsatt){ /* --- read distribution vectors */ int t; /* temporary buffer */ int attid; /* attribute identifier */ ATT *att; /* current attribute */ DVEC *dvec; /* to traverse the distrib. vectors */ assert(nbc && scan && clsatt); /* check the function arguments */ while ((sc_token(scan) == T_ID) /* while another dist. follows */ && ((strcmp(sc_value(scan), "prob") == 0) || (strcmp(sc_value(scan), "P") == 0))) { GET_TOK(); /* consume 'prob' or 'P' */ GET_CHR('('); /* consume '(' */ t = sc_token(scan); /* check for a name */ if ((t != T_ID) && (t != T_NUM)) ERROR(E_ATTEXP); attid = as_attid(nbc->attset, sc_value(scan)); if (attid < 0) ERROR(E_UNKATT); att = as_att(nbc->attset, attid); dvec = nbc->dvecs +attid; /* get and check the attribute */ if (dvec->type == 0) ERROR(E_ATTYPE); if (dvec->mark >= 0) ERROR(E_DUPATT); dvec->mark = 1; /* set the read flag */ GET_TOK(); /* consume the attribute name */ GET_CHR('|'); /* consume '|' (condition indicator) */ t = sc_token(scan); /* get the next token */ if (((t != T_ID) && (t != T_NUM)) || (strcmp(sc_value(scan), att_name(clsatt)) != 0)) ERROR(E_CLSEXP); /* check for the class att. name */ GET_TOK(); /* consume the class att. name */ GET_CHR(')'); /* consume ')' */ GET_CHR('='); /* consume '=' */ GET_CHR('{'); /* consume '{' */ if (sc_token(scan) != '}'){ /* if a distribution vector follows */ t = (dvec->type == AT_NOM) ? _discdin(nbc, scan, clsatt, att, dvec) : _contdin(nbc, scan, clsatt, att, dvec); if (t) return t; /* read conditional distributions */ } /* and check for an error */ GET_CHR('}'); /* consume '
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -