⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nbayes.c

📁 数据挖掘中的bayes算法,很好的代码
💻 C
📖 第 1 页 / 共 5 页
字号:
            putc(',', file); pos++; }         /* print a separator */          len  = sc_format(name, att_valname(att, k), 0);          len += l = sprintf(num, ": %g", discd->frqs[k]);          if (mode & NBC_REL)   /* format value frequency */            len += sprintf(num +l, " (%.1f%%)", discd->probs[k]*100);          if ((pos      > ind)  /* if the line would get too long */          &&  (pos +len > maxlen -4)) {            putc('\n', file);   /* start a new line and indent */            for (pos = 0; pos < ind; pos++) putc(' ', file); }          else {                /* if there is enough space left, */            putc(' ', file); pos++; }   /* only print a separator */          fputs(name, file); fputs(num, file);          pos += len;           /* print value and its frequency */        }                       /* and update the output position */        fputs(" }", file);      /* terminate the value distribution */      } }    else {                      /* if the attribute is numeric, */      normd = dvec->normds;     /* traverse the normal distributions */      for (i = 0; i < nbc->clscnt; normd++, i++) {        if (i > 0)              /* if this is not the first class, */          fputs(",\n    ", file);       /* start a new output line */        len = sc_format(name, att_valname(clsatt, i), 0);        fputs(name, file);      /* get and print the class name */        for (pos = len+2; pos < ind; pos++)          putc(' ', file);      /* pad with blanks to equal width */        fprintf(file, ": N(%g, %g) [%g]",                normd->exp, normd->var, normd->cnt);      }                         /* print the normal distribution */      putc(' ', file);          /* with expected value and variance */    }  /* if (dvec->discds) .. else .. */    fputs("};\n", file);        /* terminate the distributions */  }  /* for (n = 0; .. */  fputs("};\n", file);          /* terminate the classifier */  return ferror(file) ? -1 : 0; /* return the write status */}  /* nbc_desc() *//*--------------------------------------------------------------------*/#ifdef NBC_PARSEstatic int _distin (SCAN *scan, ATT *att, double *frqs, double *sum){                               /* --- read a distribution */  int    i, cnt;                /* loop variable, number of values */  double *p, f;                 /* to traverse the frequencies */  int    t;                     /* buffer for token */  assert(scan && att && frqs && sum); /* check the function arguments */  GET_CHR('{');                 /* consume '{' (start of distrib.) */  cnt = att_valcnt(att);        /* get the number of att. values */  for (p = frqs +(i = cnt); --i >= 0; )    *--p = -1;                  /* clear the value frequencies */  while (1) {                   /* attribute value read loop */    t = sc_token(scan);         /* check for a name */    if ((t != T_ID) && (t != T_NUM)) ERROR(E_VALEXP);    if (t != T_NUM) t = ':';    /* if the token is no number, */    else {                      /* the token must be an att. value, */      GET_TOK();                /* otherwise consume the token, */      t = sc_token(scan);       /* note the next token, and */      sc_back(scan);            /* go back to the previous one */    }                           /* (look ahead one token) */    if (t != ':')               /* if no ':' follows, */      i = (i+1) % cnt;          /* get the cyclic successor id */    else {                      /* if a  ':' follows */      i = att_valid(att, sc_value(scan));      if (i < 0) ERROR(E_UNKVAL);      GET_TOK();                /* get and consume the value */      GET_CHR(':');             /* consume ':' */    }    if (frqs[i] >= 0)           /* check whether value has been read */      XERROR(E_DUPVAL, att_valname(att, i));    if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP);    f = atof(sc_value(scan));   /* get and check */    if (f < 0) ERROR(E_NUMBER); /* the value frequency */    frqs[i] = f;                /* set the value frequency */    GET_TOK();                  /* consume the value frequency */    if (sc_token(scan) == '('){ /* if a relative number follows, */      GET_TOK();                /* consume '(' */      if (sc_token(scan) != T_NUM)  ERROR(E_NUMEXP);      if (atof(sc_value(scan)) < 0) ERROR(E_NUMBER);      GET_TOK();                /* consume the relative number */      GET_CHR('%');             /* consume '%' */      GET_CHR(')');             /* consume ')' */    }    if (sc_token(scan) != ',') break;    GET_TOK();                  /* if at end of list, abort loop, */  }                             /* otherwise consume ',' */  GET_CHR('}');                 /* consume '}' (end of distribution) */  for (f = 0, p = frqs +(i = cnt); --i >= 0; ) {    if (*--p < 0) *p = 0;       /* clear the unset frequencies */    else          f += *p;      /* and sum all other frequencies */  }                             /* to obtain the total frequency */  *sum = f;                     /* set the sum of the frequencies */  return 0;                     /* return 'ok' */}  /* _distin() *//*--------------------------------------------------------------------*/static int _discdin (NBC *nbc, SCAN *scan,                     ATT *clsatt, ATT *att, DVEC *dvec){                               /* --- read discrete distributions */  int   i = -1, t;              /* class identifier, buffer */  DISCD *discd;                 /* to access discrete distribution */  assert(nbc && clsatt && att && dvec); /* check function arguments */  for (discd = dvec->discds +(i = nbc->clscnt); --i >= 0; )    (--discd)->cnt = -1;        /* unmark all distributions */  while (1) {                   /* distribution read loop */    if (sc_token(scan) == '{')  /* if no class name is given, */      i = (i+1) % nbc->clscnt;  /* get the cyclic successor */    else {                      /* if a class name is given, */      t = sc_token(scan);       /* check for a name */      if ((t != T_ID) && (t != T_NUM)) ERROR(E_VALEXP);      i = att_valid(clsatt, sc_value(scan));      if (i < 0) ERROR(E_UNKVAL);      GET_TOK();                /* get and consume the value */      GET_CHR(':');             /* consume ':' */    }    discd = dvec->discds +i;    /* get and check the distribution */    if (discd->cnt >= 0) XERROR(E_DUPVAL, att_valname(clsatt, i));    discd->cnt = 0;             /* clear the counter as a flag */    t = _distin(scan, att, discd->frqs, &discd->cnt);    if (t) return t;            /* read distribution */    if (sc_token(scan) != ',') break;    GET_TOK();                  /* if at end of list, abort loop */  }                             /* otherwise consume ',' */  for (discd = dvec->discds +(i = nbc->clscnt); --i >= 0; )    if ((--discd)->cnt < 0) discd->cnt = 0;                                /* clear the unset counters */  return 0;                     /* return 'ok' */}  /* _discdin() *//*--------------------------------------------------------------------*/static int _contdin (NBC *nbc, SCAN *scan,                     ATT *clsatt, ATT *att, DVEC *dvec){                               /* --- read continuous distributions */  int    i = -1;                /* class identifier, buffer */  NORMD  *normd;                /* to access normal distribution */  double t;                     /* temporary buffer */  assert(nbc && clsatt && att && dvec); /* check function arguments */  for (normd = dvec->normds +(i = nbc->clscnt); --i >= 0; )    (--normd)->cnt = -1;        /* unmark all distributions */  while (1) {                   /* distribution read loop */    t = sc_token(scan);         /* check for a name */    if ((t != T_ID) && (t != T_NUM)) ERROR(E_VALEXP);    if (t == T_NUM) t = ':';    /* if the token is a number, */    else {                      /* the token must be a class */      GET_TOK();                /* otherwise consume the token, */      t = sc_token(scan);       /* note the next token, and */      sc_back(scan);            /* go back to the previous one */    }                           /* (look ahead one token) */    if (t != ':')               /* if no class name is given, */      i = (i+1) % nbc->clscnt;  /* get the cyclic successor id */    else {                      /* if a  class name is given */      i = att_valid(clsatt, sc_value(scan));      if (i < 0) ERROR(E_UNKVAL);      GET_TOK();                /* get and consume the class */      GET_CHR(':');             /* consume ':' */    }    normd = dvec->normds +i;    /* get the normal distribution and */    if (normd->cnt >= 0)        /* check whether it is already set */      XERROR(E_DUPVAL, att_valname(clsatt, i));    normd->cnt = 0;             /* clear the counter as a flag */    if ((sc_token(scan) != T_ID)    ||  (strcmp(sc_value(scan), "N") != 0))      ERR_STR("N");             /* check for an 'N' */    GET_TOK();                  /* consume 'N' */    GET_CHR('(');               /* consume '(' */    if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP);    normd->exp = atof(sc_value(scan));    GET_TOK();                  /* get and consume the exp. value */    GET_CHR(',');               /* consume ',' */    if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP);    normd->var = atof(sc_value(scan));    if (normd->var < 0)          ERROR(E_NUMBER);    GET_TOK();                  /* get and consume the variance */    GET_CHR(')');               /* consume ')' */    if (sc_token(scan) != '['){ /* if no number of cases follows, */      normd->cnt = nbc->frqs[i];/* get the class frequencies */      if (normd->cnt <= 1) normd->cnt = 2; }    else {                      /* if a number of cases follows, */      GET_TOK();                /* consume '[' and */      if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP);      normd->cnt = atof(sc_value(scan));      if (normd->cnt < 0)          ERROR(E_NUMBER);      GET_TOK();                /* consume the number of cases */      GET_CHR(']');             /* consume ']' */    }                           /* then compute the sums */    normd->sv  = normd->exp *(t = normd->cnt);    if (!(nbc->mode & NBC_MAXLLH)) t -= 1;    normd->sv2 = normd->var *t +normd->exp *normd->sv;    if (sc_token(scan) != ',') break;    GET_TOK();                  /* if at end of list, abort loop, */  }                             /* otherwise consume ',' */  for (normd = dvec->normds +(i = nbc->clscnt); --i >= 0; )    if ((--normd)->cnt < 0) normd->cnt = 0;                                /* clear the unset counters */  return 0;                     /* return 'ok' */}  /* _contdin() *//*--------------------------------------------------------------------*/static int _dvecsin (NBC *nbc, SCAN *scan, ATT *clsatt){                               /* --- read distribution vectors */  int  t;                       /* temporary buffer */  int  attid;                   /* attribute identifier */  ATT  *att;                    /* current attribute */  DVEC *dvec;                   /* to traverse the distrib. vectors */  assert(nbc && scan && clsatt);   /* check the function arguments */  while ((sc_token(scan) == T_ID)  /* while another dist. follows */  &&     ((strcmp(sc_value(scan), "prob") == 0)  ||      (strcmp(sc_value(scan), "P")    == 0))) {    GET_TOK();                  /* consume 'prob' or 'P' */    GET_CHR('(');               /* consume '(' */    t = sc_token(scan);         /* check for a name */    if ((t != T_ID) && (t != T_NUM)) ERROR(E_ATTEXP);    attid = as_attid(nbc->attset, sc_value(scan));    if (attid < 0)                   ERROR(E_UNKATT);    att  = as_att(nbc->attset, attid);    dvec = nbc->dvecs +attid;   /* get and check the attribute */    if (dvec->type == 0) ERROR(E_ATTYPE);    if (dvec->mark >= 0) ERROR(E_DUPATT);    dvec->mark = 1;             /* set the read flag */    GET_TOK();                  /* consume the attribute name */    GET_CHR('|');               /* consume '|' (condition indicator) */    t = sc_token(scan);         /* get the next token */    if (((t != T_ID) && (t != T_NUM))    ||  (strcmp(sc_value(scan), att_name(clsatt)) != 0))      ERROR(E_CLSEXP);          /* check for the class att. name */    GET_TOK();                  /* consume the class att. name */    GET_CHR(')');               /* consume ')' */    GET_CHR('=');               /* consume '=' */    GET_CHR('{');               /* consume '{' */    if (sc_token(scan) != '}'){ /* if a distribution vector follows */      t = (dvec->type == AT_NOM)        ? _discdin(nbc, scan, clsatt, att, dvec)        : _contdin(nbc, scan, clsatt, att, dvec);      if (t) return t;          /* read conditional distributions */    }                           /* and check for an error */    GET_CHR('}');               /* consume '

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -