⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bcx.c

📁 数据挖掘中的 Bayes 算法，很好的代码
💻 C
📖 第 1 页 / 共 2 页
字号:
  int    attid;                 /* loop variable for attributes */  float  wgt;                   /* tuple/instantiation weight */  int    mode;                  /* classifier setup mode */  TSINFO *err;                  /* error information */  prgname = argv[0];            /* get program name for error msgs. */  /* --- print startup/usage message --- */  if (argc > 1) {               /* if arguments are given */    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);    fprintf(stderr, VERSION); } /* print a startup message */  else {                        /* if no argument given */    printf("usage: %s [options] bcfile "                     "[-d|-h hdrfile] tabfile [outfile]\n", argv[0]);    printf("%s\n", DESCRIPTION);    printf("%s\n", VERSION);    printf("-c#      classification field name "                    "(default: \"%s\")\n", res.n_class);    printf("-p#      confidence/probability field name "                    "(default: no confidence output)\n");    printf("-o#      output format for confidence/probability "                    "(default: \"%s\")\n", res.format);    printf("-x       print extended confidence information\n");    printf("-L#      Laplace correction "                    "(default: as specified in classifier)\n");    printf("-t#      probability threshold "                    "(two class problems only, default: %g)\n", thresh);    printf("-v/V     (do not) distribute tuple weight "                    "for null values\n");    printf("-m/M     (do not) use maximum likelihood estimate "                    "for the variance\n");    printf("-a       align fields (default: do not align)\n");    printf("-w       do not write field names to the output file\n");    printf("-b/f/r#  blank characters, field and record separators\n"           "         (default: \" \\t\\r\", \" ,\\t\", \"\\n\")\n");    printf("-u#      null value characters (default: \"?*\")\n");    printf("-C#      comment characters    (default: \"#\")\n");    printf("-n       
number of tuple occurrences in last field\n");    printf("bcfile   file containing classifier description\n");    printf("-d       use default table header "                    "(field names = field numbers)\n");    printf("-h       read table header (field names) from hdrfile\n");    printf("hdrfile  file containing table header (field names)\n");    printf("tabfile  table file to read "                    "(field names in first record)\n");    printf("outfile  file to write output table to (optional)\n");    return 0;                   /* print a usage message */  }                             /* and abort the program */  /* --- evaluate arguments --- */  for (i = 1; i < argc; i++) {  /* traverse arguments */    s = argv[i];                /* get option argument */    if (optarg) { *optarg = s; optarg = NULL; continue; }    if ((*s == '-') && *++s) {  /* -- if argument is an option */      while (*s) {              /* traverse options */        switch (*s++) {         /* evaluate option */          case 'c': optarg    = &res.n_class;       break;          case 'p': optarg    = &res.n_prob;        break;          case 'o': optarg    = &res.format;        break;          case 'x': res.all   = 1;                  break;          case 'L': lcorr     = strtod(s, &s);      break;          case 't': thresh    = strtod(s, &s);      break;          case 'v': dwnull    = NBC_ALL;            break;          case 'V': dwnull   |= NBC_DWNULL|NBC_ALL; break;          case 'm': maxllh    = NBC_ALL;            break;          case 'M': maxllh   |= NBC_MAXLLH|NBC_ALL; break;          case 'n': inflags  |= AS_WEIGHT;                    outflags |= AS_WEIGHT;          break;          case 'a': outflags |= AS_ALIGN;           break;          case 'w': outflags &= ~AS_ATT;            break;          case 'b': optarg    = &blanks;            break;          case 'f': optarg    = &fldseps;           break;          case 'r': optarg    = &recseps;           break;          case 'u': 
optarg    = &nullchs;           break;          case 'C': optarg    = &comment;           break;          case 'd': inflags  |= AS_DFLT;            break;          case 'h': optarg    = &fn_hdr;            break;          default : error(E_OPTION, *--s);          break;        }                       /* set option variables */        if (!*s) break;         /* if at end of string, abort loop */        if (optarg) { *optarg = s; optarg = NULL; break; }      } }                       /* get option argument */    else {                      /* if argument is no option */      switch (k++) {            /* evaluate non-option */        case  0: fn_bc  = s;      break;        case  1: fn_tab = s;      break;        case  2: fn_out = s;      break;        default: error(E_ARGCNT); break;      }                         /* note filenames */    }  }  if (optarg) error(E_OPTARG);  /* check the option argument */  if ((k < 2) || (k > 3))       /* and the number of arguments */    error(E_ARGCNT);  if (fn_hdr && (strcmp(fn_hdr, "-") == 0))    fn_hdr = "";                /* convert "-" to "" */  i = (!fn_bc  || !*fn_bc) ? 1 : 0;  if  (!fn_tab || !*fn_tab) i++;  if  ( fn_hdr && !*fn_hdr) i++;/* check assignments of stdin: */  if (i > 1) error(E_STDIN);    /* stdin must not be used twice */  if ((lcorr < 0) && (lcorr > -DBL_MAX))    error(E_NEGLC);             /* check the Laplace correction */  if (fn_hdr)                   /* set the header file flag */    inflags = AS_ATT | (inflags & ~AS_DFLT);  if ((outflags & AS_ATT) && (outflags & AS_ALIGN))    outflags |= AS_ALNHDR;      /* set align to header flag */  /* --- read Bayes classifier --- */  scan = sc_create(fn_bc);      /* create a scanner */  if (!scan) error((!fn_bc || !*fn_bc) ? E_NOMEM : E_FOPEN, fn_bc);  attset = as_create("domains", att_delete);  if (!attset) error(E_NOMEM);  /* create an attribute set */  fprintf(stderr, "\nreading %s ... 
", sc_fname(scan));  if ((sc_nexter(scan)   <  0)  /* start scanning (get first token) */  ||  (as_parse(attset, scan, AT_ALL) != 0)  ||  (as_attcnt(attset) <= 0)) /* parse attribute set */    error(E_PARSE, sc_fname(scan));  if ((sc_token(scan) == T_ID)  /* determine classifier type */  &&  (strcmp(sc_value(scan), "fbc") == 0))       fbc = fbc_parse(attset, scan);  else nbc = nbc_parse(attset, scan);  if ((!fbc && !nbc)            /* parse the Bayes classifier */  ||   !sc_eof(scan))           /* and check for end of file */    error(E_PARSE, sc_fname(scan));  sc_delete(scan); scan = NULL; /* delete the scanner */  fprintf(stderr, "[%d attribute(s)] done.\n", as_attcnt(attset));  if ((lcorr >= 0) || dwnull || maxllh) {    if (lcorr < 0)              /* get the classifier's parameters */      lcorr = (fbc) ? fbc_lcorr(fbc) : nbc_lcorr(nbc);    mode    = (fbc) ? fbc_mode(fbc)  : nbc_mode(nbc);    if (dwnull) mode = (mode & ~NBC_DWNULL) | dwnull;    if (maxllh) mode = (mode & ~NBC_MAXLLH) | maxllh;                                /* adapt the estimation parameters */    if (fbc) fbc_setup(fbc, mode, lcorr);    else     nbc_setup(nbc, mode, lcorr);  }                             /* set up the classifier anew */  if (fbc) {                    /* if full Bayes classifier */    clscnt  = fbc_clscnt(fbc);  /* get class information */    res.att = as_att(attset, fbc_clsid(fbc)); }  else {                        /* if naive Bayes classifier */    clscnt  = nbc_clscnt(nbc);  /* get class information */    res.att = as_att(attset, nbc_clsid(nbc));  }                             /* (class att. and num. of classes) */  /* --- read table header --- */  for (attid = as_attcnt(attset); --attid >= 0; )    att_setmark(as_att(attset, attid), 1);  att_setmark(res.att, 0);      /* mark all attribs. 
except the class */  as_chars(attset, recseps, fldseps, blanks, nullchs, comment);  in = io_hdr(attset, fn_hdr, fn_tab, inflags|AS_MARKED, 1);  if (!in) error(1);            /* read the table header */  /* --- classify tuples --- */  if ((att_getmark(res.att) < 0)/* either the class must be present */  &&  (k <= 2))                 /* or an output file must be written */    error(E_CLASS, att_name(res.att), fn_tab);  if (k > 2) {                  /* if to write an output table */    if ((outflags & AS_ALIGN)   /* if to align output file */    &&  (in != stdin)) {        /* and not to read from stdin */      i = AS_INST | (inflags & ~(AS_ATT|AS_DFLT));      while (as_read(attset, in, i) == 0);      fclose(in);               /* determine the column widths */      fprintf(stderr, "done.\n");      in = io_hdr(attset, fn_hdr, fn_tab, inflags|AS_MARKED, 1);      if (!in) error(1);        /* reread the table header */    }                           /* (necessary because of first tuple) */    if (fn_out && *fn_out)      /* if a proper file name is given, */      out = fopen(fn_out, "w"); /* open output file for writing */    else {                      /* if no proper file name is given, */      out = stdout; fn_out = "<stdout>"; }       /* write to stdout */    if (!out) error(E_FOPEN, fn_out);    k = AS_MARKED|AS_INFO1|AS_RDORD|outflags;    if (outflags & AS_ATT)      /* if to write table header */      as_write(attset, out, k, infout);    k = AS_INST|(k & ~AS_ATT);  /* write the attribute names */  }                             /* to the output file */  f = AS_INST | (inflags & ~(AS_ATT|AS_DFLT));  i = ((inflags & AS_DFLT) && !(inflags & AS_ATT))    ? 0 : as_read(attset, in, f);  while (i == 0) {              /* record read loop */    res.class = (fbc) ? 
fbc_exec(fbc, NULL, &res.prob)                      : nbc_exec(nbc, NULL, &res.prob);    if (clscnt <= 2) {          /* if this is a two class problem */      if (res.class <= 0) {     /* check and adapt class 0 result */	if (res.prob <   thresh) {          res.class = 1; res.prob = 1 -res.prob; } }      else {                    /* check and adapt class 1 result */        if (res.prob < 1-thresh) {          res.class = 0; res.prob = 1 -res.prob; }      }                         /* (classify as class 0 if prob. */    }                           /* of this class is >= threshold) */    wgt = as_getwgt(attset);    /* classify tuple */    tplwgt += wgt; tplcnt++;    /* count tuple and sum its weight */    if (res.class != att_inst(res.att)->i)      errcnt += wgt;            /* count classification errors */    if (out && (as_write(attset, out, k, infout) != 0))      error(E_FWRITE, fn_out);  /* write tuple to output file */    i = as_read(attset, in, f); /* try to read the next record */  }  if (i < 0) {                  /* if an error occurred, */    err = as_err(attset);       /* get the error information */    tplcnt += (inflags & (AS_ATT|AS_DFLT)) ? 1 : 2;    io_error(i, fn_tab, tplcnt, err->s, err->fld, err->exp);    error(1);                   /* print an error message */  }                             /* and abort the program */  if (in != stdin) fclose(in);  /* close the table file and */  in = NULL;                    /* clear the file variable */  if (out && (out != stdout)) { /* if an output file exists, */    i = fclose(out); out = NULL;/* close the output file */    if (i) error(E_FWRITE, fn_out);  }                             /* print a sucess message */  fprintf(stderr, "[%d/%g tuple(s)] done.\n", tplcnt, tplwgt);  if (att_getmark(res.att) >= 0) {    fprintf(stderr, "%g error(s) (%.2f%%)\n", errcnt,            (tplwgt > 0) ? 
100*(errcnt /tplwgt) : 0);  }                             /* if class found, print errors */  /* --- clean up --- */  #ifndef NDEBUG  if (fbc) fbc_delete(fbc, 1);  /* delete full  Bayes classifier */  if (nbc) nbc_delete(nbc, 1);  /* or     naive Bayes classifier */  #endif                        /* and underlying attribute set */  #ifdef STORAGE  showmem("at end of program"); /* check memory usage */  #endif  return 0;                     /* return 'ok' */}  /* main() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -