⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gendb.c

📁 数据挖掘中的一种算法——INES 算法,在 C 语言下实现。适合数据挖掘初学者借鉴
💻 C
字号:
/*----------------------------------------------------------------------  File    : gendb.c  Contents: program to generate a random database            which satisfies a given probability distribution  Author  : Christian Borgelt  History : 19.10.1995 file created            26.10.1995 attribute set added            30.10.1995 instantiation of attribute set added            26.11.1995 adapted to modified attset functions            24.02.1996 adapted to modified attset functions            10.03.1996 alignment (option -a) added            17.03.1996 adapted to modified parse function            09.04.1996 adapted to modified gm_rand            22.11.1996 options -b, -f, and -r added            23.06.1998 adapted to modified attset functions            07.08.1998 option -w (do not write header) added            18.04.1999 simplified using the new module 'io'            24.04.1999 distribution expansion (option -x) added            25.11.2001 adapted to modified scanner functions            21.01.2002 adapted to modified gramod and ptree modules            04.02.2002 check for empty attribute domains added            16.08.2003 slight changes in error message output----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdarg.h>#include <assert.h>#include <time.h>#ifndef AS_RDWR#define AS_RDWR#endif#ifndef AS_PARSE#define AS_PARSE#endif#ifndef GM_PARSE#define GM_PARSE#endif#include "gramod.h"#include "io.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME     "gendb"#define DESCRIPTION "generate a random database"#define VERSION     "version 2.4 (2004.08.12)         " \                    "(c) 1995-2004   Christian Borgelt"/* --- error codes --- */#define OK            0         /* no error */#define E_NONE       
 0         /* no error */#define E_NOMEM     (-1)        /* not enough memory */#define E_FOPEN     (-2)        /* file open failed */#define E_FREAD     (-3)        /* file read failed */#define E_FWRITE    (-4)        /* file write failed */#define E_OPTION    (-5)        /* unknown option */#define E_OPTARG    (-6)        /* missing option argument */#define E_ARGCNT    (-7)        /* wrong number of arguments */#define E_PARSE     (-8)        /* parse error */#define E_TPLCNT    (-9)        /* illegal number of tuples */#define E_EMPTY    (-10)        /* empty attribute domain */#define E_LOOP     (-11)        /* loop in conditions */#define E_UNKNOWN  (-12)        /* unknown error *//*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/const char *errmsgs[] = {       /* error messages */  /* E_NONE      0 */  "no error\n",  /* E_NOMEM    -1 */  "not enough memory\n",  /* E_FOPEN    -2 */  "cannot open file %s\n",  /* E_FREAD    -3 */  "read error on file %s\n",  /* E_FWRITE   -4 */  "write error on file %s\n",  /* E_OPTION   -5 */  "unknown option -%c\n",  /* E_OPTARG   -6 */  "missing option argument\n",  /* E_ARGCNT   -7 */  "wrong number of arguments\n",  /* E_PARSE    -8 */  "parse error(s) on file %s\n",  /* E_TPLCNT   -9 */  "invalid number of tuples\n",  /* E_EMPTY   -10 */  "domain of attribute %s is empty\n",  /* E_LOOP    -11 */  "condition loop detected\n",  /* E_UNKNOWN -12 */  "unknown error\n"};/*----------------------------------------------------------------------  Global Variables----------------------------------------------------------------------*/const  char   *prgname = NULL;  /* program name for error messages */static SCAN   *scan    = NULL;  /* scanner */static ATTSET *attset  = NULL;  /* attribute set */static GRAMOD *gramod  = NULL;  /* graphical model (Bayesian network) */static FILE   *out     = NULL;  /* output file 
*//*----------------------------------------------------------------------  Random Number Functions----------------------------------------------------------------------*/#ifdef DRAND48                  /* if library for drand48() available */extern void   srand48 (long seed);extern double drand48 (void);   /* use drand48 functions */#define dseed(s) srand48((long)(s))#define drand    drand48#else                           /* if only standard rand() available */#define dseed(s) srand((unsigned)(s))static double drand (void)      /* compute value from rand() result */{ return rand()/(RAND_MAX +1.0); }#endif/*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/void error (int code, ...){                               /* --- print error message */  va_list    args;              /* list of variable arguments */  const char *msg;              /* error message */  assert(prgname);              /* check the program name */  if (code < E_UNKNOWN) code = E_UNKNOWN;  if (code < 0) {               /* if to report an error, */    msg = errmsgs[-code];       /* get the error message */    if (!msg) msg = errmsgs[-E_UNKNOWN];    fprintf(stderr, "\n%s: ", prgname);    va_start(args, code);       /* get variable arguments */    vfprintf(stderr, msg, args);/* print the error message */    va_end(args);               /* end argument evaluation */  }  #ifndef NDEBUG  if (gramod) gm_delete(gramod, 0);  if (attset) as_delete(attset);/* clean up memory */  if (scan)   sc_delete(scan);  /* and close files */  if (out && (out != stdout)) fclose(out);  #endif  #ifdef STORAGE  showmem("at end of program"); /* check memory usage */  #endif  exit(code);                   /* abort the program */}  /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){                               /* --- main function */  int    i, k = 0;     
         /* loop variables, counter */  char   *s;                    /* to traverse options */  char   **optarg = NULL;       /* option argument */  char   *fn_in   = NULL;       /* name of input file */  char   *fn_out  = NULL;       /* name of output file */  char   *blank   = NULL;       /* blank */  char   *fldsep  = NULL;       /* field  separator */  char   *recsep  = NULL;       /* record separator */  int    flags    = AS_ATT;     /* table file write flags */  int    expand   = 0;          /* whether to expand the distribution */  int    tplcnt   = 1000;       /* number of tuples to generate */  long   seed;                  /* seed for random number generator */  double prob;                  /* probability of a tuple */  ATT    *att;                  /* to traverse the attributes */  INST   *inst;                 /* to traverse the att. instances */  prgname = argv[0];            /* get program name for error msgs. */  seed    = (long)time(NULL);   /* and get a default seed value */  /* --- print startup/usage message --- */  if (argc > 1) {               /* if arguments are given */    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);    fprintf(stderr, VERSION); } /* print a startup message */  else {                        /* if no argument given */    printf("usage: %s [options] pdfile dbfile\n", argv[0]);    printf("%s\n", DESCRIPTION);    printf("%s\n", VERSION);    printf("-n#      number of tuples to generate "                    "(default: %d)\n", tplcnt);    printf("-s#      seed for random number generator "                    "(default: time)\n");    printf("-x       expand the distribution (do not sample)\n");    printf("-w       do not write field names to the output file\n");    printf("-a       align fields/columns (default: do not align)\n");    printf("-b/f/r#  blank character, field and record separator\n");    printf("         (default: \" \", \" \", \"\\n\")\n");    printf("pdfile   file containing probability distributions\n");    
printf("dbfile   file to write generated tuples to\n");    return 0;                   /* print a usage message */  }                             /* and abort the program */  /* --- evaluate arguments --- */  for (i = 1; i < argc; i++) {  /* traverse arguments */    s = argv[i];                /* get option argument */    if (optarg) { *optarg = s; optarg = NULL; continue; }    if ((*s == '-') && *++s) {  /* -- if argument is an option */      while (1) {               /* traverse characters */        switch (*s++) {         /* evaluate option */          case 'n': tplcnt = (int)strtol(s, &s, 0); break;          case 's': seed   =      strtol(s, &s, 0); break;          case 'x': expand = 1;                     break;          case 'w': flags &= AS_ATT;                break;          case 'a': flags |= AS_ALIGN;              break;          case 'b': optarg = &blank;                break;          case 'f': optarg = &fldsep;               break;          case 'r': optarg = &recsep;               break;          default : error(E_OPTION, *(--s));        break;        }                       /* set option variables */        if (!*s) break;         /* if at end of string, abort loop */        if (optarg) { *optarg = s; optarg = NULL; break; }      } }                       /* get option argument */    else {                      /* -- if argument is no option */      switch (k++) {            /* evaluate non-options */        case  0: fn_in  = s;      break;        case  1: fn_out = s;      break;        default: error(E_ARGCNT); break;      }                         /* note input/output filename */    }  }  if (optarg)     error(E_OPTARG);  /* if missing option argument */  if (k != 2)     error(E_ARGCNT);  /* if too few arguments given */  if (tplcnt < 0) error(E_TPLCNT);  /* or tuple count invalid, abort */  if (expand) flags |= AS_WEIGHT;   /* set the weight output flag */  /* --- read probability distributions --- */  scan = sc_create(fn_in);      /* create a 
scanner */  if (!scan) error((!fn_in || !*fn_in) ? E_NOMEM : E_FOPEN, fn_in);  attset = as_create("domains", att_delete);  if (!attset) error(E_NOMEM);  /* create an attribute set */  fprintf(stderr, "\nreading %s ... ", sc_fname(scan));  if ((sc_nexter(scan)   <  0)  /* start scanning (get first token) */  ||  (as_parse(attset, scan, AT_SYM) != 0)  ||  (as_attcnt(attset) <= 0)) /* parse attribute set */    error(E_PARSE, sc_fname(scan));  for (i = as_attcnt(attset); --i >= 0; ) {    att = as_att(attset, i);    /* traverse the attributes */    if (att_valcnt(att) <= 0) error(E_EMPTY, att_name(att));  }                             /* check for empty domains */  gramod = gm_parse(attset, scan, GM_PROB|GM_ALL);  if (!gramod || !sc_eof(scan)) /* parse the distributions */    error(E_PARSE, sc_fname(scan));  if (gm_check(gramod, -1, 0) != 0)    error(E_LOOP);              /* check conditions to detect loops */  fprintf(stderr, "[%d attribute(s)] done.\n", as_attcnt(attset));  sc_delete(scan); scan = NULL; /* delete the scanner */  /* --- generate database --- */  if (fn_out && *fn_out)        /* if an output file name is given, */    out = fopen(fn_out, "w");   /* open the output file for writing */  else {                        /* if no output file name is given, */    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */  fprintf(stderr, "writing %s ... 
", fn_out);  if (!out) error(E_FOPEN, fn_out);  if ((flags & AS_ATT)          /* if to write a table header */  &&  (as_write(attset, out, flags) != 0))    error(E_FWRITE, fn_out);    /* write the attributes names */  flags = AS_INST | (flags & ~AS_ATT);  if (expand) {                 /* if to expand the distribution */    for (i = as_attcnt(attset); --i >= 0; ) {      att = as_att(attset, i);  /* traverse the attributes */      att_inst(att)->i = att_valcnt(att) -1;    }                           /* set the last value of each domain */    do {                        /* generate tuples */      prob = gm_exec(gramod);   /* determine tuple probability */      as_setwgt(attset, (float)(prob *tplcnt));      if (as_write(attset, out, flags) != 0)        error(E_FWRITE,fn_out); /* write the generated tuple */      for (i = as_attcnt(attset); --i >= 0; ) {        att  = as_att(attset, i);  /* traverse the attributes */        inst = att_inst(att);      /* and their instances and */        if (--inst->i >= 0) break; /* set the next att. value */        inst->i = att_valcnt(att) -1;    /* or the last value */      }                         /* (compute next value combination) */    } while (i >= 0); }         /* while there is another combination */  else {                        /* if to sample from the distrib. */    dseed(seed);                /* init. 
random number generator */    for (i = tplcnt; --i >= 0;){/* generate random tuples */      gm_rand(gramod, drand);   /* instantiate the attribute set */      if (as_write(attset, out, flags) != 0)        error(E_FWRITE,fn_out); /* write the generated tuple */    }                           /* to the output file */  }  if (out != stdout) {          /* if not written to stdout */    i = fclose(out); out = NULL;/* close the output file */    if (i != 0) error(E_FWRITE, fn_out);  }                             /* print a success message */  fprintf(stderr, "[%d tuple(s)] done.\n", tplcnt);  /* --- clean up --- */  #ifndef NDEBUG  gm_delete(gramod, 1);         /* delete the graphical model */  #endif                        /* (i.e. the Bayesian network) */  #ifdef STORAGE  showmem("at end of program"); /* check memory usage */  #endif  return 0;                     /* return 'ok' */}  /* main() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -