📄 gendb.c
字号:
/*---------------------------------------------------------------------- File : gendb.c Contents: program to generate a random database which satisfies a given probability distribution Author : Christian Borgelt History : 19.10.1995 file created 26.10.1995 attribute set added 30.10.1995 instantiation of attribute set added 26.11.1995 adapted to modified attset functions 24.02.1996 adapted to modified attset functions 10.03.1996 alignment (option -a) added 17.03.1996 adapted to modified parse function 09.04.1996 adapted to modified gm_rand 22.11.1996 options -b, -f, and -r added 23.06.1998 adapted to modified attset functions 07.08.1998 option -w (do not write header) added 18.04.1999 simplified using the new module 'io' 24.04.1999 distribution expansion (option -x) added 25.11.2001 adapted to modified scanner functions 21.01.2002 adapted to modified gramod and ptree modules 04.02.2002 check for empty attribute domains added 16.08.2003 slight changes in error message output----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdarg.h>#include <assert.h>#include <time.h>#ifndef AS_RDWR#define AS_RDWR#endif#ifndef AS_PARSE#define AS_PARSE#endif#ifndef GM_PARSE#define GM_PARSE#endif#include "gramod.h"#include "io.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "gendb"#define DESCRIPTION "generate a random database"#define VERSION "version 2.4 (2004.08.12) " \ "(c) 1995-2004 Christian Borgelt"/* --- error codes --- */#define OK 0 /* no error */#define E_NONE 0 /* no error */#define E_NOMEM (-1) /* not enough memory */#define E_FOPEN (-2) /* file open failed */#define E_FREAD (-3) /* file read failed */#define E_FWRITE (-4) /* file write failed */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* wrong number of arguments */#define E_PARSE (-8) /* parse error */#define E_TPLCNT (-9) /* illegal number of tuples */#define E_EMPTY (-10) /* empty attribute domain */#define E_LOOP (-11) /* loop in conditions */#define E_UNKNOWN (-12) /* unknown error *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/const char *errmsgs[] = { /* error messages */ /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_PARSE -8 */ "parse error(s) on file %s\n", /* E_TPLCNT -9 */ "invalid number of tuples\n", /* E_EMPTY -10 */ "domain of attribute %s is empty\n", /* E_LOOP -11 */ "condition loop detected\n", /* E_UNKNOWN -12 */ "unknown error\n"};/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/const char *prgname = NULL; /* program name for error messages */static SCAN *scan = NULL; /* scanner */static ATTSET *attset = NULL; /* attribute set */static GRAMOD *gramod = NULL; /* graphical model (Bayesian network) */static FILE *out = NULL; /* output file *//*---------------------------------------------------------------------- Random Number Functions----------------------------------------------------------------------*/#ifdef DRAND48 /* if library for drand48() available */extern void srand48 (long seed);extern double drand48 (void); /* use drand48 functions */#define dseed(s) srand48((long)(s))#define drand drand48#else /* if only standard rand() available */#define dseed(s) srand((unsigned)(s))static double drand (void) /* compute value from rand() result */{ return rand()/(RAND_MAX +1.0); }#endif/*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/void error (int code, ...){ /* --- print error message */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print the error message */ va_end(args); /* end argument evaluation */ } #ifndef NDEBUG if (gramod) gm_delete(gramod, 0); if (attset) as_delete(attset);/* clean up memory */ if (scan) sc_delete(scan); /* and close files */ if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0; /* loop variables, counter */ char *s; /* to traverse options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *blank = NULL; /* blank */ char *fldsep = NULL; /* field separator */ char *recsep = NULL; /* record separator */ int flags = AS_ATT; /* table file write flags */ int expand = 0; /* whether to expand the distribution */ int tplcnt = 1000; /* number of tuples to generate */ long seed; /* seed for random number generator */ double prob; /* probability of a tuple */ ATT *att; /* to traverse the attributes */ INST *inst; /* to traverse the att. instances */ prgname = argv[0]; /* get program name for error msgs. */ seed = (long)time(NULL); /* and get a default seed value */ /* --- print startup/usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no argument given */ printf("usage: %s [options] pdfile dbfile\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-n# number of tuples to generate " "(default: %d)\n", tplcnt); printf("-s# seed for random number generator " "(default: time)\n"); printf("-x expand the distribution (do not sample)\n"); printf("-w do not write field names to the output file\n"); printf("-a align fields/columns (default: do not align)\n"); printf("-b/f/r# blank character, field and record separator\n"); printf(" (default: \" \", \" \", \"\\n\")\n"); printf("pdfile file containing probability distributions\n"); printf("dbfile file to write generated tuples to\n"); return 0; /* print a usage message */ } /* and abort the program */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (1) { /* traverse characters */ switch (*s++) { /* evaluate option */ case 'n': tplcnt = (int)strtol(s, &s, 0); break; case 's': seed = strtol(s, &s, 0); break; case 'x': expand = 1; break; case 'w': flags &= AS_ATT; break; case 'a': flags |= AS_ALIGN; break; case 'b': optarg = ␣ break; case 'f': optarg = &fldsep; break; case 'r': optarg = &recsep; break; default : error(E_OPTION, *(--s)); break; } /* set option variables */ if (!*s) break; /* if at end of string, abort loop */ if (optarg) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-options */ case 0: fn_in = s; break; case 1: fn_out = s; break; default: error(E_ARGCNT); break; } /* note input/output filename */ } } if (optarg) error(E_OPTARG); /* if missing option argument */ if (k != 2) error(E_ARGCNT); /* if too few arguments given */ if (tplcnt < 0) error(E_TPLCNT); /* or tuple count invalid, abort */ if (expand) flags |= AS_WEIGHT; /* set the weight output flag */ /* --- read probability distributions --- */ scan = sc_create(fn_in); /* create a scanner */ if (!scan) error((!fn_in || !*fn_in) ? E_NOMEM : E_FOPEN, fn_in); attset = as_create("domains", att_delete); if (!attset) error(E_NOMEM); /* create an attribute set */ fprintf(stderr, "\nreading %s ... ", sc_fname(scan)); if ((sc_nexter(scan) < 0) /* start scanning (get first token) */ || (as_parse(attset, scan, AT_SYM) != 0) || (as_attcnt(attset) <= 0)) /* parse attribute set */ error(E_PARSE, sc_fname(scan)); for (i = as_attcnt(attset); --i >= 0; ) { att = as_att(attset, i); /* traverse the attributes */ if (att_valcnt(att) <= 0) error(E_EMPTY, att_name(att)); } /* check for empty domains */ gramod = gm_parse(attset, scan, GM_PROB|GM_ALL); if (!gramod || !sc_eof(scan)) /* parse the distributions */ error(E_PARSE, sc_fname(scan)); if (gm_check(gramod, -1, 0) != 0) error(E_LOOP); /* check conditions to detect loops */ fprintf(stderr, "[%d attribute(s)] done.\n", as_attcnt(attset)); sc_delete(scan); scan = NULL; /* delete the scanner */ /* --- generate database --- */ if (fn_out && *fn_out) /* if an output file name is given, */ out = fopen(fn_out, "w"); /* open the output file for writing */ else { /* if no output file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to std. output */ fprintf(stderr, "writing %s ... ", fn_out); if (!out) error(E_FOPEN, fn_out); if ((flags & AS_ATT) /* if to write a table header */ && (as_write(attset, out, flags) != 0)) error(E_FWRITE, fn_out); /* write the attributes names */ flags = AS_INST | (flags & ~AS_ATT); if (expand) { /* if to expand the distribution */ for (i = as_attcnt(attset); --i >= 0; ) { att = as_att(attset, i); /* traverse the attributes */ att_inst(att)->i = att_valcnt(att) -1; } /* set the last value of each domain */ do { /* generate tuples */ prob = gm_exec(gramod); /* determine tuple probability */ as_setwgt(attset, (float)(prob *tplcnt)); if (as_write(attset, out, flags) != 0) error(E_FWRITE,fn_out); /* write the generated tuple */ for (i = as_attcnt(attset); --i >= 0; ) { att = as_att(attset, i); /* traverse the attributes */ inst = att_inst(att); /* and their instances and */ if (--inst->i >= 0) break; /* set the next att. value */ inst->i = att_valcnt(att) -1; /* or the last value */ } /* (compute next value combination) */ } while (i >= 0); } /* while there is another combination */ else { /* if to sample from the distrib. */ dseed(seed); /* init. random number generator */ for (i = tplcnt; --i >= 0;){/* generate random tuples */ gm_rand(gramod, drand); /* instantiate the attribute set */ if (as_write(attset, out, flags) != 0) error(E_FWRITE,fn_out); /* write the generated tuple */ } /* to the output file */ } if (out != stdout) { /* if not written to stdout */ i = fclose(out); out = NULL;/* close the output file */ if (i != 0) error(E_FWRITE, fn_out); } /* print a success message */ fprintf(stderr, "[%d tuple(s)] done.\n", tplcnt); /* --- clean up --- */ #ifndef NDEBUG gm_delete(gramod, 1); /* delete the graphical model */ #endif /* (i.e. the Bayesian network) */ #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -