📄 apriori.c
字号:
/*---------------------------------------------------------------------- File : apriori.c Contents: apriori algorithm for finding association rules Author : Christian Borgelt History : 14.02.1996 file created 26.07.1996 output precision reduced 22.11.1996 options -b, -f, and -r added 24.11.1996 option -e added (add. evaluation measures) 18.08.1997 normalized chi^2 measure added option -m (minimal rule length) added 13.10.1997 quiet version (no output to stdout or stderr) 27.01.1998 adapted to changed ist_create() function 08.08.1998 optional input file (item appearances) added 02.09.1998 several assertions added 07.09.1998 hyperedge mode (option -h) added 08.12.1998 output of absolute support (option -a) added float changed to double 09.12.1998 conversion of names to a scanable form added 05.02.1999 long int changed to int 09.02.1999 input from stdin, output to stdout added 09.08.1999 bug in check of support parameter (<= 0) fixed 05.11.1999 rule evaluation measure EM_AIMP added 08.11.1999 output of add. rule eval. measure value added 16.03.2000 optional use of original rule support definition 01.04.2001 option -h replaced by option -t (target type) 26.05.2001 extended support output added (option -x) 09.06.2001 extended support output for item sets added 15.08.2001 module scan used for output formatting 18.11.2001 item and transaction functions made a module 19.11.2001 options -i, -l changed, option -y removed 28.12.2001 adapted to module tract, some improvements 11.01.2002 evaluation measures codes changed to letters 10.02.2002 option -q extended by a direction parameter 11.02.2002 memory usage minimization option added----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "apriori"#define DESCRIPTION "find association rules/hyperedges " \ "with apriori algorithm"#define VERSION "version 4.01 (11.04.2002) " \ "(c) 1996-2002 Christian Borgelt"/* --- target types --- */#define TT_SET 0 /* frequent item sets */#define TT_RULE 1 /* association rules */#define TT_HEDGE 2 /* association hyperedges *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_TARGET (-9) /* invalid target type */#define E_SUPP (-10) /* invalid support */#define E_CONF (-11) /* invalid confidence */#define E_MEASURE (-12) /* invalid evaluation measure */#define E_MVAL (-13) /* invalid value for measure */#define E_RULELEN (-14) /* invalid rule length */#define E_UNKNOWN (-21) /* unknown error */#ifndef QUIET /* if not quiet version */#define MSG(x) x /* print messages */#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \ + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s) tfs_buf(is_tfscan(s))/*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#ifndef QUIET /* if not quiet version *//* --- target types --- */static const char *ttypes[] = { /* TT_SET 0 */ "set", /* TT_RULE 1 */ "rule", /* TT_HEDGE 2 */ "hyperedge",};/* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_TARGET -9 */ "invalid target type '%c'\n", /* E_SUPP -10 */ "invalid minimal support %g%%\n", /* E_CONF -11 */ "invalid minimal confidence %g%%\n", /* E_MEASURE -12 */ "invalid additional evaluation measure %c\n", /* E_MVAL -13 */ "invalid value %g%% for evaluation measure\n", /* E_RULELEN -14 */ "invalid rule length %d\n", /* -15 */ NULL, /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_APPEXP -18 */ "file %s, record %d: " "appearance indicator expected\n", /* E_UNKAPP -19 */ "file %s, record %d: " "unknown appearance indicator %s\n", /* E_FLDCNT -20 */ "file %s, record %d: too many fields\n", /* E_UNKNOWN -21 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static ISTREE *istree = NULL; /* item set tree */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/static void help (void){ /* --- print help on eval. measures */ #ifndef QUIET fprintf(stderr, "\n"); /* terminate startup message */ printf("additional rule evaluation measures (option -e#)\n"); printf("d or 1: absolute confidence difference to prior\n"); printf("q or 2: difference of confidence quotient to 1\n"); printf("a or 3: absolute difference of improvement value to 1\n"); printf("i or 4: information difference to prior\n"); printf("c or 5: normalized chi^2 measure\n"); #endif exit(0); /* abort the program */} /* help() *//*--------------------------------------------------------------------*/static void error (int code, ...){ /* --- print an error message */ #ifndef QUIET /* if not quiet version */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ #endif #ifndef QUIET /* if not quiet version */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #endif #ifndef NDEBUG /* if debug version */ if (istree) ist_delete(istree); if (taset) tas_delete(taset, 0); /* clean up memory */ if (itemset) is_delete(itemset); /* and close files */ if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0, n; /* loop variables, counters */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *fn_app = NULL; /* name of item appearances file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *cominds = NULL; /* comment indicators */ char *apps = NULL; /* item appearance indicator vector */ double supp = 0.1; /* minimal support (in percent) */ double conf = 0.8; /* minimal confidence (in percent) */ int rsdef = IST_BODY; /* rule support definition */ int target = 'r'; /* target type (sets/rules/h.edges) */ int arem = 0; /* additional rule evaluation measure */ double minval = 0.1; /* minimal evaluation measure value */ int minlen = 1; /* minimal rule length */ int maxlen = 5; /* maximal rule length */ int load = 1; /* flag for loading transactions */ int sort = 1; /* flag for item sorting and recoding */ int memopt = 0; /* flag for memory usage optimization */ int c2scf = 0; /* flag for conv. to scanable form */ char *fmt = "%.1f%%"; /* output format for support/conf. */ int abs = 0; /* flag for absolute support output */ int ext = 0; /* flag for extended support output */ int aval = 0; /* flag for add. eval. measure value */ int maxcnt = 0; /* maximal number of items per set */ int tacnt; /* number of transactions */ int *p; /* identifier map, item pointer */ const char *name; /* buffer for item names */ static char buf[4*TFS_SIZE+4];/* buffer for formatting */ clock_t t; /* timer for measurements */ #ifndef QUIET /* if not quiet version */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no arguments given */ printf("usage: %s [options] infile outfile [appfile]\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-t# target type (s: item sets, " "r: rules (default), h: hyperedges)\n"); printf("-m# minimal number of items per set/rule/hyperedge " "(default: %d)\n", minlen); printf("-n# maximal number of items per set/rule/hyperedge " "(default: %d)\n", maxlen); printf("-s# minimal support of a set/rule/hyperedge " "(default: %g%%)\n", supp *100); printf("-c# minimal confidence of a rule/hyperedge " "(default: %g%%)\n", conf *100); printf("-o use original definition of the support of a rule " "(body & head)\n"); printf("-x extended support output " "(print both rule support types)\n"); printf("-a print absolute support " "(number of transactions)\n"); printf("-p print support/confidence with high precision\n"); printf("-e# additional rule evaluation measure " "(default: none)\n"); printf("-! print a list of " "additional rule evaluation measures\n"); printf("-d# minimal value of additional evaluation measure " "(default: %g%%)\n", minval *100); printf("-v print value of additional " "rule evaluation measure\n"); printf("-g write output in scanable form " "(quote certain characters)\n"); printf("-l do not load transactions into memory " "(work on input file)\n"); printf("-q# sort items w.r.t. their frequency (default: 1)\n" " (1: ascending, -1: descending, 0: do not sort)\n"); printf("-z minimize memory usage " "(default: maximize speed)\n"); printf("-i# ignore records starting with characters " "in the given string\n"); printf("-b/f/r# blank characters, field and record separators\n" " (default: \" \\t\\r\", \" \\t\", \"\\n\")\n"); printf("infile file to read transactions from\n"); printf("outfile file to write item sets/association rules" "/hyperedges to\n"); printf("appfile file stating item appearances (optional)\n"); return 0; /* print a usage message */ } /* and abort the program */ #endif /* #ifndef QUIET */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (*s) { /* traverse options */ switch (*s++) { /* evaluate switches */ case '!': help(); break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -