📄 apriori.c
字号:
/*---------------------------------------------------------------------- File : apriori.c Contents: apriori algorithm for finding association rules Author : Christian Borgelt History : 14.02.1996 file created 26.07.1996 output precision reduced 22.11.1996 options -b, -f, and -r added 24.11.1996 option -e added (add. evaluation measures) 18.08.1997 normalized chi^2 measure added option -m (minimal rule length) added 13.10.1997 quiet version (no output to stdout or stderr) 27.01.1998 adapted to changed ist_create() function 08.08.1998 optional input file (item appearances) added 02.09.1998 several assertions added 07.09.1998 hyperedge mode (option -h) added 08.12.1998 output of absolute support (option -a) added float changed to double 09.12.1998 conversion of names to a scanable form added 05.02.1999 long int changed to int 09.02.1999 input from stdin, output to stdout added 09.08.1999 bug in check of support parameter (<= 0) fixed 05.11.1999 rule evaluation measure EM_AIMP added 08.11.1999 output of add. rule eval. measure value added 16.03.2000 optional use of original rule support definition 01.04.2001 option -h replaced by option -t (target type) 26.05.2001 extended support output added (option -x) 09.06.2001 extended support output for item sets added 15.08.2001 module scan used for output formatting 18.11.2001 item and transaction functions made a module 19.11.2001 options -i, -l changed, option -y removed 28.12.2001 adapted to module tract, some improvements 11.01.2002 evaluation measures codes changed to letters 10.02.2002 option -q extended by a direction parameter 11.02.2002 memory usage minimization option added 09.06.2002 arbitrary supp./conf. formats made possible 09.01.2003 option -k (item separator) added 14.01.2003 check for empty transaction set added 12.03.2003 output of lift value (conf/prior) added 17.07.2003 item filtering w.r.t. usage added (option -u) 17.07.2003 sorting w.r.t. transaction size sum added 18.07.2003 maximal itemset filter added 11.08.2003 closed itemset filter added 15.08.2003 item filtering for transaction tree added 16.08.2003 parameter for transaction filtering added 18.08.2003 dynamic filtering decision based on times added 21.08.2003 option -j (heap sort for transactions) added 22.09.2003 meaning of option -j reversed (heapsort default) 25.03.2004 option -S added (maximal support of a set/rule) 09.05.2004 additional selection measure for sets added 28.10.2004 two unnecessary assignments removed 20.11.2004 bug in evaluation of -j (heap/quicksort) fixed 23.11.2004 absolute/relative support output changed 09.12.2004 semantics of option -p changed 25.01.2005 bug in output of absolute/relative support fixed 31.01.2005 another bug in this output fixed 20.06.2005 use of flag for "no item sorting" corrected----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "apriori"#define DESCRIPTION "find association rules with the apriori algorithm"#define VERSION "version 4.27 (2005.06.20) " \ "(c) 1996-2005 Christian Borgelt"/* --- target types --- */#define TT_SET 0 /* frequent item sets */#define TT_MFSET 1 /* maximally frequent item sets */#define TT_CLSET 2 /* closed item sets */#define TT_RULE 3 /* association rules */#define TT_HEDGE 4 /* association hyperedges *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_TARGET (-9) /* invalid target type */#define E_SUPP (-10) /* invalid support */#define E_CONF (-11) /* invalid confidence */#define E_MEASURE (-12) /* invalid evaluation measure */#define E_MVAL (-13) /* invalid value for measure */#define E_RULELEN (-14) /* invalid rule length */#define E_NOTAS (-15) /* no items or transactions */#define E_UNKNOWN (-21) /* unknown error */#ifndef QUIET /* if not quiet version */#ifdef FFLUSH#define MSG(x) x /* print messages */#else /* if to flush every output */#define MSG(x) x, fflush(stderr)#endif#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \ + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s) tfs_buf(is_tfscan(s))/*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#ifndef QUIET /* if not quiet version *//* --- target types --- */static const char *ttypes[] = { /* TT_SET 0 */ "set", /* TT_MFSET 1 */ "set", /* TT_CLSET 2 */ "set", /* TT_RULE 3 */ "rule", /* TT_HEDGE 4 */ "hyperedge",};/* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_TARGET -9 */ "invalid target type '%c'\n", /* E_SUPP -10 */ "invalid minimal support %g%%\n", /* E_CONF -11 */ "invalid minimal confidence %g%%\n", /* E_MEASURE -12 */ "invalid additional evaluation measure %c\n", /* E_MVAL -13 */ "invalid value %g%% for evaluation measure\n", /* E_RULELEN -14 */ "invalid set size/rule length %d\n", /* E_NOTAS -15 */ "no items or transactions to work on\n", /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_APPEXP -18 */ "file %s, record %d: " "appearance indicator expected\n", /* E_UNKAPP -19 */ "file %s, record %d: " "unknown appearance indicator %s\n", /* E_FLDCNT -20 */ "file %s, record %d: too many fields\n", /* E_UNKNOWN -21 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static TATREE *tatree = NULL; /* transaction tree */static ISTREE *istree = NULL; /* item set tree */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/static void help (void){ /* --- print help on eval. measures */ #ifndef QUIET fprintf(stderr, "\n"); /* terminate startup message */ printf("additional evaluation measures (option -e#)\n"); printf("frequent item sets:\n"); printf("d or 1: binary logarithm of support quotient\n"); printf("q or 2: difference of support quotient to 1\n"); printf("association rules:\n"); printf("d or 1: absolute confidence difference to prior\n"); printf("q or 2: absolute difference of confidence quotient to 1\n"); printf("a or 3: absolute difference of improvement value to 1\n"); printf("i or 4: information difference to prior\n"); printf("c or 5: normalized chi^2 measure\n"); #endif exit(0); /* abort the program */} /* help() *//*--------------------------------------------------------------------*/static void error (int code, ...){ /* --- print an error message */ #ifndef QUIET /* if not quiet version */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #endif #ifndef NDEBUG /* if debug version */ if (istree) ist_delete(istree); /* clean up memory */ if (tatree) tat_delete(tatree); /* and close files */ if (taset) tas_delete(taset, 0); if (itemset) is_delete(itemset); if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0, n; /* loop variables, counters */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *fn_app = NULL; /* name of item appearances file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *cominds = NULL; /* comment indicators */ char *apps = NULL; /* item appearance indicator vector */ double supp = 0.1; /* minimal support (in percent) */ double smax = 1.0; /* maximal support (in percent) */ double conf = 0.8; /* minimal confidence (in percent) */ int rsdef = IST_BODY; /* rule support definition */ int target = 'r'; /* target type (sets/rules/h.edges) */ int arem = 0; /* additional rule evaluation measure */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -