📄 eclat.c
字号:
/*---------------------------------------------------------------------- File : eclat.c Contents: eclat algorithm for finding frequent item sets Author : Christian Borgelt History : 09.06.2002 file created from apriori.c 10.12.2002 option -l (list supporting transactions added) 16.08.2002 transaction reading improved 18.08.2003 memory benchmarking functionality added 20.08.2003 option -t (target type) added 22.08.2003 based on transaction module from apriori 23.08.2003 option -q (item sort control) added 12.09.2003 option -u (sparse representation) added 16.08.2004 bug concerning option -q0 fixed (min. support) 23.11.2004 absolute/relative support output changed 09.12.2004 filter added (binary logarithm of supp. quotient) 20.06.2005 use of flag for "no item sorting" corrected----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <float.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#include "bitmat.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "eclat"#define DESCRIPTION "find frequent item sets with the eclat algorithm"#define VERSION "version 2.10 (2005.06.20) " \ "(c) 2002-2005 Christian Borgelt"/* --- output flags --- */#define OF_REL 0x01 /* print relative support */#define OF_ABS 0x02 /* print absolute support */#define OF_DEV 0x04 /* print deviation from indep. occ. */#define OF_SCANF 0x08 /* convert names to scanable form */#define OF_LIST 0x10 /* list supporting transactions *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_TARGET (-9) /* invalid target type */#define E_SUPP (-10) /* invalid item set support */#define E_ITEMCNT (-11) /* invalid number of items */#define E_NOTAS (-12) /* no items or transactions */#define E_UNKNOWN (-18) /* unknown error */#ifndef QUIET /* if not quiet version */#define MSG(x) x /* print messages */#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \ + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s) tfs_buf(is_tfscan(s))/*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#define LN_2 0.69314718055994530942 /* ln(2) */#ifndef QUIET /* if not quiet version *//* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_TARGET -9 */ "invalid target type '%c'\n", /* E_SUPP -10 */ "invalid minimal support %g%%\n", /* E_ITEMCNT -11 */ "invalid number of items %d\n", /* E_NOTAS -12 */ "no items or transactions to work on\n", /* -13 to -15 */ NULL, NULL, NULL, /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_UNKNOWN -18 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static BITMAT *bitmat = NULL; /* bit matrix */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file */static int tacnt = 0; /* number of transactions */static double mindev = -DBL_MAX; /* minimal value of deviation */static double *logfs = NULL; /* logarithms of item frequencies */static int flags = OF_REL; /* output flags */static char *fmt = "%.1f"; /* output format for support */static char buf[4*TFS_SIZE+4]; /* buffer for formatting *//*---------------------------------------------------------------------- Item Set Report Function----------------------------------------------------------------------*/static int _report (int *ids, int cnt, int supp, int *tal, void *data){ /* --- report a frequent item set */ int i; /* loop variable */ const char *name; /* buffer for item name */ double dev = 0; /* deviation from indep. occurrence */ assert(ids && tal); /* check the function arguments */ if (flags & OF_DEV) { /* if to filter w.r.t. deviation */ for (i = cnt; --i >= 0; ) /* traverse the items and */ dev += logfs[ids[i]]; /* sum the logs of the item freqs. */ dev = (log(abs(supp)/(double)tacnt) -dev) /LN_2; if (dev < mindev) return 0; /* compute and check the deviation */ } /* from an independent occurrence */ for (i = 0; i < cnt; i++) { /* traverse the item set */ name = is_name(itemset, ids[i]); if (flags & OF_SCANF) { /* convert to scanable form */ sc_format(buf, name, 0); name = buf; } fputs(name, out); fputc(' ', out); } /* print the item names */ fputs(" (", out); /* print rel. and abs. support */ if (flags & OF_REL) { fprintf(out, fmt, (abs(supp)/(double)tacnt) *100); fputs((flags & OF_ABS) ? "%/" : "%", out); } if (flags & OF_ABS) { fprintf(out, "%d", abs(supp)); } if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); } fputc(')', out); /* terminate the support output */ if (flags & OF_LIST) { /* if to list the transactions, */ fputc(' ', out); /* leave a space before the list */ if (supp < 0) { /* if bit vector representation */ for (i = 0; i < tacnt; i++) { /* traverse the bit vector */ if (tal[i >> BM_SHIFT] & (1 << (i & BM_MASK))) fprintf(out," %d", i+1); } } /* print the indices of set bits */ else { /* if list of transaction ids */ for (i = 0; i < supp; i++) fprintf(out," %d", tal[i]); } /* traverse and print */ } /* the transaction identifiers */ fputc('\n', out); /* terminate the output line */ return 1; /* return 'reported 1 item set' */} /* _report() *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/static void error (int code, ...){ /* --- print an error message */ #ifndef QUIET /* if not quiet version */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #endif #ifndef NDEBUG /* if debug version */ if (bitmat) bm_delete(bitmat); /* clean up memory */ if (taset) tas_delete(taset, 0); /* and close files */ if (itemset) is_delete(itemset); if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0, n; /* loop variables, counters */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *cominds = NULL; /* comment indicators */ double supp = 0.1; /* minimal support (in percent) */ int target = 's'; /* target type (sets/closed/maximal) */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -