📄 fpgrowth.c
字号:
/*---------------------------------------------------------------------- File : fpgrowth.c Contents: fpgrowth algorithm for finding frequent item sets Author : Christian Borgelt History : 21.11.2004 file created from eclat.c 22.11.2004 pruning of tree projection to bonsai added 23.11.2004 absolute/relative support output changed 09.12.2004 filter added (binary logarithm of supp. quotient) 20.06.2005 use of flag for "no item sorting" corrected----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <float.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "fptree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "fpgrowth"#define DESCRIPTION "find frequent item sets " \ "with the fpgrowth algorithm"#define VERSION "version 1.8 (2005.12.06) " \ "(c) 2004-2005 Christian Borgelt"/* --- output flags --- */#define OF_REL 0x01 /* print relative support */#define OF_ABS 0x02 /* print absolute support */#define OF_DEV 0x04 /* print deviation from indep. occ. */#define OF_SCANF 0x08 /* convert names to scanable form *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_SUPP (-9) /* invalid item set support */#define E_ITEMCNT (-10) /* invalid number of items */#define E_NOTAS (-11) /* no items or transactions */#define E_UNKNOWN (-18) /* unknown error */#ifndef QUIET /* if not quiet version */#define MSG(x) x /* print messages */#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \ + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s) tfs_buf(is_tfscan(s))/*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#define LN_2 0.69314718055994530942 /* ln(2) */#ifndef QUIET /* if not quiet version *//* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_SUPP -9 */ "invalid minimal support %g%%\n", /* E_ITEMCNT -10 */ "invalid number of items %d\n", /* E_NOTAS -11 */ "no items or transactions to work on\n", /* -12 to -15 */ NULL, NULL, NULL, NULL, /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_UNKNOWN -18 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static FPTREE *fptree = NULL; /* frequent pattern tree */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file */static int tacnt = 0; /* number of transactions */static double mindev = -DBL_MAX; /* minimal value of deviation */static double *logfs = NULL; /* logarithms of item frequencies */static int flags = OF_REL; /* output flags */static char *fmt = "%.1f"; /* output format for support */static char buf[4*TFS_SIZE+4]; /* buffer for formatting *//*---------------------------------------------------------------------- Item Set Report Function----------------------------------------------------------------------*/static int _report (int *ids, int cnt, int supp, void *data){ /* --- report a frequent item set */ int i; /* loop variable */ const char *name; /* buffer for item name */ double dev = 0; /* deviation from indep. occurrence */ assert(ids); /* check the function arguments */ if (flags & OF_DEV) { /* if to filter w.r.t. deviation */ for (i = cnt; --i >= 0; ) /* traverse the items and */ dev += logfs[ids[i]]; /* sum the logs of the item freqs. */ dev = (log(supp/(double)tacnt) -dev) /LN_2; if (dev < mindev) return 0; /* compute and check the deviation */ } /* from an independent occurrence */ for (i = 0; i < cnt; i++) { /* traverse the item set */ name = is_name(itemset, ids[i]); if (flags & OF_SCANF) { /* convert to scanable form */ sc_format(buf, name, 0); name = buf; } fputs(name, out); fputc(' ', out); } /* print the item names */ fputs(" (", out); /* print rel. and abs. support */ if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100); fputs((flags & OF_ABS) ? "%/" : "%", out); } if (flags & OF_ABS) { fprintf(out, "%d", supp); } if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); } fputs(")\n", out); /* terminate the support output */ return 1; /* return 'reported 1 item set' */} /* _report() *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/static void error (int code, ...){ /* --- print an error message */ #ifndef QUIET /* if not quiet version */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #endif #ifndef NDEBUG /* if debug version */ if (fptree) fpt_delete(fptree); /* clean up memory */ if (taset) tas_delete(taset, 0); /* and close files */ if (itemset) is_delete(itemset); if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0, n; /* loop variables, counters */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *cominds = NULL; /* comment indicators */ double supp = 0.1; /* minimal support (in percent) */ int min = 1; /* minimal size of item set */ int max = 5; /* maximal size of item set */ int sort = -2; /* flag for item sorting and recoding */ int mode = FPT_BONSAI;/* tree projection mode */ int heap = 1; /* flag for heap sort vs. quick sort */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -