⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.c

📁 数据挖掘中的关联规则算法
💻 C
📖 第 1 页 / 共 3 页
字号:
/*----------------------------------------------------------------------  File    : apriori.c  Contents: apriori algorithm for finding association rules  Author  : Christian Borgelt  History : 14.02.1996 file created            26.07.1996 output precision reduced            22.11.1996 options -b, -f, and -r added            24.11.1996 option -e added (add. evaluation measures)            18.08.1997 normalized chi^2 measure added                       option -m (minimal rule length) added            13.10.1997 quiet version (no output to stdout or stderr)            27.01.1998 adapted to changed ist_create() function            08.08.1998 optional input file (item appearances) added            02.09.1998 several assertions added            07.09.1998 hyperedge mode (option -h) added            08.12.1998 output of absolute support (option -a) added                       float changed to double            09.12.1998 conversion of names to a scanable form added            05.02.1999 long int changed to int            09.02.1999 input from stdin, output to stdout added            09.08.1999 bug in check of support parameter (<= 0) fixed            05.11.1999 rule evaluation measure EM_AIMP added            08.11.1999 output of add. rule eval. measure value added            16.03.2000 optional use of original rule support definition            01.04.2001 option -h replaced by option -t (target type)            26.05.2001 extended support output added (option -x)            09.06.2001 extended support output for item sets added            15.08.2001 module scan used for output formatting            18.11.2001 item and transaction functions made a module            19.11.2001 options -i, -l changed, option -y removed            28.12.2001 adapted to module tract, some improvements            11.01.2002 evaluation measures codes changed to letters            10.02.2002 option -q extended by a direction parameter            11.02.2002 memory usage minimization option added            09.06.2002 arbitrary supp./conf. formats made possible            09.01.2003 option -k (item separator) added            14.01.2003 check for empty transaction set added            12.03.2003 output of lift value (conf/prior) added            17.07.2003 item filtering w.r.t. usage added (option -u)            17.07.2003 sorting w.r.t. transaction size sum added            18.07.2003 maximal itemset filter added            11.08.2003 closed  itemset filter added            15.08.2003 item filtering for transaction tree added            16.08.2003 parameter for transaction filtering added            18.08.2003 dynamic filtering decision based on times added            21.08.2003 option -j (heap sort for transactions) added            22.09.2003 meaning of option -j reversed (heapsort default)            25.03.2004 option -S added (maximal support of a set/rule)            09.05.2004 additional selection measure for sets added            28.10.2004 two unnecessary assignments removed            20.11.2004 bug in evaluation of -j (heap/quicksort) fixed            23.11.2004 absolute/relative support output changed            09.12.2004 semantics of option -p changed            25.01.2005 bug in output of absolute/relative support fixed            31.01.2005 another bug in this output fixed            20.06.2005 use of flag for "no item sorting" corrected----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME     "apriori"#define DESCRIPTION "find association rules with the apriori algorithm"#define VERSION     "version 4.27 (2005.06.20)        " \                    "(c) 1996-2005   Christian Borgelt"/* --- target types --- */#define TT_SET        0         /* frequent item sets */#define TT_MFSET      1         /* maximally frequent item sets */#define TT_CLSET      2         /* closed item sets */#define TT_RULE       3         /* association rules */#define TT_HEDGE      4         /* association hyperedges *//* --- error codes --- */#define E_OPTION    (-5)        /* unknown option */#define E_OPTARG    (-6)        /* missing option argument */#define E_ARGCNT    (-7)        /* too few/many arguments */#define E_STDIN     (-8)        /* double assignment of stdin */#define E_TARGET    (-9)        /* invalid target type */#define E_SUPP     (-10)        /* invalid support */#define E_CONF     (-11)        /* invalid confidence */#define E_MEASURE  (-12)        /* invalid evaluation measure */#define E_MVAL     (-13)        /* invalid value for measure */#define E_RULELEN  (-14)        /* invalid rule length */#define E_NOTAS    (-15)        /* no items or transactions */#define E_UNKNOWN  (-21)        /* unknown error */#ifndef QUIET                   /* if not quiet version */#ifdef FFLUSH#define MSG(x)        x         /* print messages */#else                           /* if to flush every output */#define MSG(x)        x, fflush(stderr)#endif#else                           /* if quiet version */#define MSG(x)                  /* suppress messages */#endif#define SEC_SINCE(t)  ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s)     (tfs_reccnt(is_tfscan(s)) \                      + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s)     tfs_buf(is_tfscan(s))/*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/#ifndef QUIET                   /* if not quiet version *//* --- target types --- */static const char *ttypes[] = {  /* TT_SET      0 */  "set",  /* TT_MFSET    1 */  "set",  /* TT_CLSET    2 */  "set",  /* TT_RULE     3 */  "rule",  /* TT_HEDGE    4 */  "hyperedge",};/* --- error messages --- */static const char *errmsgs[] = {  /* E_NONE      0 */  "no error\n",  /* E_NOMEM    -1 */  "not enough memory\n",  /* E_FOPEN    -2 */  "cannot open file %s\n",  /* E_FREAD    -3 */  "read error on file %s\n",  /* E_FWRITE   -4 */  "write error on file %s\n",  /* E_OPTION   -5 */  "unknown option -%c\n",  /* E_OPTARG   -6 */  "missing option argument\n",  /* E_ARGCNT   -7 */  "wrong number of arguments\n",  /* E_STDIN    -8 */  "double assignment of standard input\n",  /* E_TARGET   -9 */  "invalid target type '%c'\n",  /* E_SUPP    -10 */  "invalid minimal support %g%%\n",  /* E_CONF    -11 */  "invalid minimal confidence %g%%\n",  /* E_MEASURE -12 */  "invalid additional evaluation measure %c\n",  /* E_MVAL    -13 */  "invalid value %g%% for evaluation measure\n",  /* E_RULELEN -14 */  "invalid set size/rule length %d\n",  /* E_NOTAS   -15 */  "no items or transactions to work on\n",  /* E_ITEMEXP -16 */  "file %s, record %d: item expected\n",  /* E_DUPITEM -17 */  "file %s, record %d: duplicate item %s\n",  /* E_APPEXP  -18 */  "file %s, record %d: "                         "appearance indicator expected\n",  /* E_UNKAPP  -19 */  "file %s, record %d: "                         "unknown appearance indicator %s\n",  /* E_FLDCNT  -20 */  "file %s, record %d: too many fields\n",  /* E_UNKNOWN -21 */  "unknown error\n"};#endif/*----------------------------------------------------------------------  Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char    *prgname;        /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET   *taset   = NULL; /* transaction set */static TATREE  *tatree  = NULL; /* transaction tree */static ISTREE  *istree  = NULL; /* item set tree */static FILE    *in      = NULL; /* input  file */static FILE    *out     = NULL; /* output file *//*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/static void help (void){                               /* --- print help on eval. measures */  #ifndef QUIET  fprintf(stderr, "\n");        /* terminate startup message */  printf("additional evaluation measures (option -e#)\n");  printf("frequent item sets:\n");  printf("d or 1: binary logarithm of support quotient\n");  printf("q or 2: difference of support quotient to 1\n");  printf("association rules:\n");  printf("d or 1: absolute confidence difference to prior\n");  printf("q or 2: absolute difference of confidence quotient to 1\n");  printf("a or 3: absolute difference of improvement value to 1\n");  printf("i or 4: information difference to prior\n");  printf("c or 5: normalized chi^2 measure\n");  #endif  exit(0);                      /* abort the program */}  /* help() *//*--------------------------------------------------------------------*/static void error (int code, ...){                               /* --- print an error message */  #ifndef QUIET                 /* if not quiet version */  va_list    args;              /* list of variable arguments */  const char *msg;              /* error message */  assert(prgname);              /* check the program name */  if (code < E_UNKNOWN) code = E_UNKNOWN;  if (code < 0) {               /* if to report an error, */    msg = errmsgs[-code];       /* get the error message */    if (!msg) msg = errmsgs[-E_UNKNOWN];    fprintf(stderr, "\n%s: ", prgname);    va_start(args, code);       /* get variable arguments */    vfprintf(stderr, msg, args);/* print error message */    va_end(args);               /* end argument evaluation */  }  #endif  #ifndef NDEBUG                /* if debug version */  if (istree)  ist_delete(istree);   /* clean up memory */  if (tatree)  tat_delete(tatree);   /* and close files */  if (taset)   tas_delete(taset, 0);  if (itemset) is_delete(itemset);  if (in  && (in  != stdin))  fclose(in);  if (out && (out != stdout)) fclose(out);  #endif  #ifdef STORAGE                /* if storage debugging */  showmem("at end of program"); /* check memory usage */  #endif  exit(code);                   /* abort the program */}  /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){                               /* --- main function */  int    i, k = 0, n;           /* loop variables, counters */  char   *s;                    /* to traverse the options */  char   **optarg = NULL;       /* option argument */  char   *fn_in   = NULL;       /* name of input  file */  char   *fn_out  = NULL;       /* name of output file */  char   *fn_app  = NULL;       /* name of item appearances file */  char   *blanks  = NULL;       /* blanks */  char   *fldseps = NULL;       /* field  separators */  char   *recseps = NULL;       /* record separators */  char   *cominds = NULL;       /* comment indicators */  char   *apps    = NULL;       /* item appearance indicator vector */  double supp     = 0.1;        /* minimal support    (in percent) */  double smax     = 1.0;        /* maximal support    (in percent) */  double conf     = 0.8;        /* minimal confidence (in percent) */  int    rsdef    = IST_BODY;   /* rule support definition */  int    target   = 'r';        /* target type (sets/rules/h.edges) */  int    arem     = 0;          /* additional rule evaluation measure */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -