⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.c

📁 apriori算法是数据挖掘的经典算法之1,其基于关联规则的思想.这是我的第2个收藏算法
💻 C
📖 第 1 页 / 共 2 页
字号:
/*----------------------------------------------------------------------  File    : apriori.c  Contents: apriori algorithm for finding association rules  Author  : Christian Borgelt  History : 14.02.1996 file created            26.07.1996 output precision reduced            22.11.1996 options -b, -f, and -r added            24.11.1996 option -e added (add. evaluation measures)            18.08.1997 normalized chi^2 measure added                       option -m (minimal rule length) added            13.10.1997 quiet version (no output to stdout or stderr)            27.01.1998 adapted to changed ist_create() function            08.08.1998 optional input file (item appearances) added            02.09.1998 several assertions added            07.09.1998 hyperedge mode (option -h) added            08.12.1998 output of absolute support (option -a) added                       float changed to double            09.12.1998 conversion of names to a scanable form added            05.02.1999 long int changed to int            09.02.1999 input from stdin, output to stdout added            09.08.1999 bug in check of support parameter (<= 0) fixed            05.11.1999 rule evaluation measure EM_AIMP added            08.11.1999 output of add. rule eval. measure value added            16.03.2000 optional use of original rule support definition            01.04.2001 option -h replaced by option -t (target type)            26.05.2001 extended support output added (option -x)            09.06.2001 extended support output for item sets added            15.08.2001 module scan used for output formatting            18.11.2001 item and transaction functions made a module            19.11.2001 options -i, -l changed, option -y removed            28.12.2001 adapted to module tract, some improvements            11.01.2002 evaluation measures codes changed to letters            10.02.2002 option -q extended by a direction parameter            11.02.2002 memory usage minimization option added----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME     "apriori"#define DESCRIPTION "find association rules/hyperedges " \                    "with apriori algorithm"#define VERSION     "version 4.01 (11.04.2002)         " \                    "(c) 1996-2002   Christian Borgelt"/* --- target types --- */#define TT_SET        0         /* frequent item sets */#define TT_RULE       1         /* association rules */#define TT_HEDGE      2         /* association hyperedges *//* --- error codes --- */#define E_OPTION    (-5)        /* unknown option */#define E_OPTARG    (-6)        /* missing option argument */#define E_ARGCNT    (-7)        /* too few/many arguments */#define E_STDIN     (-8)        /* double assignment of stdin */#define E_TARGET    (-9)        /* invalid target type */#define E_SUPP     (-10)        /* invalid support */#define E_CONF     (-11)        /* invalid confidence */#define E_MEASURE  (-12)        /* invalid evaluation measure */#define E_MVAL     (-13)        /* invalid value for measure */#define E_RULELEN  (-14)        /* invalid rule length */#define E_UNKNOWN  (-21)        /* unknown error */#ifndef QUIET                   /* if not quiet version */#define MSG(x)        x         /* print messages */#else                           /* if quiet version */#define MSG(x)                  /* suppress messages */#endif#define SEC_SINCE(t)  ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s)     (tfs_reccnt(is_tfscan(s)) \                      + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s)     tfs_buf(is_tfscan(s))/*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/#ifndef QUIET                   /* if not quiet version *//* --- target types --- */static const char *ttypes[] = {  /* TT_SET      0 */  "set",  /* TT_RULE     1 */  "rule",  /* TT_HEDGE    2 */  "hyperedge",};/* --- error messages --- */static const char *errmsgs[] = {  /* E_NONE      0 */  "no error\n",  /* E_NOMEM    -1 */  "not enough memory\n",  /* E_FOPEN    -2 */  "cannot open file %s\n",  /* E_FREAD    -3 */  "read error on file %s\n",  /* E_FWRITE   -4 */  "write error on file %s\n",  /* E_OPTION   -5 */  "unknown option -%c\n",  /* E_OPTARG   -6 */  "missing option argument\n",  /* E_ARGCNT   -7 */  "wrong number of arguments\n",  /* E_STDIN    -8 */  "double assignment of standard input\n",  /* E_TARGET   -9 */  "invalid target type '%c'\n",  /* E_SUPP    -10 */  "invalid minimal support %g%%\n",  /* E_CONF    -11 */  "invalid minimal confidence %g%%\n",  /* E_MEASURE -12 */  "invalid additional evaluation measure %c\n",  /* E_MVAL    -13 */  "invalid value %g%% for evaluation measure\n",  /* E_RULELEN -14 */  "invalid rule length %d\n",  /*           -15 */  NULL,  /* E_ITEMEXP -16 */  "file %s, record %d: item expected\n",  /* E_DUPITEM -17 */  "file %s, record %d: duplicate item %s\n",  /* E_APPEXP  -18 */  "file %s, record %d: "                         "appearance indicator expected\n",  /* E_UNKAPP  -19 */  "file %s, record %d: "                         "unknown appearance indicator %s\n",  /* E_FLDCNT  -20 */  "file %s, record %d: too many fields\n",  /* E_UNKNOWN -21 */  "unknown error\n"};#endif/*----------------------------------------------------------------------  Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char    *prgname;        /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET   *taset   = NULL; /* transaction set */static ISTREE  *istree  = NULL; /* item set tree */static FILE    *in      = NULL; /* input  file */static FILE    *out     = NULL; /* output file *//*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/static void help (void){                               /* --- print help on eval. measures */  #ifndef QUIET  fprintf(stderr, "\n");        /* terminate startup message */  printf("additional rule evaluation measures (option -e#)\n");  printf("d or 1: absolute confidence difference to prior\n");  printf("q or 2: difference of confidence quotient to 1\n");  printf("a or 3: absolute difference of improvement value to 1\n");  printf("i or 4: information difference to prior\n");  printf("c or 5: normalized chi^2 measure\n");  #endif  exit(0);                      /* abort the program */}  /* help() *//*--------------------------------------------------------------------*/static void error (int code, ...){                               /* --- print an error message */  #ifndef QUIET                 /* if not quiet version */  va_list    args;              /* list of variable arguments */  const char *msg;              /* error message */  #endif  #ifndef QUIET                 /* if not quiet version */  assert(prgname);              /* check the program name */  if (code < E_UNKNOWN) code = E_UNKNOWN;  if (code < 0) {               /* if to report an error, */    msg = errmsgs[-code];       /* get the error message */    if (!msg) msg = errmsgs[-E_UNKNOWN];    fprintf(stderr, "\n%s: ", prgname);    va_start(args, code);       /* get variable arguments */    vfprintf(stderr, msg, args);/* print error message */    va_end(args);               /* end argument evaluation */  }  #endif  #ifndef NDEBUG                /* if debug version */  if (istree)  ist_delete(istree);  if (taset)   tas_delete(taset, 0); /* clean up memory */  if (itemset) is_delete(itemset);   /* and close files */  if (in  && (in  != stdin))  fclose(in);  if (out && (out != stdout)) fclose(out);  #endif  #ifdef STORAGE                /* if storage debugging */  showmem("at end of program"); /* check memory usage */  #endif  exit(code);                   /* abort the program */}  /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){                               /* --- main function */  int    i, k = 0, n;           /* loop variables, counters */  char   *s;                    /* to traverse the options */  char   **optarg = NULL;       /* option argument */  char   *fn_in   = NULL;       /* name of input  file */  char   *fn_out  = NULL;       /* name of output file */  char   *fn_app  = NULL;       /* name of item appearances file */  char   *blanks  = NULL;       /* blanks */  char   *fldseps = NULL;       /* field  separators */  char   *recseps = NULL;       /* record separators */  char   *cominds = NULL;       /* comment indicators */  char   *apps    = NULL;       /* item appearance indicator vector */  double supp     = 0.1;        /* minimal support    (in percent) */  double conf     = 0.8;        /* minimal confidence (in percent) */  int    rsdef    = IST_BODY;   /* rule support definition */  int    target   = 'r';        /* target type (sets/rules/h.edges) */  int    arem     = 0;          /* additional rule evaluation measure */  double minval   = 0.1;        /* minimal evaluation measure value */  int    minlen   = 1;          /* minimal rule length */  int    maxlen   = 5;          /* maximal rule length */  int    load     = 1;          /* flag for loading transactions */  int    sort     = 1;          /* flag for item sorting and recoding */  int    memopt   = 0;          /* flag for memory usage optimization */  int    c2scf    = 0;          /* flag for conv. to scanable form */  char   *fmt     = "%.1f%%";   /* output format for support/conf. */  int    abs      = 0;          /* flag for absolute support output */  int    ext      = 0;          /* flag for extended support output */  int    aval     = 0;          /* flag for add. eval. measure value */  int    maxcnt   = 0;          /* maximal number of items per set */  int    tacnt;                 /* number of transactions */  int    *p;                    /* identifier map, item pointer */  const  char *name;            /* buffer for item names */  static char buf[4*TFS_SIZE+4];/* buffer for formatting */  clock_t     t;                /* timer for measurements */  #ifndef QUIET                 /* if not quiet version */  prgname = argv[0];            /* get program name for error msgs. */  /* --- print usage message --- */  if (argc > 1) {               /* if arguments are given */    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);    fprintf(stderr, VERSION); } /* print a startup message */  else {                        /* if no arguments given */    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);    printf("%s\n", DESCRIPTION);    printf("%s\n", VERSION);    printf("-t#      target type (s: item sets, "                    "r: rules (default), h: hyperedges)\n");    printf("-m#      minimal number of items per set/rule/hyperedge "                    "(default: %d)\n", minlen);    printf("-n#      maximal number of items per set/rule/hyperedge "                    "(default: %d)\n", maxlen);    printf("-s#      minimal support    of a     set/rule/hyperedge "                    "(default: %g%%)\n", supp *100);    printf("-c#      minimal confidence of a         rule/hyperedge "                    "(default: %g%%)\n", conf *100);    printf("-o       use original definition of the support of a rule "                    "(body & head)\n");    printf("-x       extended support output "                    "(print both rule support types)\n");    printf("-a       print absolute support "                    "(number of transactions)\n");    printf("-p       print support/confidence with high precision\n");    printf("-e#      additional rule evaluation measure "                    "(default: none)\n");    printf("-!       print a list of "                    "additional rule evaluation measures\n");    printf("-d#      minimal value of additional evaluation measure "                    "(default: %g%%)\n", minval *100);    printf("-v       print value of additional "                    "rule evaluation measure\n");    printf("-g       write output in scanable form "                    "(quote certain characters)\n");    printf("-l       do not load transactions into memory "                    "(work on input file)\n");    printf("-q#      sort items w.r.t. their frequency (default: 1)\n"           "         (1: ascending, -1: descending, 0: do not sort)\n");    printf("-z       minimize memory usage "                    "(default: maximize speed)\n");    printf("-i#      ignore records starting with characters "                    "in the given string\n");    printf("-b/f/r#  blank characters, field and record separators\n"           "         (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");    printf("infile   file to read transactions from\n");    printf("outfile  file to write item sets/association rules"                    "/hyperedges to\n");    printf("appfile  file stating item appearances (optional)\n");    return 0;                   /* print a usage message */  }                             /* and abort the program */  #endif  /* #ifndef QUIET */  /* --- evaluate arguments --- */  for (i = 1; i < argc; i++) {  /* traverse arguments */    s = argv[i];                /* get option argument */    if (optarg) { *optarg = s; optarg = NULL; continue; }    if ((*s == '-') && *++s) {  /* -- if argument is an option */      while (*s) {              /* traverse options */        switch (*s++) {         /* evaluate switches */          case '!': help();                         break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -