⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.c

📁 数据挖掘中的关联规则算法
💻 C
📖 第 1 页 / 共 3 页
字号:
  int    lift     = 0;          /* flag for printing the lift */  double minval   = 0.1;        /* minimal evaluation measure value */  double lftval   = 0;          /* lift value (confidence/prior) */  int    minlen   = 1;          /* minimal rule length */  int    maxlen   = 5;          /* maximal rule length */  int    load     = 1;          /* flag for loading transactions */  int    sort     = 2;          /* flag for item sorting and recoding */  double filter   = 0.1;        /* item usage filtering parameter */  int    tree     = 1;          /* flag for transaction tree */  int    heap     = 1;          /* flag for heap sort vs. quick sort */  int    memopt   = 0;          /* flag for memory usage optimization */  int    c2scf    = 0;          /* flag for conv. to scanable form */  char   *sep     = " ";        /* item separator for output */  char   *fmt     = "%.1f";     /* output format for support/conf. */  int    sout     = 1;          /* flag for abs./rel. support output */  int    ext      = 0;          /* flag for extended support output */  int    aval     = 0;          /* flag for add. eval. measure value */  int    maxcnt   = 0;          /* maximal number of items per set */  int    tacnt;                 /* number of transactions */  int    *map, *set;            /* identifier map, item set */  const  char *name;            /* buffer for item names */  static char buf[4*TFS_SIZE+4];/* buffer for formatting */  clock_t     t, tt, tc, x;     /* timer for measurements */  #ifndef QUIET                 /* if not quiet version */  prgname = argv[0];            /* get program name for error msgs. */  /* --- print usage message --- */  if (argc > 1) {               /* if arguments are given */    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);    fprintf(stderr, VERSION); } /* print a startup message */  else {                        /* if no arguments given */    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);    printf("%s\n", DESCRIPTION);    printf("%s\n", VERSION);    printf("-t#      target type (default: association rules)\n"           "         (s: item sets, c: closed item sets,"                    " m: maximal item sets,\n"           "          r: association rules,"                    " h: association hyperedges)\n");    printf("-m#      minimal number of items per set/rule/hyperedge "                    "(default: %d)\n", minlen);    printf("-n#      maximal number of items per set/rule/hyperedge "                    "(default: %d)\n", maxlen);    printf("-s#      minimal support    of a     set/rule/hyperedge "                    "(default: %g%%)\n", supp *100);    printf("-S#      maximal support    of a     set/rule/hyperedge "                    "(default: %g%%)\n", smax *100);    printf("-c#      minimal confidence of a         rule/hyperedge "                    "(default: %g%%)\n", conf *100);    printf("-o       use original definition of the support of a rule "                    "(body & head)\n");    printf("-k#      item separator for output "                    "(default: \"%s\")\n", sep);    printf("-p#      output format for support/confidence "                    "(default: \"%s\")\n", fmt);    printf("-x       extended support output "                    "(print both rule support types)\n");    printf("-a       print absolute support "                    "(number of transactions)\n");    printf("-y       print lift value (confidence divided by prior)\n");    printf("-e#      additional evaluation measure (default: none)\n");    printf("-!       print a list of additional evaluation measures\n");    printf("-d#      minimal value of additional evaluation measure "                    "(default: %g%%)\n", minval *100);    printf("-v       print value of additional "                    "rule evaluation measure\n");    printf("-g       write output in scanable form "                    "(quote certain characters)\n");    printf("-l       do not load transactions into memory "                    "(work on input file)\n");    printf("-q#      sort items w.r.t. their frequency (default: %d)\n"           "         (1: ascending, -1: descending, 0: do not sort,\n"           "          2: ascending, -2: descending w.r.t. "                    "transaction size sum)\n", sort);    printf("-u#      filter unused items from transactions "                    "(default: %g)\n", filter);    printf("         (0: do not filter items w.r.t. usage in sets,\n"           "         <0: fraction of removed items for filtering,\n"           "         >0: take execution times ratio into account)\n");    printf("-h       do not organize transactions as a prefix tree\n");    printf("-j       use quicksort to sort the transactions "                    "(default: heapsort)\n");    printf("-z       minimize memory usage "                    "(default: maximize speed)\n");    printf("-i#      ignore records starting with a character "                    "in the given string\n");    printf("-b/f/r#  blank characters, field and record separators\n"           "         (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");    printf("infile   file to read transactions from\n");    printf("outfile  file to write item sets/association rules"                    "/hyperedges to\n");    printf("appfile  file stating item appearances (optional)\n");    return 0;                   /* print a usage message */  }                             /* and abort the program */  #endif  /* #ifndef QUIET */  /* --- evaluate arguments --- */  for (i = 1; i < argc; i++) {  /* traverse arguments */    s = argv[i];                /* get option argument */    if (optarg) { *optarg = s; optarg = NULL; continue; }    if ((*s == '-') && *++s) {  /* -- if argument is an option */      while (*s) {              /* traverse options */        switch (*s++) {         /* evaluate switches */          case '!': help();                         break;          case 't': target = (*s) ? *s++ : 'r';     break;          case 'm': minlen = (int)strtol(s, &s, 0); break;          case 'n': maxlen = (int)strtol(s, &s, 0); break;          case 's': supp   = 0.01*strtod(s, &s);    break;          case 'S': smax   = 0.01*strtod(s, &s);    break;          case 'c': conf   = 0.01*strtod(s, &s);    break;          case 'o': rsdef  = IST_BOTH;              break;          case 'k': optarg = &sep;                  break;          case 'p': optarg = &fmt;                  break;          case 'x': ext    = 1;                     break;          case 'a': sout  |= 2;                     break;          case 'y': lift   = 1;                     break;          case 'e': arem   = (*s) ? *s++ : 0;       break;          case 'd': minval = 0.01*strtod(s, &s);    break;          case 'v': aval   = 1;                     break;          case 'g': c2scf  = 1;                     break;          case 'l': load   = 0;                     break;          case 'q': sort   = (int)strtol(s, &s, 0); break;          case 'u': filter =      strtod(s, &s);    break;          case 'h': tree   = 0;                     break;          case 'j': heap   = 0;                     break;          case 'z': memopt = 1;                     break;          case 'i': optarg = &cominds;              break;          case 'b': optarg = &blanks;               break;          case 'f': optarg = &fldseps;              break;          case 'r': optarg = &recseps;              break;          default : error(E_OPTION, *--s);          break;        }                       /* set option variables */        if (optarg && *s) { *optarg = s; optarg = NULL; break; }      } }                       /* get option argument */    else {                      /* -- if argument is no option */      switch (k++) {            /* evaluate non-options */        case  0: fn_in  = s;      break;        case  1: fn_out = s;      break;        case  2: fn_app = s;      break;        default: error(E_ARGCNT); break;      }                         /* note filenames */    }  }  if (optarg) error(E_OPTARG);  /* check option argument */  if ((k < 2) || (k > 3))       /* and the number of arguments */    error(E_ARGCNT);            /* (either in/out or in/out/app) */  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))    error(E_STDIN);             /* stdin must not be used twice */  switch (target) {             /* check and translate target type */    case 's': target = TT_SET;               break;    case 'm': target = TT_MFSET;             break;    case 'c': target = TT_CLSET;             break;    case 'r': target = TT_RULE;              break;    case 'h': target = TT_HEDGE;             break;    default : error(E_TARGET, (char)target); break;  }  if (supp > 1)                 /* check the minimal support */    error(E_SUPP, supp);        /* (< 0: absolute number) */  if ((conf  <  0) || (conf > 1))    error(E_CONF, conf);        /* check the minimal confidence */  if (minlen <= 0) error(E_RULELEN, minlen);  /* check the limits */  if (maxlen <= 0) error(E_RULELEN, maxlen);  /* for the rule length */  switch (arem) {               /* check and translate measure */    case  0 : case '0': arem = EM_NONE;     break;    case 'd': case '1': arem = EM_DIFF;     break;    case 'q': case '2': arem = EM_QUOT;     break;    case 'a': case '3': arem = EM_AIMP;     break;    case 'i': case '4': arem = EM_INFO;     break;    case 'c': case '5': arem = EM_CHI2;     break;    default : error(E_MEASURE, (char)arem); break;  }  if ((minval < 0)              /* check the measure parameter */  || ((target == TT_RULE) && (arem != EM_AIMP) && (minval > 1)))    error(E_MVAL, minval);      /* (must usually be between 0 and 1) */  if      (target == TT_HEDGE){ /* in hyperedge mode */    minval = conf;    conf = 1;}/* adapt the parameters */  else if (target <= TT_CLSET){ /* in item set mode neutralize */    rsdef = IST_BOTH; conf = 1;}/* rule specific settings */  if (arem == EM_NONE)          /* if no add. rule eval. measure, */    aval = 0;                   /* clear the corresp. output flag */  if ((filter <= -1) || (filter >= 1)) filter = 0;  /* --- create item set and transaction set --- */  itemset = is_create();        /* create an item set and */  if (!itemset) error(E_NOMEM); /* set the special characters */  is_chars(itemset, blanks, fldseps, recseps, cominds);  if (load) {                   /* if to load the transactions */    taset = tas_create(itemset);    if (!taset) error(E_NOMEM); /* create a transaction set */  }                             /* to store the transactions */  MSG(fprintf(stderr, "\n"));   /* terminate the startup message */  /* --- read item appearances --- */  if (fn_app) {                 /* if item appearances are given */    t = clock();                /* start the timer */    if (*fn_app)                /* if an app. file name is given, */      in = fopen(fn_app, "r");  /* open the item appearances file */    else {                      /* if no app. file name is given, */      in = stdin; fn_app = "<stdin>"; }   /* read from std. input */    MSG(fprintf(stderr, "reading %s ... ", fn_app));    if (!in) error(E_FOPEN, fn_app);    k = is_readapp(itemset,in); /* read the item appearances */    if (k  != 0) error(k, fn_app, RECCNT(itemset), BUFFER(itemset));    if (in != stdin)            /* if not read from standard input, */      fclose(in);               /* close the input file */    MSG(fprintf(stderr, "[%d item(s)] done ", is_cnt(itemset)));    MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));  }                             /* print a log message */  /* --- read transactions --- */  t = clock();                  /* start the timer */  if (fn_in && *fn_in)          /* if an input file name is given, */    in = fopen(fn_in, "r");     /* open input file for reading */  else {                        /* if no input file name is given, */    in = stdin; fn_in = "<stdin>"; }   /* read from standard input */  MSG(fprintf(stderr, "reading %s ... ", fn_in));  if (!in) error(E_FOPEN, fn_in);  for (tacnt = 0; 1; tacnt++) { /* transaction read loop */    k = is_read(itemset, in);   /* read the next transaction */    if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));    if (k > 0) break;           /* check for error and end of file */    k = is_tsize(itemset);      /* update the maximal */    if (k > maxcnt) maxcnt = k; /* transaction size */    if (taset && (tas_add(taset, NULL, 0) != 0))      error(E_NOMEM);           /* add the loaded transaction */  }                             /* to the transaction set */  if (taset) {                  /* if transactions have been loaded */    if (in != stdin) fclose(in);/* if not read from standard input, */    in = NULL;                  /* close the input file */  }                             /* clear the file variable */  n = is_cnt(itemset);          /* get the number of items */  MSG(fprintf(stderr, "[%d item(s),", n));  MSG(fprintf(stderr, " %d transaction(s)] done ", tacnt));  MSG(fprintf(stderr, "[%.2fs].", SEC_SINCE(t)));  if ((n <= 0) || (tacnt <= 0)) error(E_NOTAS);  MSG(fprintf(stderr, "\n"));   /* check for at least one transaction */  if (supp < 0) {               /* if absolute support is given */    if (!(sout & 2)) sout = 2;  /* switch to absolute support output */    supp = (-100 *supp -0.25) /tacnt;    if (supp < 0) supp = 0;     /* compute a proper relative support */  }                             /* and check it against 0 */  if (smax < 0) {               /* if absolute support is given */    smax = (-100 *smax -0.25) /tacnt;    if (smax < 0) smax = 0;     /* compute a proper relative support */  }                             /* and check it against 0 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -