📄 apriori.c
字号:
int lift = 0; /* flag for printing the lift */ double minval = 0.1; /* minimal evaluation measure value */ double lftval = 0; /* lift value (confidence/prior) */ int minlen = 1; /* minimal rule length */ int maxlen = 5; /* maximal rule length */ int load = 1; /* flag for loading transactions */ int sort = 2; /* flag for item sorting and recoding */ double filter = 0.1; /* item usage filtering parameter */ int tree = 1; /* flag for transaction tree */ int heap = 1; /* flag for heap sort vs. quick sort */ int memopt = 0; /* flag for memory usage optimization */ int c2scf = 0; /* flag for conv. to scanable form */ char *sep = " "; /* item separator for output */ char *fmt = "%.1f"; /* output format for support/conf. */ int sout = 1; /* flag for abs./rel. support output */ int ext = 0; /* flag for extended support output */ int aval = 0; /* flag for add. eval. measure value */ int maxcnt = 0; /* maximal number of items per set */ int tacnt; /* number of transactions */ int *map, *set; /* identifier map, item set */ const char *name; /* buffer for item names */ static char buf[4*TFS_SIZE+4];/* buffer for formatting */ clock_t t, tt, tc, x; /* timer for measurements */ #ifndef QUIET /* if not quiet version */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no arguments given */ printf("usage: %s [options] infile outfile [appfile]\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-t# target type (default: association rules)\n" " (s: item sets, c: closed item sets," " m: maximal item sets,\n" " r: association rules," " h: association hyperedges)\n"); printf("-m# minimal number of items per set/rule/hyperedge " "(default: %d)\n", minlen); printf("-n# maximal number of items per set/rule/hyperedge " "(default: %d)\n", maxlen); printf("-s# minimal support of a set/rule/hyperedge " "(default: %g%%)\n", supp *100); printf("-S# maximal support of a set/rule/hyperedge " "(default: %g%%)\n", smax *100); printf("-c# minimal confidence of a rule/hyperedge " "(default: %g%%)\n", conf *100); printf("-o use original definition of the support of a rule " "(body & head)\n"); printf("-k# item separator for output " "(default: \"%s\")\n", sep); printf("-p# output format for support/confidence " "(default: \"%s\")\n", fmt); printf("-x extended support output " "(print both rule support types)\n"); printf("-a print absolute support " "(number of transactions)\n"); printf("-y print lift value (confidence divided by prior)\n"); printf("-e# additional evaluation measure (default: none)\n"); printf("-! print a list of additional evaluation measures\n"); printf("-d# minimal value of additional evaluation measure " "(default: %g%%)\n", minval *100); printf("-v print value of additional " "rule evaluation measure\n"); printf("-g write output in scanable form " "(quote certain characters)\n"); printf("-l do not load transactions into memory " "(work on input file)\n"); printf("-q# sort items w.r.t. their frequency (default: %d)\n" " (1: ascending, -1: descending, 0: do not sort,\n" " 2: ascending, -2: descending w.r.t. " "transaction size sum)\n", sort); printf("-u# filter unused items from transactions " "(default: %g)\n", filter); printf(" (0: do not filter items w.r.t. usage in sets,\n" " <0: fraction of removed items for filtering,\n" " >0: take execution times ratio into account)\n"); printf("-h do not organize transactions as a prefix tree\n"); printf("-j use quicksort to sort the transactions " "(default: heapsort)\n"); printf("-z minimize memory usage " "(default: maximize speed)\n"); printf("-i# ignore records starting with a character " "in the given string\n"); printf("-b/f/r# blank characters, field and record separators\n" " (default: \" \\t\\r\", \" \\t\", \"\\n\")\n"); printf("infile file to read transactions from\n"); printf("outfile file to write item sets/association rules" "/hyperedges to\n"); printf("appfile file stating item appearances (optional)\n"); return 0; /* print a usage message */ } /* and abort the program */ #endif /* #ifndef QUIET */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (*s) { /* traverse options */ switch (*s++) { /* evaluate switches */ case '!': help(); break; case 't': target = (*s) ? *s++ : 'r'; break; case 'm': minlen = (int)strtol(s, &s, 0); break; case 'n': maxlen = (int)strtol(s, &s, 0); break; case 's': supp = 0.01*strtod(s, &s); break; case 'S': smax = 0.01*strtod(s, &s); break; case 'c': conf = 0.01*strtod(s, &s); break; case 'o': rsdef = IST_BOTH; break; case 'k': optarg = &sep; break; case 'p': optarg = &fmt; break; case 'x': ext = 1; break; case 'a': sout |= 2; break; case 'y': lift = 1; break; case 'e': arem = (*s) ? *s++ : 0; break; case 'd': minval = 0.01*strtod(s, &s); break; case 'v': aval = 1; break; case 'g': c2scf = 1; break; case 'l': load = 0; break; case 'q': sort = (int)strtol(s, &s, 0); break; case 'u': filter = strtod(s, &s); break; case 'h': tree = 0; break; case 'j': heap = 0; break; case 'z': memopt = 1; break; case 'i': optarg = &cominds; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (optarg && *s) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-options */ case 0: fn_in = s; break; case 1: fn_out = s; break; case 2: fn_app = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check option argument */ if ((k < 2) || (k > 3)) /* and the number of arguments */ error(E_ARGCNT); /* (either in/out or in/out/app) */ if ((!fn_in || !*fn_in) && (fn_app && !*fn_app)) error(E_STDIN); /* stdin must not be used twice */ switch (target) { /* check and translate target type */ case 's': target = TT_SET; break; case 'm': target = TT_MFSET; break; case 'c': target = TT_CLSET; break; case 'r': target = TT_RULE; break; case 'h': target = TT_HEDGE; break; default : error(E_TARGET, (char)target); break; } if (supp > 1) /* check the minimal support */ error(E_SUPP, supp); /* (< 0: absolute number) */ if ((conf < 0) || (conf > 1)) error(E_CONF, conf); /* check the minimal confidence */ if (minlen <= 0) error(E_RULELEN, minlen); /* check the limits */ if (maxlen <= 0) error(E_RULELEN, maxlen); /* for the rule length */ switch (arem) { /* check and translate measure */ case 0 : case '0': arem = EM_NONE; break; case 'd': case '1': arem = EM_DIFF; break; case 'q': case '2': arem = EM_QUOT; break; case 'a': case '3': arem = EM_AIMP; break; case 'i': case '4': arem = EM_INFO; break; case 'c': case '5': arem = EM_CHI2; break; default : error(E_MEASURE, (char)arem); break; } if ((minval < 0) /* check the measure parameter */ || ((target == TT_RULE) && (arem != EM_AIMP) && (minval > 1))) error(E_MVAL, minval); /* (must usually be between 0 and 1) */ if (target == TT_HEDGE){ /* in hyperedge mode */ minval = conf; conf = 1;}/* adapt the parameters */ else if (target <= TT_CLSET){ /* in item set mode neutralize */ rsdef = IST_BOTH; conf = 1;}/* rule specific settings */ if (arem == EM_NONE) /* if no add. rule eval. measure, */ aval = 0; /* clear the corresp. output flag */ if ((filter <= -1) || (filter >= 1)) filter = 0; /* --- create item set and transaction set --- */ itemset = is_create(); /* create an item set and */ if (!itemset) error(E_NOMEM); /* set the special characters */ is_chars(itemset, blanks, fldseps, recseps, cominds); if (load) { /* if to load the transactions */ taset = tas_create(itemset); if (!taset) error(E_NOMEM); /* create a transaction set */ } /* to store the transactions */ MSG(fprintf(stderr, "\n")); /* terminate the startup message */ /* --- read item appearances --- */ if (fn_app) { /* if item appearances are given */ t = clock(); /* start the timer */ if (*fn_app) /* if an app. file name is given, */ in = fopen(fn_app, "r"); /* open the item appearances file */ else { /* if no app. file name is given, */ in = stdin; fn_app = "<stdin>"; } /* read from std. input */ MSG(fprintf(stderr, "reading %s ... ", fn_app)); if (!in) error(E_FOPEN, fn_app); k = is_readapp(itemset,in); /* read the item appearances */ if (k != 0) error(k, fn_app, RECCNT(itemset), BUFFER(itemset)); if (in != stdin) /* if not read from standard input, */ fclose(in); /* close the input file */ MSG(fprintf(stderr, "[%d item(s)] done ", is_cnt(itemset))); MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t))); } /* print a log message */ /* --- read transactions --- */ t = clock(); /* start the timer */ if (fn_in && *fn_in) /* if an input file name is given, */ in = fopen(fn_in, "r"); /* open input file for reading */ else { /* if no input file name is given, */ in = stdin; fn_in = "<stdin>"; } /* read from standard input */ MSG(fprintf(stderr, "reading %s ... ", fn_in)); if (!in) error(E_FOPEN, fn_in); for (tacnt = 0; 1; tacnt++) { /* transaction read loop */ k = is_read(itemset, in); /* read the next transaction */ if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset)); if (k > 0) break; /* check for error and end of file */ k = is_tsize(itemset); /* update the maximal */ if (k > maxcnt) maxcnt = k; /* transaction size */ if (taset && (tas_add(taset, NULL, 0) != 0)) error(E_NOMEM); /* add the loaded transaction */ } /* to the transaction set */ if (taset) { /* if transactions have been loaded */ if (in != stdin) fclose(in);/* if not read from standard input, */ in = NULL; /* close the input file */ } /* clear the file variable */ n = is_cnt(itemset); /* get the number of items */ MSG(fprintf(stderr, "[%d item(s),", n)); MSG(fprintf(stderr, " %d transaction(s)] done ", tacnt)); MSG(fprintf(stderr, "[%.2fs].", SEC_SINCE(t))); if ((n <= 0) || (tacnt <= 0)) error(E_NOTAS); MSG(fprintf(stderr, "\n")); /* check for at least one transaction */ if (supp < 0) { /* if absolute support is given */ if (!(sout & 2)) sout = 2; /* switch to absolute support output */ supp = (-100 *supp -0.25) /tacnt; if (supp < 0) supp = 0; /* compute a proper relative support */ } /* and check it against 0 */ if (smax < 0) { /* if absolute support is given */ smax = (-100 *smax -0.25) /tacnt; if (smax < 0) smax = 0; /* compute a proper relative support */ } /* and check it against 0 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -