📄 apriori.c
字号:
case 't': target = (*s) ? *s++ : 'r'; break; case 'm': minlen = (int)strtol(s, &s, 0); break; case 'n': maxlen = (int)strtol(s, &s, 0); break; case 's': supp = 0.01*strtod(s, &s); break; case 'c': conf = 0.01*strtod(s, &s); break; case 'o': rsdef = IST_BOTH; break; case 'x': ext = 1; break; case 'a': abs = 1; break; case 'p': fmt = "%g%%"; break; case 'e': arem = (*s) ? *s++ : 0; break; case 'd': minval = 0.01*strtod(s, &s); break; case 'v': aval = 1; break; case 'g': c2scf = 1; break; case 'l': load = 0; break; case 'q': sort = (int)strtol(s, &s, 0); break; case 'z': memopt = 1; break; case 'i': optarg = &cominds; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (optarg && *s) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-options */ case 0: fn_in = s; break; case 1: fn_out = s; break; case 2: fn_app = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check option argument */ if ((k < 2) || (k > 3)) /* and the number of arguments */ error(E_ARGCNT); /* (either in/out or in/out/app) */ if ((!fn_in || !*fn_in) && (fn_app && !*fn_app)) error(E_STDIN); /* stdin must not be used twice */ switch (target) { /* check and translate target type */ case 's': target = TT_SET; break; case 'r': target = TT_RULE; break; case 'h': target = TT_HEDGE; break; default : error(E_TARGET, (char)target); break; } if ((supp < 0) || (supp > 1)) error(E_SUPP, supp); /* check the minimal support */ if ((conf < 0) || (conf > 1)) error(E_CONF, conf); /* check the minimal confidence */ if (minlen <= 0) error(E_RULELEN, minlen); /* check the limits */ if (maxlen <= 0) error(E_RULELEN, maxlen); /* for the rule length */ switch (arem) { /* check and translate measure */ case 0 : case '0': arem = EM_NONE; break; case 'd': case '1': arem = EM_DIFF; break; case 'q': case '2': arem = EM_QUOT; break; case 'a': case '3': arem = EM_AIMP; break; case 'i': case '4': arem = EM_INFO; break; case 'c': case '5': arem = EM_CHI2; break; default : error(E_MEASURE, (char)arem); break; } if ((minval < 0) || ((arem != EM_AIMP) && (minval > 1))) error(E_MVAL, minval); /* check the measure parameter */ if (target == TT_HEDGE){ /* in hyperedge mode */ minval = conf; conf = 1;}/* adapt the parameters */ else if (target == TT_SET) { /* in itemset mode neutralize */ rsdef = IST_BOTH; conf = 1;}/* rule specific settings */ if (arem == EM_NONE) /* if no add. rule eval. measure, */ aval = 0; /* clear the corresp. output flag */ /* --- create item set and transaction set --- */ itemset = is_create(); /* create an item set and */ if (!itemset) error(E_NOMEM); /* set the special characters */ is_chars(itemset, blanks, fldseps, recseps, cominds); if (load) { /* if to load the transactions */ taset = tas_create(itemset); if (!taset) error(E_NOMEM); /* create a transaction set */ } /* to store the transactions */ MSG(fprintf(stderr, "\n")); /* terminate the startup message */ /* --- read item appearances --- */ if (fn_app) { /* if item appearances are given */ t = clock(); /* start the timer */ if (!*fn_app) { /* if no app. file name is given, */ in = stdin; fn_app = "<stdin>"; } /* read from std. input */ else { /* if an app. file name is given, */ in = fopen(fn_app, "r"); /* open the item appearances file */ if (!in) error(E_FOPEN, fn_app); } /* afterwards print a log message */ MSG(fprintf(stderr, "reading %s ... ", fn_app)); k = is_readapp(itemset,in); /* read the item appearances */ if (k != 0) error(k, fn_app, RECCNT(itemset), BUFFER(itemset)); if (in != stdin) /* if not read from standard input, */ fclose(in); /* close the input file */ MSG(fprintf(stderr, "[%d item(s)] done ", is_cnt(itemset))); MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t))); } /* print a log message */ /* --- read transactions --- */ t = clock(); /* start the timer */ if (!fn_in || !*fn_in) { /* if no input file name is given, */ in = stdin; fn_in = "<stdin>"; } /* read from standard input */ else { /* if an input file name is given, */ in = fopen(fn_in, "r"); /* open input file for reading */ if (!in) error(E_FOPEN, fn_in); } /* afterwards print a log message */ MSG(fprintf(stderr, "reading %s ... ", fn_in)); for (tacnt = 0; 1; tacnt++) { /* transaction read loop */ k = is_read(itemset, in); /* read the next transaction */ if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset)); if (k > 0) break; /* check for error and end of file */ k = is_tsize(itemset); /* update the maximal */ if (k > maxcnt) maxcnt = k; /* transaction size */ if (taset && (tas_add(taset, NULL, 0) != 0)) error(E_NOMEM); /* add the loaded transaction */ } /* to the transaction set */ if (taset) { /* if transactions have been loaded */ if (in != stdin) fclose(in);/* if not read from standard input, */ in = NULL; /* close the input file */ } /* clear the file variable */ n = is_cnt(itemset); /* get the number of items */ MSG(fprintf(stderr, "[%d item(s),", n)); MSG(fprintf(stderr, " %d transaction(s)] done ", tacnt)); MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t))); /* --- sort and recode items --- */ if (sort != 0) { /* sort items w.r.t. their frequency */ MSG(fprintf(stderr, "sorting and recoding items ... ")); t = clock(); /* start the timer */ p = (int*)malloc(is_cnt(itemset) *sizeof(int)); if (!p) error(E_NOMEM); /* create an item identifier map */ if (rsdef == IST_BODY) /* if rule support = body support */ k = (int)ceil(tacnt *supp *conf); else /* if rule supp. = body&head support */ k = (int)ceil(tacnt *supp); n = is_recode(itemset, k, sort, p); if (taset) { /* sort and recode the items and */ tas_recode(taset, p, n); /* recode the loaded transactions */ maxcnt = tas_max(taset); /* get the new maximal t.a. size */ } /* (may be smaller than before) */ free(p); /* delete the item identifier map */ MSG(fprintf(stderr, "[%d item(s)] ", n)); MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t))); } /* print a log message */ /* --- create an item set tree --- */ apps = (char*)malloc(n *sizeof(char)); if (!apps) error(E_NOMEM); /* get the appearance indicators */ for (apps += i = n; --i >= 0; ) *--apps = is_getapp(itemset, i); istree = ist_create(n, supp, conf, rsdef, apps, memopt); free(apps); /* create an item set tree and */ if (!istree) error(E_NOMEM); /* delete the item app. vector */ for (k = n; --k >= 0; ) /* set single item frequencies */ ist_setcnt(istree, k, is_getfrq(itemset, k)); ist_settac(istree, tacnt); /* set the number of transactions */ if (maxlen > maxcnt) /* clamp the rule length */ maxlen = maxcnt; /* to the maximum set size */ /* --- check item subsets --- */ MSG(fprintf(stderr, "checking subsets of size 1")); t = clock(); /* start the timer */ while (ist_height(istree) < maxlen) { k = ist_addlvl(istree); /* while max. height is not reached, */ if (k < 0) error(E_NOMEM); /* add a level to the item set tree */ if (k != 0) break; /* if no level was added, abort */ MSG(fprintf(stderr, " %d", ist_height(istree))); if (taset) { /* if transactions were loaded */ for (i = tacnt; --i >= 0; ) { ist_count(istree, tas_tract(taset, i), tas_tsize(taset, i)); } } /* traverse and count transactions */ else { /* if to work on the input file, */ rewind(in); /* reset the file position */ for (maxcnt = 0; (i = is_read(itemset, in)) == 0; ) { k = is_tsize(itemset); /* (re)read the transactions and */ if (k > maxcnt) maxcnt = k; /* update the maximum size */ ist_count(istree, is_tract(itemset), k); } /* count the transaction in the tree */ if (i < 0) error(i, fn_in, RECCNT(itemset), BUFFER(itemset)); if (maxcnt < maxlen) /* update the maximal rule length */ maxlen = maxcnt; /* according to the max. t.a. size */ } /* (may be smaller than before) */ } if (!taset) { /* if transactions were not loaded */ if (in != stdin) fclose(in);/* if not read from standard input, */ in = NULL; /* close the input file */ } /* clear the file variable */ MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t))); /* --- sort transactions --- */ if (target == TT_SET) { /* if to find frequent item sets */ if (!taset) /* transactions must be loaded */ ext = 0; /* for extended support output */ else if (ext) { /* if extended output is requested */ MSG(fprintf(stderr, "sorting transactions ... ")); t = clock(); /* start the timer */ tas_sort(taset); /* sort the transactions */ MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t))); } /* (sorting is necessary to find the */ } /* number of identical transactions) */ /* --- print item sets/rules/hyperedges --- */ t = clock(); /* start the timer */ if (!fn_out || !*fn_out) { /* if no output file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to std. output */ else { /* if an output file name is given, */ out = fopen(fn_out, "w"); /* open the output file */ if (!out) error(E_FOPEN, fn_out); MSG(fprintf(stderr, "writing %s ... ", fn_out)); } /* print a log message */ ist_init(istree, minlen, arem, minval); p = is_tract(itemset); /* get the transaction buffer */ if (target == TT_SET) { /* if to find frequent item sets, */ for (n = 0; 1; n++) { /* extract item sets from the tree */ k = ist_set(istree, p, &supp); if (k <= 0) break; /* get the next frequent item set */ for (i = 0; i < k; i++) { /* and traverse its items */ name = is_name(itemset, p[i]); if (c2scf) { sc_format(buf, name, 0); name = buf; } fputs(name, out); /* print the name of the next item */ putc(' ', out); /* and a blank as a separator */ } fputs(" (", out); /* print the item set's support */ fprintf(out, fmt, supp *100); if (abs) fprintf(out, "/%.0f", supp *tacnt); if (ext) { /* if to print the extended support */ supp = tas_occur(taset, p, k); fputs(", ", out); /* get the number of occurrences */ fprintf(out, fmt, (supp/tacnt) *100); if (abs) fprintf(out, "/%.0f", supp); } /* print the extended support data */ fputs(")\n", out); /* terminate the support output */ } } else if (target == TT_RULE) { /* if to find association rules, */ for (n = 0; 1; n++) { /* extract rules from tree */ k = ist_rule(istree, p, &supp, &conf, &minval); if (k <= 0) break; /* get the next association rule */ for (i = 0; i < k; i++) { /* and traverse its items */ name = is_name(itemset, p[i]); if (c2scf) { sc_format(buf, name, 0); name = buf; } fputs(name, out); /* print the next item */ fputs((i <= 0) ? " <- " : " ", out); } /* print a separator */ fputs(" (", out); /* print the rule evaluation */ if (ext && (rsdef == IST_BODY)) { fprintf(out, fmt, supp *conf *100); if (abs) fprintf(out, "/%.0f", supp *conf *tacnt); fputs(", ", out); /* print the support of the rule */ } /* from the support of the body */ fprintf(out, fmt, supp *100); if (abs) fprintf(out, "/%.0f", supp *tacnt); fputs(", ", out); /* print the rule support */ if (ext && (rsdef == IST_BOTH)) { fprintf(out, fmt, (supp/conf) *100); if (abs) fprintf(out, "/%.0f", (supp/conf) *tacnt); fputs(", ", out); /* print the support of the body */ } /* from the support of the rule */ fprintf(out, fmt, conf *100); /* print the rule confidence */ if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); } fputs(")\n", out); /* print the value of the additional */ } } /* rule evaluation measure */ else { /* if to find association hyperedges */ for (n = 0; 1; n++) { /* extract hyperedges from tree */ k = ist_hedge(istree, p, &supp, &conf); if (k <= 0) break; /* get the next hyperedge */ for (i = 0; i < k; i++) { /* and traverse its items */ name = is_name(itemset, p[i]); if (c2scf) { sc_format(buf, name, 0); name = buf; } fputs(name, out); /* print the name of the next item */ putc(' ', out); /* and a blank as a separator */ } fputs(" (", out); fprintf(out, fmt, supp *100); if (abs) fprintf(out, "/%.0f", supp *tacnt); fputs(", ", out); fprintf(out, fmt, conf *100); fputs(")\n", out); /* print support and confidence */ } /* of the hyperedge */ } /* if (target == TT_SET) .. else .. */ if (fflush(out) != 0) error(E_FWRITE, fn_out); if (out != stdout) fclose(out); out = NULL; /* close the output file */ MSG(fprintf(stderr, "[%d %s(s)] done ", n, ttypes[target])); MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t))); /* --- clean up --- */ #ifndef NDEBUG /* if this is a debug version */ ist_delete(istree); /* delete the item set tree, */ if (taset) tas_delete(taset, 0); /* the transaction set, */ is_delete(itemset); /* and the item set */ #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -