📄 apriori.c
字号:
/* --- sort and recode items --- */ MSG(fprintf(stderr, "sorting and recoding items ... ")); t = clock(); /* start the timer */ map = (int*)malloc(is_cnt(itemset) *sizeof(int)); if (!map) error(E_NOMEM); /* create an item identifier map */ if (rsdef == IST_BODY) /* if rule support = body support */ k = (int)ceil(tacnt *supp *conf); else /* if rule supp. = body&head support */ k = (int)ceil(tacnt *supp); /* compute the absolut support */ n = is_recode(itemset, k, sort, map); if (taset) { /* sort and recode the items and */ tas_recode(taset, map,n); /* recode the loaded transactions */ maxcnt = tas_max(taset); /* get the new maximal t.a. size */ } /* (may be smaller than before) */ free(map); /* delete the item identifier map */ MSG(fprintf(stderr, "[%d item(s)] ", n)); MSG(fprintf(stderr, "done [%.2fs].", SEC_SINCE(t))); if (n <= 0) error(E_NOTAS); /* print a log message and */ MSG(fprintf(stderr, "\n")); /* check the number of items */ /* --- create a transaction tree --- */ tt = 0; /* init. the tree construction time */ if (tree && taset) { /* if transactions were loaded */ MSG(fprintf(stderr, "creating transaction tree ... ")); t = clock(); /* start the timer */ tatree = tat_create(taset, heap); if (!tatree) error(E_NOMEM);/* create a transaction tree */ if (filter == 0) { /* if a tree rebuild is not needed, */ tas_delete(taset, 0); taset = NULL; } /* delete transactions */ tt = clock() -t; /* note the time for the construction */ MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t))); } /* print a log message */ /* --- create an item set tree --- */ t = clock(); tc = 0; /* start the timer */ apps = (char*)malloc(n *sizeof(char)); if (!apps) error(E_NOMEM); /* get the appearance indicators */ for (apps += i = n; --i >= 0; ) *--apps = is_getapp(itemset, i); istree = ist_create(n, supp, conf, rsdef, apps, memopt); if (!istree) error(E_NOMEM); /* create an item set tree */ for (k = n; --k >= 0; ) /* set single item frequencies */ ist_setcnt(istree, k, is_getfrq(itemset, k)); ist_settac(istree, tacnt); /* set the number of transactions */ if (maxlen > maxcnt) /* clamp the rule length */ maxlen = maxcnt; /* to the maximum set size */ /* --- check item subsets --- */ MSG(fprintf(stderr, "checking subsets of size 1")); while (ist_height(istree) < maxlen) { if (filter != 0) { /* if to filter w.r.t. item usage, */ i = ist_check(istree, apps); /* check current item usage */ if (i < maxlen) maxlen = i; /* update the maximum size */ if (ist_height(istree) >= i) break; } /* check the tree height */ k = ist_addlvl(istree); /* while max. height is not reached, */ if (k < 0) error(E_NOMEM); /* add a level to the item set tree */ if (k != 0) break; /* if no level was added, abort */ MSG(fprintf(stderr, " %d", ist_height(istree))); if (tatree) { /* if a transaction tree was created */ if (((filter < 0) /* if to filter w.r.t. item usage */ && (i < -filter *n)) /* and enough items were removed */ || ((filter > 0) /* or counting time is long enough */ && (i < n) && (i *(double)tt < filter *n *tc))) { n = i; x = clock(); /* note the new number of items */ tas_filter(taset,apps); /* and remove unnecessary items */ tat_delete(tatree); /* delete the transaction tree */ tatree = tat_create(taset, heap); if (!tatree) error(E_NOMEM); tt = clock() -x; /* rebuild the transaction tree and */ } /* note the new construction time */ x = clock(); /* count the transaction tree */ ist_countx(istree, tatree); tc = clock() -x; } /* note the new count time */ else if (taset) { /* if transactions were loaded */ if (((filter < 0) /* if to filter w.r.t. item usage */ && (i <= -filter *n)) /* and enough items were removed */ || ((filter > 0) /* or counting time is long enough */ && (i *(double)tt <= filter *n *tc))) { n = i; x = clock(); /* note the new number of items */ tas_filter(taset,apps); /* and remove unnecessary items */ tt = clock() -t; /* from the transactions */ } /* note the filtering time */ for (i = tacnt; --i >= 0;)/* traverse and count transactions */ ist_count(istree, tas_tract(taset, i), tas_tsize(taset, i)); tc = clock() -t; } /* note the new count time */ else { /* if to work on the input file, */ rewind(in); /* reset the file position */ for (maxcnt = 0; (i = is_read(itemset, in)) == 0; ) { if (filter != 0) /* (re)read the transactions and */ is_filter(itemset, apps); /* remove unnecessary items */ k = is_tsize(itemset); /* update the maximum size */ if (k > maxcnt) maxcnt = k; /* of a transaction */ ist_count(istree, is_tract(itemset), k); } /* count the transaction in the tree */ if (i < 0) error(i, fn_in, RECCNT(itemset), BUFFER(itemset)); if (maxcnt < maxlen) /* update the maximal rule length */ maxlen = maxcnt; /* according to the max. t.a. size */ } /* (may be smaller than before) */ } if (!taset && !tatree) { /* if transactions were not loaded */ if (in != stdin) fclose(in);/* if not read from standard input, */ in = NULL; /* close the input file */ } /* clear the file variable */ MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t))); /* --- filter found item sets --- */ if ((target == TT_MFSET) || (target == TT_CLSET)) { MSG(fprintf(stderr, "filtering %s item sets ... ", (target == TT_MFSET) ? "maximal" : "closed")); t = clock(); /* filter the item sets */ ist_filter(istree, (target == TT_MFSET) ? IST_MAXFRQ : IST_CLOSED); MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t))); } /* (filter takes longer than print) */ /* --- sort transactions --- */ if (target <= TT_CLSET) { /* if to find frequent item sets */ if (!taset) /* transactions must be loaded */ ext = 0; /* for extended support output */ else if (ext) { /* if extended output is requested */ MSG(fprintf(stderr, "sorting transactions ... ")); t = clock(); /* start the timer */ tas_sort(taset, heap); /* sort the transactions */ MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t))); } /* (sorting is necessary to find the */ } /* number of identical transactions) */ /* --- print item sets/rules/hyperedges --- */ t = clock(); /* start the timer */ if (fn_out && *fn_out) /* if an output file name is given, */ out = fopen(fn_out, "w"); /* open the output file */ else { /* if no output file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to std. output */ MSG(fprintf(stderr, "writing %s ... ", fn_out)); if (!out) error(E_FOPEN, fn_out); ist_init(istree, minlen, arem, minval); set = is_tract(itemset); /* get the transaction buffer */ if (target <= TT_CLSET) { /* if to find frequent item sets */ for (n = 0; 1; ) { /* extract item sets from the tree */ k = ist_set(istree, set, &supp, &conf); if (k <= 0) break; /* get the next frequent item set */ if (supp > smax) continue;/* check against maximal support */ for (i = 0; i < k; i++) { /* traverse the set's items */ name = is_name(itemset, set[i]); if (c2scf) { sc_format(buf, name, 0); name = buf; } fputs(name, out); /* print the name of the next item */ fputs((i < k-1) ? sep : " ", out); } /* print a separator */ fputs(" (", out); /* print the item set's support */ if (sout & 1) { fprintf(out, fmt, supp *100); fputs((sout & 2) ? "%/" : "%", out); } if (sout & 2) { fprintf(out, "%d", (int)(supp *tacnt +0.5)); } if (ext) { /* if to print the extended support */ supp = tas_occur(taset, set, k); fputs(", ", out); /* get the number of occurrences */ fprintf(out, fmt, (supp/tacnt) *100); if (sout & 2) fprintf(out, "/%.0f", supp); } /* print the extended support data */ if (aval) { fputs(", ", out); fprintf(out, fmt, conf *100); } fputs(")\n", out); /* print the add. eval. measure, */ n++; /* terminate the support output, */ } } /* and count the item set */ else if (target == TT_RULE) { /* if to find association rules, */ for (n = 0; 1; ) { /* extract rules from tree */ k = ist_rule(istree, set, &supp, &conf, &lftval, &minval); if (k <= 0) break; /* get the next association rule */ if (supp > smax) continue;/* check against maximal support */ for (i = 0; i < k; i++) { /* traverse the rule's items */ name = is_name(itemset, set[i]); if (c2scf) { sc_format(buf, name, 0); name = buf; } fputs(name, out); /* print the next item */ fputs((i <= 0) ? " <- " : ((i < k-1) ? sep : " "), out); } /* print a separator */ fputs(" (", out); /* print the rule evaluation */ if (ext && (rsdef == IST_BODY)) { if (sout & 1) { fprintf(out, fmt, supp *conf *100); if (sout & 2) fputc('/', out); } if (sout & 2) { fprintf(out, "%d", (int)(supp*conf*tacnt+0.5));} fputs(", ", out); /* print the support of the rule */ } /* from the support of the body */ if (sout & 1) { fprintf(out, fmt, supp *100); if (sout & 2) fputc('/', out); } if (sout & 2) { fprintf(out, "%d", (int)(supp *tacnt +0.5)); } fputs(", ", out); /* print the rule support */ if (ext && (rsdef == IST_BOTH)) { if (sout & 1) { fprintf(out, fmt, (supp/conf) *100); if (sout & 2) fputc('/', out); } if (sout & 2) { fprintf(out,"%d",(int)((supp/conf)*tacnt+0.5));} fputs(", ", out); /* print the support of the body */ } /* from the support of the rule */ fprintf(out, fmt, conf *100); /* print the rule confidence */ if (lift) { fputs(", ", out); fprintf(out, fmt, lftval *100); } if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); } fputs(")\n", out); /* print the value of the additional */ n++; /* rule evaluation measure and */ } } /* count the association rule */ else { /* if to find association hyperedges */ for (n = 0; 1; ) { /* extract hyperedges from tree */ k = ist_hedge(istree, set, &supp, &conf); if (k <= 0) break; /* get the next hyperedge */ if (supp > smax) continue;/* check against maximal support */ for (i = 0; i < k; i++) { /* traverse the edge's items */ name = is_name(itemset, set[i]); if (c2scf) { sc_format(buf, name, 0); name = buf; } fputs(name, out); /* print the name of the next item */ fputs((i < k-1) ? sep : " ", out); } /* print a separator */ fputs(" (", out); if (sout & 1) { fprintf(out, fmt, supp *100); if (sout & 2) fputc('/', out); } if (sout & 2) { fprintf(out, "%d", (int)(supp *tacnt +0.5)); } fputs(", ", out); fprintf(out, fmt, conf *100); fputs(")\n", out); /* print support and confidence */ n++; /* of the hyperedge and */ } /* count the hyperedge */ } /* if (target <= TT_MFSET) .. else .. */ if (fflush(out) != 0) error(E_FWRITE, fn_out); if (out != stdout) fclose(out); out = NULL; /* close the output file */ MSG(fprintf(stderr, "[%d %s(s)] done ", n, ttypes[target])); MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t))); #ifdef BENCH printf("number of counters : %d\n", istree->cnt); printf("necessary counters : %d\n", istree->nec); printf("number of child pointers: %d\n", istree->chcnt); printf("necessary child pointers: %d\n", istree->chnec); printf("allocated bytes : %d\n", istree->bytes); #endif /* --- clean up --- */ #ifndef NDEBUG /* if this is a debug version */ free(apps); /* delete the item app. vector */ ist_delete(istree); /* delete the item set tree, */ if (tatree) tat_delete(tatree); /* the transaction tree, */ if (taset) tas_delete(taset, 0); /* the transaction set, */ is_delete(itemset); /* and the item set */ #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -