📄 relim.c
字号:
/*---------------------------------------------------------------------- File : relim.c Contents: relim algorithm for finding frequent item sets Author : Christian Borgelt History : 05.11.2004 file created from eclat.c 17.11.2004 first reasonably fast version completed 18.11.2004 start of loop over transactions lists improved 20.11.2004 transaction tree version added 23.11.2004 absolute/relative support output changed 09.12.2004 filter added (binary logarithm of supp. quotient)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <limits.h>#include <float.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "relim"#define DESCRIPTION "find frequent item sets with the relim algorithm"#define VERSION "version 1.10 (2004.12.15) " \ "(c) 2004 Christian Borgelt"/* --- marker --- */#define RE_EOT INT_MIN /* end of transaction marker *//* --- output flags --- */#define OF_REL 0x01 /* print relative support */#define OF_ABS 0x02 /* print absolute support */#define OF_DEV 0x04 /* print deviation from indep. occ. */#define OF_SCANF 0x08 /* convert names to scanable form *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_SUPP (-9) /* invalid item set support */#define E_ITEMCNT (-10) /* invalid number of items */#define E_NOTAS (-11) /* no items or transactions */#define E_UNKNOWN (-18) /* unknown error */#ifndef QUIET /* if not quiet version */#define MSG(x) x /* print messages */#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \ + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s) tfs_buf(is_tfscan(s))/*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef int REPFN (int *ids, int cnt, int supp);typedef struct _tale { /* --- transaction list element --- */ struct _tale *succ; /* pointer to successor element */ int *items; /* items in the transaction */} TALE; /* (transaction list element) */typedef struct _ttle { /* --- trans. tree list element --- */ struct _ttle *succ; /* pointer to successor element */ void *items; /* items in the transaction (tree) */ int supp; /* number of represented transactions */} TTLE; /* (transaction tree list element) */typedef struct { /* --- transaction (tree) list --- */ void *head; /* head element of the list */ int supp; /* number of transactions */} TALIST; /* (transaction (tree) list) */typedef struct { /* --- recursive elimination search */ int cnt; /* total number of items */ int min; /* minimum number of items */ int max; /* maximum number of items */ int trcnt; /* total number of transactions */ int supp; /* minimum support (number of trans.) */ REPFN *report; /* report function for results */ int isc; /* number of frequent item sets */ TALIST **lists; /* vectors of trans. (tree) lists */ void **elems; /* vectors of list elements */ int items[1]; /* item vector for reporting */} RELIM; /* (recursive elimination search) *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#define LN_2 0.69314718055994530942 /* ln(2) */#ifndef QUIET /* if not quiet version *//* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_SUPP -9 */ "invalid minimal support %g%%\n", /* E_ITEMCNT -10 */ "invalid number of items %d\n", /* E_NOTAS -11 */ "no items or transactions to work on\n", /* -12 to -15 */ NULL, NULL, NULL, NULL, /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_UNKNOWN -18 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static TATREE *tatree = NULL; /* transaction tree */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file */static int tacnt = 0; /* number of transactions */static double mindev = -DBL_MAX; /* minimal value of deviation */static double *logfs = NULL; /* logarithms of item frequencies */static int flags = OF_REL; /* output flags */static char *fmt = "%.1f"; /* output format for support */static char buf[4*TFS_SIZE+4]; /* buffer for formatting *//*---------------------------------------------------------------------- Item Set Report Function----------------------------------------------------------------------*/static int report (int *ids, int cnt, int supp){ /* --- report a frequent item set */ int i; /* loop variable */ const char *name; /* buffer for item name */ double dev; /* deviation from indep. occurrence */ assert(ids); /* check the function arguments */ if (flags & OF_DEV) { /* if to filter w.r.t. deviation */ for (dev = 0, i = cnt; --i >= 0; ) dev += logfs[ids[i]]; /* sum the logs of the item freqs. */ dev = (log(supp/(double)tacnt) -dev) /LN_2; if (dev < mindev) return 0; /* compute and check the deviation */ } /* from an independent occurrence */ for (i = 0; i < cnt; i++) { /* traverse the item set */ name = is_name(itemset, ids[i]); if (flags & OF_SCANF) { /* convert to scanable form */ sc_format(buf, name, 0); name = buf; } fputs(name, out); fputc(' ', out); } /* print the item names */ fputs(" (", out); /* print rel. and abs. support */ if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100); fputs((flags & OF_ABS) ? "%/" : "%", out); } if (flags & OF_ABS) { fprintf(out, "%d", supp); } if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); } fputs(")\n", out); /* terminate the support output */ return 1; /* return 'reported 1 item set' */} /* report() *//*---------------------------------------------------------------------- Recursive Elimination Functions----------------------------------------------------------------------*/static int re_vecs (TALIST *lists, int d, int i, RELIM *re){ /* --- recursive elimination */ int k, item; /* loop variable, buffers */ TALIST *subls, *s; /* lists of subset of transactions */ TALE *elems; /* elements of transaction lists */ TALE *src, *dst, *buf; /* to traverse the transaction lists */ subls = re->lists[d]; /* get the subset lists vector */ if (!subls) { /* if it does not exist yet */ re->lists[d] = subls = (TALIST*)calloc(re->cnt, sizeof(TALIST)); if (!subls) return -1; /* create a new subset lists vector */ } /* and store it in the vector buffer */ elems = (TALE*)re->elems[d]; /* get the list elements vector */ if (!elems) { /* if it does not exist yet */ re->elems[d] = elems = (TALE*) malloc(re->trcnt *sizeof(TALE)); if (!elems) return -1; /* create a new list elements vector */ } /* and store it in the vector buffer */ for (d++; i < re->cnt; i++) { /* traverse the transaction lists */ s = lists +i; /* get the next transaction list */ if (s->supp <= 0) continue; /* skip empty transaction lists */ dst = NULL; /* default: only eliminate item */ if (s->supp >= re->supp) { /* if the support is high enough */ if (d < re->max) /* if not already at maximum size, */ dst = elems; /* collect subset of transactions */ re->items[d-1] = i; /* store the last item of the set */ if (d >= re->min) /* if the item set is large enough */ re->isc += re->report(re->items, d, s->supp); } /* report the frequent item set */ s->supp = 0; /* clear the transaction counter */ src = (TALE*)s->head; /* get the transaction list */ if (!src) continue; /* skip empty transaction lists */ s->head = NULL; /* clear the list's head pointer */ do { /* item elimination loop */ buf = src; /* get the next list element and */ src = src->succ; /* remove it from the trans. list */ item = *buf->items++; /* get and remove the first item */ if (item & RE_EOT) { /* if there is only the first item, */ item &= ~RE_EOT; /* get this item and increment the */ lists[item].supp++; /* corresponding element counters */ if (dst) { dst++; subls[item].supp++; } } else { /* if there is more than one item */ buf->succ = (TALE*)lists[item].head; lists[item].head = buf; /* move elem. to corresp. trans. list */ lists[item].supp++; /* and count the added transaction */ if (dst) { /* if to collect transactions */ dst->items = buf->items; dst->succ = (TALE*)subls[item].head; subls[item].head = dst++; subls[item].supp++; /* add the transaction in a new */ } /* list element at the head of the */ } /* corresponding transaction sublist */ } while (src); /* while transaction list not empty */ if (!dst) continue; /* if no trans. collected, skip rest */ k = (int)(dst -elems); /* compute number of transactions */ if (k >= re->supp) { /* if subset support is big enough */ if (re_vecs(subls, d, i+1, re) < 0) return -1; } /* process transactions recursively */ else if (k > 0) { /* if trans. have been collected */ for (k = re->cnt; --k > i; ) { subls[k].head = NULL; /* clear all lists that may have */ subls[k].supp = 0; /* been filled with transactions */ } /* to have a clean list vector */ } /* for the next collection run */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -