📄 relx.c
字号:
/*---------------------------------------------------------------------- File : relx.c Contents: relim algorithm for finding frequent item sets (extended version, supporting penalized item insertions) Author : Christian Borgelt History : 2004.11.05 file created from eclat.c 2004.11.17 first reasonably fast version completed 2004.11.18 start of loop over transactions lists improved 2004.11.20 transaction tree version added 2004.11.23 absolute/relative support output changed 2004.12.09 filter added (binary logarithm of supp. quotient) 2004.12.12 penalized insertion of missing items added 2005.06.20 use of flag for "no item sorting" corrected 2006.11.26 adapted to new structures ISFMTR and ISEVAL----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <limits.h>#include <float.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "relx"#define DESCRIPTION "find frequent item sets with item insertions"#define VERSION "version 1.7 (2008.01.25) " \ "(c) 2004-2008 Christian Borgelt"/* --- marker --- */#define RE_EOT INT_MIN /* end of transaction marker *//* --- output flags --- */#define OF_REL 0x01 /* print relative support */#define OF_ABS 0x02 /* print absolute support */#define OF_DEV 0x04 /* print deviation from indep. occ. */#define OF_SCAN 0x08 /* convert names to scanable form *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_SUPP (-9) /* invalid item set support */#define E_ITEMCNT (-10) /* invalid number of items */#define E_NOTAS (-11) /* no items or transactions */#define E_PENEXP (-12) /* insertion penalty expected */#define E_PENALTY (-13) /* invalid insertion penalty */#define E_UNKNOWN (-18) /* unknown error */#ifndef QUIET /* if not quiet version */#define MSG(x) x /* print messages */#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (ts_reccnt(is_tabscan(s)) \ - ((ts_delim(is_tabscan(s)) == TS_REC) ? 1 : 0))#define BUFFER(s) ts_buf(is_tabscan(s))/*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef int REPFN (int *ids, int cnt, int pfx, float supp);typedef struct _tale { /* --- transaction list element --- */ struct _tale *succ; /* pointer to successor element */ int *items; /* items in the transaction */ float wgt; /* transaction weight */} TALE; /* (transaction list element) */typedef struct { /* --- transaction (tree) list --- */ TALE *head; /* head element of the list */ float supp; /* number of transactions */} TALIST; /* (transaction (tree) list) */typedef struct { /* --- recursive elimination search */ int cnt; /* total number of items */ int min; /* minimum number of items */ int max; /* maximum number of items */ int trcnt; /* total number of transactions */ float supp; /* minimum support (number of trans.) */ float minwgt; /* minimal transaction weight */ float *pens; /* insertion penalties per item */ REPFN *report; /* report function for results */ int isc; /* number of frequent item sets */ TALIST **lists; /* vectors of trans. (tree) lists */ TALE **elems; /* vectors of list elements */ int items[1]; /* item vector for reporting */} RELIM; /* (recursive elimination search) *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#ifndef QUIET /* if not quiet version *//* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_SUPP -9 */ "invalid minimal support %g%%\n", /* E_ITEMCNT -10 */ "invalid number of items %d\n", /* E_NOTAS -11 */ "no items or transactions to work on\n", /* -12 to -15 */ NULL, NULL, NULL, NULL, /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_UNKNOWN -18 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file */static int tacnt = 0; /* number of transactions */static ISEVAL *isevl = NULL; /* item set evaluator */static ISFMTR *isfmt = NULL; /* item set formatter */static double mindev = -DBL_MAX; /* minimal value of deviation */static float *pens = NULL; /* insertion penalties per item */static int flags = OF_REL; /* output flags */static char *fmt = "%.1f"; /* output format for support *//*---------------------------------------------------------------------- Item Set Report Function----------------------------------------------------------------------*/static int report (int *ids, int cnt, int pfx, float supp){ /* --- report a frequent item set */ double dev = 0; /* deviation from indep. occurrence */ assert(ids); /* check the function arguments */ if (flags & OF_DEV) { /* if to filter w.r.t. deviation */ dev = ise_eval(isevl, ids, cnt, pfx, supp); if (dev < mindev) return 0; /* compute and check the deviation */ } /* from an independent occurrence */ isf_format(isfmt, ids, cnt, pfx); isf_print(isfmt, out); /* print the items of the set */ fputs(" (", out); /* print rel. and abs. support */ if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100); fputs((flags & OF_ABS) ? "%/" : "%", out); } if (flags & OF_ABS) { fprintf(out, fmt, supp); } if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); } fputs(")\n", out); /* print deviation from independence */ return 1; /* return 'reported 1 item set' */} /* report() *//*---------------------------------------------------------------------- Recursive Elimination Functions----------------------------------------------------------------------*/static int re_vecs (TALIST *lists, int i, int d, int pfx, RELIM *re){ /* --- recursive elimination */ int item, x; /* buffers */ TALIST *subls, *s; /* lists of subset of transactions */ TALE *elems; /* elements of transaction lists */ TALE *src, *dst, *buf; /* to traverse the transaction lists */ float pen, wgt; /* insertion penalty for an item */ subls = re->lists[d]; /* get the subset lists vector */ if (!subls) { /* if it does not exist yet */ re->lists[d] = subls = (TALIST*)calloc(re->cnt+1, sizeof(TALIST)); if (!subls) return -1; /* create a new subset lists vector */ } /* and store it in the vector buffer */ elems = re->elems[d]; /* get the list elements vector */ if (!elems) { /* if it does not exist yet */ re->elems[d] = elems = (TALE*) malloc(re->trcnt *sizeof(TALE)); if (!elems) return -1; /* create a new list elements vector */ } /* and store it in the vector buffer */ for (d++; 1; ) { /* traverse the transaction lists */ s = subls +re->cnt; /* clear special transaction sublist */ s->head = NULL; s->supp = 0;/* (collecting empty transactions) */ s = lists +i; /* get the next transaction list */ /* -- process transactions not containing item i -- */ dst = (d < re->max) ? elems : NULL; if (re->pens) { /* if insertion costs are given */ pen = re->pens[item = i]; /* get the item insertion penalty */ if (!dst) { /* if the maximum size is reached */ while (++item <= re->cnt) { for (src = lists[item].head; src; src = src->succ) { wgt = src->wgt *pen; if (wgt >= re->minwgt) s->supp += wgt; } /* check the penalized trans. weight */ } } /* and process trans. if sufficient */ else { /* if maximum size not reached */ while (++item <= re->cnt) { for (src = lists[item].head; src; src = src->succ) { dst->wgt = src->wgt *pen; if (dst->wgt < re->minwgt) continue; /* check the minimum trans. weight */ s->supp += dst->wgt;/* and process trans. if sufficient */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -