⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 relim.c

📁 数据挖掘中的relim算法,很好的代码
💻 C
📖 第 1 页 / 共 3 页
字号:
/*----------------------------------------------------------------------  File    : relim.c  Contents: relim algorithm for finding frequent item sets  Author  : Christian Borgelt  History : 2004.11.05 file created from eclat.c            2004.11.17 first reasonably fast version completed            2004.11.18 start of loop over transactions lists improved            2004.11.20 transaction tree version added            2004.11.23 absolute/relative support output changed            2004.12.09 filter added (binary logarithm of supp. quotient)            2005.06.20 use of flag for "no item sorting" corrected            2006.11.26 adapted to new structures ISFMTR and ISEVAL            2007.02.13 adapted to modified module tabscan----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <limits.h>#include <float.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME     "relim"#define DESCRIPTION "find frequent item sets with the relim algorithm"#define VERSION     "version 1.14 (2008.01.25)        " \                    "(c) 2004-2008   Christian Borgelt"/* --- marker --- */#define RE_EOT      INT_MIN     /* end of transaction marker *//* --- output flags --- */#define OF_REL      0x01        /* print relative support */#define OF_ABS      0x02        /* print absolute support */#define OF_DEV      0x04        /* print deviation from indep. occ. */#define OF_SCAN     0x08        /* convert names to scanable form *//* --- error codes --- */#define E_OPTION    (-5)        /* unknown option */#define E_OPTARG    (-6)        /* missing option argument */#define E_ARGCNT    (-7)        /* too few/many arguments */#define E_STDIN     (-8)        /* double assignment of stdin */#define E_SUPP      (-9)        /* invalid item set support */#define E_ITEMCNT  (-10)        /* invalid number of items */#define E_NOTAS    (-11)        /* no items or transactions */#define E_UNKNOWN  (-18)        /* unknown error */#ifndef QUIET                   /* if not quiet version */#define MSG(x)        x         /* print messages */#else                           /* if quiet version */#define MSG(x)                  /* suppress messages */#endif#define SEC_SINCE(t)  ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s)     (ts_reccnt(is_tabscan(s)) \                      - ((ts_delim(is_tabscan(s)) == TS_REC) ? 1 : 0))#define BUFFER(s)     ts_buf(is_tabscan(s))/*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef int REPFN (int *ids, int cnt, int pfx, int supp);typedef struct _tale {          /* --- transaction list element --- */  struct _tale *succ;           /* pointer to successor element */  int          *items;          /* items in the transaction */} TALE;                         /* (transaction list element) */typedef struct _ttle {          /* --- trans. tree list element --- */  struct _ttle *succ;           /* pointer to successor element */  void         *items;          /* items in the transaction (tree) */  int          supp;            /* number of represented transactions */} TTLE;                         /* (transaction tree list element) */typedef struct {                /* --- transaction (tree) list --- */  void   *head;                 /* head element of the list */  int    supp;                  /* number of transactions */} TALIST;                       /* (transaction (tree) list) */typedef struct {                /* --- recursive elimination search */  int    cnt;                   /* total number of items */  int    min;                   /* minimum number of items */  int    max;                   /* maximum number of items */  int    trcnt;                 /* total number of transactions */  int    supp;                  /* minimum support (number of trans.) */  REPFN  *report;               /* report function for results */  int    isc;                   /* number of frequent item sets */  TALIST **lists;               /* vectors of trans. (tree) lists */  void   **elems;               /* vectors of list elements */  int    items[1];              /* item vector for reporting */} RELIM;                        /* (recursive elimination search) *//*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/#ifndef QUIET                   /* if not quiet version *//* --- error messages --- */static const char *errmsgs[] = {  /* E_NONE      0 */  "no error\n",  /* E_NOMEM    -1 */  "not enough memory\n",  /* E_FOPEN    -2 */  "cannot open file %s\n",  /* E_FREAD    -3 */  "read error on file %s\n",  /* E_FWRITE   -4 */  "write error on file %s\n",  /* E_OPTION   -5 */  "unknown option -%c\n",  /* E_OPTARG   -6 */  "missing option argument\n",  /* E_ARGCNT   -7 */  "wrong number of arguments\n",  /* E_STDIN    -8 */  "double assignment of standard input\n",  /* E_SUPP     -9 */  "invalid minimal support %g%%\n",  /* E_ITEMCNT -10 */  "invalid number of items %d\n",  /* E_NOTAS   -11 */  "no items or transactions to work on\n",  /*    -12 to -15 */  NULL, NULL, NULL, NULL,  /* E_ITEMEXP -16 */  "file %s, record %d: item expected\n",  /* E_DUPITEM -17 */  "file %s, record %d: duplicate item %s\n",  /* E_UNKNOWN -18 */  "unknown error\n"};#endif/*----------------------------------------------------------------------  Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char    *prgname;        /* program name for error messages */#endifstatic ITEMSET *itemset = NULL;     /* item set */static TASET   *taset   = NULL;     /* transaction set */static TATREE  *tatree  = NULL;     /* transaction tree */static FILE    *in      = NULL;     /* input  file */static FILE    *out     = NULL;     /* output file */static int     tacnt    = 0;        /* number of transactions */static ISEVAL  *isevl   = NULL;     /* item set evaluator */static ISFMTR  *isfmt   = NULL;     /* item set formatter */static double  mindev   = -DBL_MAX; /* minimal value of deviation */static int     flags    = OF_REL;   /* output flags */static char    *fmt     = "%.1f";   /* output format for support *//*----------------------------------------------------------------------  Item Set Report Function----------------------------------------------------------------------*/static int report (int *ids, int cnt, int pfx, int supp){                               /* --- report a frequent item set */  double dev = 0;               /* deviation from indep. occurrence */  assert(ids);                  /* check the function arguments */  if (flags & OF_DEV) {         /* if to filter w.r.t. deviation */    dev = ise_eval(isevl, ids, cnt, pfx, supp);    if (dev < mindev) return 0; /* compute and check the deviation */  }                             /* from an independent occurrence */  isf_format(isfmt, ids, cnt, pfx);  isf_print(isfmt, out);        /* print the items of the set */  fputs(" (", out);             /* print rel. and abs. support */  if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100);                        fputs((flags & OF_ABS) ? "%/" : "%", out); }  if (flags & OF_ABS) { fprintf(out, "%d", supp); }  if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); }  fputs(")\n", out);            /* print deviation from independence */  return 1;                     /* return 'reported 1 item set' */}  /* report() *//*----------------------------------------------------------------------  Recursive Elimination Functions----------------------------------------------------------------------*/static int re_vecs (TALIST *lists, int i, int d, int pfx, RELIM *re){                               /* --- recursive elimination */  int    k, x, item;            /* loop variable, buffers */  TALIST *subls, *s;            /* lists of subset of transactions */  TALE   *elems;                /* elements of transaction lists */  TALE   *src, *dst, *buf;      /* to traverse the transaction lists */  subls = re->lists[d];         /* get the subset lists vector */  if (!subls) {                 /* if it does not exist yet */    re->lists[d] = subls = (TALIST*)calloc(re->cnt, sizeof(TALIST));    if (!subls) return -1;      /* create a new subset lists vector */  }                             /* and store it in the vector buffer */  elems = (TALE*)re->elems[d];  /* get the list elements vector */  if (!elems) {                 /* if it does not exist yet */    re->elems[d] = elems = (TALE*)  malloc(re->trcnt *sizeof(TALE));    if (!elems) return -1;      /* create a new list elements vector */  }                             /* and store it in the vector buffer */  for (d++; i < re->cnt; i++) { /* traverse the transaction lists */    s = lists +i;               /* get the next transaction list */    if (s->supp <= 0) continue; /* skip empty transaction lists */    dst = NULL; x = 0;          /* default: only eliminate item */    if (s->supp >= re->supp) {  /* if the support is high enough */      if (d < re->max)          /* if not already at maximum size, */        dst = elems;            /* collect subset of transactions */      re->items[d-1] = i;       /* store the last item of the set */      if (d >= re->min) {       /* if the item set is large enough */        x = re->report(re->items, d, pfx, s->supp);        if (x) { re->isc++; pfx = d-1; }      }                         /* report the frequent item set and */    }                           /* update counter and prefix length */    s->supp = 0;                /* clear the transaction counter */    src     = (TALE*)s->head;   /* get the transaction list */    if (!src) continue;         /* skip empty transaction lists */    s->head = NULL;             /* clear the list's head pointer */    do {                        /* item elimination loop */      buf  = src;               /* get the next list element and */      src  = src->succ;         /* remove it from the trans. list */      item = *buf->items++;     /* get and remove the first item */      if (item & RE_EOT) {      /* if there is only the first item, */        item &= ~RE_EOT;        /* get this item and increment the */        lists[item].supp++;     /* corresponding element counters */        if (dst) { dst++; subls[item].supp++; } }      else {                    /* if there is more than one item */        buf->succ = (TALE*)lists[item].head;        lists[item].head = buf; /* move elem. to corresp. trans. list */        lists[item].supp++;     /* and count the added transaction */        if (dst) {              /* if to collect transactions */          dst->items = buf->items;          dst->succ  = (TALE*)subls[item].head;          subls[item].head = dst++;          subls[item].supp++;   /* add the transaction in a new */        }                       /* list element at the head of the */      }                         /* corresponding transaction sublist */    } while (src);              /* while transaction list not empty */    if (!dst) continue;         /* if no trans. collected, skip rest */    k = (int)(dst -elems);      /* compute number of transactions */    if (k >= re->supp) {        /* if subset support is big enough */      if (re_vecs(subls, i+1, d, pfx +x, re) < 0)        return -1; }            /* process transactions recursively */    else if (k > 0) {           /* if trans. have been collected */      for (k = re->cnt; --k > i; ) {        subls[k].head = NULL;   /* clear all lists that may have */        subls[k].supp = 0;      /* been filled with transactions */      }                         /* to have a clean list vector */    }                           /* for the next collection run */  }  return 0;                     /* return 'ok' */}  /* re_vecs() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -