⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 relim.c

📁 数据挖掘中的一算法 relim算法
💻 C
📖 第 1 页 / 共 3 页
字号:
/*----------------------------------------------------------------------  File    : relim.c  Contents: relim algorithm for finding frequent item sets  Author  : Christian Borgelt  History : 05.11.2004 file created from eclat.c            17.11.2004 first reasonably fast version completed            18.11.2004 start of loop over transactions lists improved            20.11.2004 transaction tree version added            23.11.2004 absolute/relative support output changed            09.12.2004 filter added (binary logarithm of supp. quotient)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <limits.h>#include <float.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME     "relim"#define DESCRIPTION "find frequent item sets with the relim algorithm"#define VERSION     "version 1.10 (2004.12.15)        " \                    "(c) 2004   Christian Borgelt"/* --- marker --- */#define RE_EOT      INT_MIN     /* end of transaction marker *//* --- output flags --- */#define OF_REL      0x01        /* print relative support */#define OF_ABS      0x02        /* print absolute support */#define OF_DEV      0x04        /* print deviation from indep. occ. */#define OF_SCANF    0x08        /* convert names to scanable form *//* --- error codes --- */#define E_OPTION    (-5)        /* unknown option */#define E_OPTARG    (-6)        /* missing option argument */#define E_ARGCNT    (-7)        /* too few/many arguments */#define E_STDIN     (-8)        /* double assignment of stdin */#define E_SUPP      (-9)        /* invalid item set support */#define E_ITEMCNT  (-10)        /* invalid number of items */#define E_NOTAS    (-11)        /* no items or transactions */#define E_UNKNOWN  (-18)        /* unknown error */#ifndef QUIET                   /* if not quiet version */#define MSG(x)        x         /* print messages */#else                           /* if quiet version */#define MSG(x)                  /* suppress messages */#endif#define SEC_SINCE(t)  ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s)     (tfs_reccnt(is_tfscan(s)) \                      + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))#define BUFFER(s)     tfs_buf(is_tfscan(s))/*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef int REPFN (int *ids, int cnt, int supp);typedef struct _tale {          /* --- transaction list element --- */  struct _tale *succ;           /* pointer to successor element */  int          *items;          /* items in the transaction */} TALE;                         /* (transaction list element) */typedef struct _ttle {          /* --- trans. tree list element --- */  struct _ttle *succ;           /* pointer to successor element */  void         *items;          /* items in the transaction (tree) */  int          supp;            /* number of represented transactions */} TTLE;                         /* (transaction tree list element) */typedef struct {                /* --- transaction (tree) list --- */  void   *head;                 /* head element of the list */  int    supp;                  /* number of transactions */} TALIST;                       /* (transaction (tree) list) */typedef struct {                /* --- recursive elimination search */  int    cnt;                   /* total number of items */  int    min;                   /* minimum number of items */  int    max;                   /* maximum number of items */  int    trcnt;                 /* total number of transactions */  int    supp;                  /* minimum support (number of trans.) */  REPFN  *report;               /* report function for results */  int    isc;                   /* number of frequent item sets */  TALIST **lists;               /* vectors of trans. (tree) lists */  void   **elems;               /* vectors of list elements */  int    items[1];              /* item vector for reporting */} RELIM;                        /* (recursive elimination search) *//*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/#define LN_2       0.69314718055994530942   /* ln(2) */#ifndef QUIET                   /* if not quiet version *//* --- error messages --- */static const char *errmsgs[] = {  /* E_NONE      0 */  "no error\n",  /* E_NOMEM    -1 */  "not enough memory\n",  /* E_FOPEN    -2 */  "cannot open file %s\n",  /* E_FREAD    -3 */  "read error on file %s\n",  /* E_FWRITE   -4 */  "write error on file %s\n",  /* E_OPTION   -5 */  "unknown option -%c\n",  /* E_OPTARG   -6 */  "missing option argument\n",  /* E_ARGCNT   -7 */  "wrong number of arguments\n",  /* E_STDIN    -8 */  "double assignment of standard input\n",  /* E_SUPP     -9 */  "invalid minimal support %g%%\n",  /* E_ITEMCNT -10 */  "invalid number of items %d\n",  /* E_NOTAS   -11 */  "no items or transactions to work on\n",  /*    -12 to -15 */  NULL, NULL, NULL, NULL,  /* E_ITEMEXP -16 */  "file %s, record %d: item expected\n",  /* E_DUPITEM -17 */  "file %s, record %d: duplicate item %s\n",  /* E_UNKNOWN -18 */  "unknown error\n"};#endif/*----------------------------------------------------------------------  Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char    *prgname;        /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET   *taset   = NULL; /* transaction set */static TATREE  *tatree  = NULL; /* transaction tree */static FILE    *in      = NULL; /* input  file */static FILE    *out     = NULL; /* output file */static int     tacnt    = 0;    /* number of transactions */static double  mindev   = -DBL_MAX; /* minimal value of deviation */static double  *logfs   = NULL;     /* logarithms of item frequencies */static int     flags    = OF_REL;   /* output flags */static char    *fmt     = "%.1f";   /* output format for support */static char    buf[4*TFS_SIZE+4];   /* buffer for formatting *//*----------------------------------------------------------------------  Item Set Report Function----------------------------------------------------------------------*/static int report (int *ids, int cnt, int supp){                               /* --- report a frequent item set */  int        i;                 /* loop variable */  const char *name;             /* buffer for item name */  double     dev;               /* deviation from indep. occurrence */  assert(ids);                  /* check the function arguments */  if (flags & OF_DEV) {         /* if to filter w.r.t. deviation */    for (dev = 0, i = cnt; --i >= 0; )      dev += logfs[ids[i]];     /* sum the logs of the item freqs. */    dev = (log(supp/(double)tacnt) -dev) /LN_2;    if (dev < mindev) return 0; /* compute and check the deviation */  }                             /* from an independent occurrence */  for (i = 0; i < cnt; i++) {   /* traverse the item set */    name = is_name(itemset, ids[i]);    if (flags & OF_SCANF) {     /* convert to scanable form */      sc_format(buf, name, 0); name = buf; }    fputs(name, out); fputc(' ', out);  }                             /* print the item names */  fputs(" (", out);             /* print rel. and abs. support */  if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100);                        fputs((flags & OF_ABS) ? "%/" : "%", out); }  if (flags & OF_ABS) { fprintf(out, "%d", supp); }  if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); }  fputs(")\n", out);            /* terminate the support output */  return 1;                     /* return 'reported 1 item set' */}  /* report() *//*----------------------------------------------------------------------  Recursive Elimination Functions----------------------------------------------------------------------*/static int re_vecs (TALIST *lists, int d, int i, RELIM *re){                               /* --- recursive elimination */  int    k, item;               /* loop variable, buffers */  TALIST *subls, *s;            /* lists of subset of transactions */  TALE   *elems;                /* elements of transaction lists */  TALE   *src, *dst, *buf;      /* to traverse the transaction lists */  subls = re->lists[d];         /* get the subset lists vector */  if (!subls) {                 /* if it does not exist yet */    re->lists[d] = subls = (TALIST*)calloc(re->cnt, sizeof(TALIST));    if (!subls) return -1;      /* create a new subset lists vector */  }                             /* and store it in the vector buffer */  elems = (TALE*)re->elems[d];  /* get the list elements vector */  if (!elems) {                 /* if it does not exist yet */    re->elems[d] = elems = (TALE*)  malloc(re->trcnt *sizeof(TALE));    if (!elems) return -1;      /* create a new list elements vector */  }                             /* and store it in the vector buffer */  for (d++; i < re->cnt; i++) { /* traverse the transaction lists */    s = lists +i;               /* get the next transaction list */    if (s->supp <= 0) continue; /* skip empty transaction lists */    dst = NULL;                 /* default: only eliminate item */    if (s->supp >= re->supp) {  /* if the support is high enough */      if (d < re->max)          /* if not already at maximum size, */        dst = elems;            /* collect subset of transactions */      re->items[d-1] = i;       /* store the last item of the set */      if (d >= re->min)         /* if the item set is large enough */        re->isc += re->report(re->items, d, s->supp);    }                           /* report the frequent item set */    s->supp = 0;                /* clear the transaction counter */    src     = (TALE*)s->head;   /* get the transaction list */    if (!src) continue;         /* skip empty transaction lists */    s->head = NULL;             /* clear the list's head pointer */    do {                        /* item elimination loop */      buf  = src;               /* get the next list element and */      src  = src->succ;         /* remove it from the trans. list */      item = *buf->items++;     /* get and remove the first item */      if (item & RE_EOT) {      /* if there is only the first item, */        item &= ~RE_EOT;        /* get this item and increment the */        lists[item].supp++;     /* corresponding element counters */        if (dst) { dst++; subls[item].supp++; } }      else {                    /* if there is more than one item */        buf->succ = (TALE*)lists[item].head;        lists[item].head = buf; /* move elem. to corresp. trans. list */        lists[item].supp++;     /* and count the added transaction */        if (dst) {              /* if to collect transactions */          dst->items = buf->items;          dst->succ  = (TALE*)subls[item].head;          subls[item].head = dst++;          subls[item].supp++;   /* add the transaction in a new */        }                       /* list element at the head of the */      }                         /* corresponding transaction sublist */    } while (src);              /* while transaction list not empty */    if (!dst) continue;         /* if no trans. collected, skip rest */    k = (int)(dst -elems);      /* compute number of transactions */    if (k >= re->supp) {        /* if subset support is big enough */      if (re_vecs(subls, d, i+1, re) < 0)        return -1; }            /* process transactions recursively */    else if (k > 0) {           /* if trans. have been collected */      for (k = re->cnt; --k > i; ) {        subls[k].head = NULL;   /* clear all lists that may have */        subls[k].supp = 0;      /* been filled with transactions */      }                         /* to have a clean list vector */    }                           /* for the next collection run */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -