⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 istree.c

📁 数据挖掘中的关联规则算法
💻 C
📖 第 1 页 / 共 5 页
字号:
/*----------------------------------------------------------------------  File    : istree.c  Contents: item set tree management  Author  : Christian Borgelt  History : 22.01.1996 file created            07.02.1996 _child, _count, ist_addlvl, and ist_count            09.02.1996 ist_rule programmed and debugged            10.02.1996 empty rule bodies made optional            28.03.1996 support made relative to number of item sets            25.06.1996 function _count optimized            23.11.1996 rule extraction redesigned            24.11.1996 rule selection criteria added            18.08.1997 normalized chi^2 measure added                       parameter minlen added to function ist_init()            15.01.1998 confidence comparison changed to >=            23.01.1998 integer support computation changed (ceil)            26.01.1998 condition added to set extension in _child            10.02.1998 bug in computation of EM_INFO fixed            11.02.1998 parameter 'minval' added to function ist_init()            14.05.1998 item set tree navigation functions added            08.08.1998 item appearances considered for rule selection            20.08.1998 deferred child node vector allocation added            02.09.1998 several assertions added            05.09.1998 bug concerning node id fixed            07.09.1998 function ist_hedge added            22.09.1998 bug in rule extraction (item appearances) fixed            23.09.1998 computation of chi^2 measure simplified            05.02.1999 long int changed to int            25.08.1999 rule extraction simplified            05.11.1999 rule evaluation measure EM_AIMP added            08.11.1999 parameter 'aval' added to function ist_rule            11.11.1999 rule consequents moved to first field            01.12.1999 bug in node reallocation fixed            01.04.2001 functions ist_set and ist_getcntx added,                       functions _count and _getsupp improved            28.12.2001 sort function moved to module tract            07.02.2002 tree clearing removed, counting improved            08.02.2002 child creation improved (check of body support)            10.02.2002 IST_IGNORE bugs fixed (ist_set and ist_hedge)            11.02.2002 memory usage minimization option added            12.02.2002 ist_first and ist_last replaced by ist_next            19.02.2002 transaction tree functions added            09.10.2002 bug in function ist_hedge fixed (conf. comp.)            12.03.2003 parameter lift added to function ist_rule            17.07.2003 check of item usage added (function ist_check)            18.07.2003 maximally frequent item set filter added            11.08.2003 item set filtering generalized (ist_filter)            15.08.2003 renamed new to cur in ist_addlvl (C++ compat.)            14.11.2003 definition of F_HDONLY changed to INT_MIN            02.12.2003 skipping unnecessary subtrees added (_stskip)            03.12.2003 bug in ist_check for rule mining fixed            12.12.2003 padding for 64 bit architecture added            09.05.2004 additional selection measure for sets added            09.12.2004 bug in add. evaluation measure for sets fixed----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <float.h>#include <math.h>#include <assert.h>#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define LN_2       0.69314718055994530942   /* ln(2) */#define EPSILON    1e-12        /* to cope with roundoff errors */#define BLKSIZE    32           /* block size for level vector */#define F_HDONLY   INT_MIN      /* flag for head only item in path */#define F_SKIP     INT_MIN      /* flag for subtree skipping */#define ID(n)      ((int)((n)->id & ~F_HDONLY))#define HDONLY(n)  ((int)((n)->id &  F_HDONLY))/*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef double EVALFN (double head, double body, double post);/* function to compute an additional evaluation measure *//*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/static int _bsearch (int *vec, int n, int id){                               /* --- binary search for an item */  int i, k;                     /* left and middle index */  assert(vec && (n > 0));       /* check the function arguments */  for (i = 0; i < n; ) {        /* while the range is not empty */    k = (i + n) >> 1;           /* get index of middle element */    if      (vec[k] > id) n = k;    else if (vec[k] < id) i = k+1;    else return k;              /* adapt range boundaries or return */  }                             /* the index the id. was found at */  return -1;                    /* return 'not found' */}  /* _bsearch() *//*--------------------------------------------------------------------*/static void _count (ISNODE *node, int *set, int cnt, int min){                               /* --- count transaction recursively */  int    i;                     /* vector index */  int    *map, n;               /* identifier map and its size */  ISNODE **vec;                 /* child node vector */  assert(node                   /* check the function arguments */      && (cnt >= 0) && (set || (cnt <= 0)));  if (node->offset >= 0) {      /* if a pure vector is used */    if (node->chcnt == 0) {     /* if this is a new node */      n = node->offset;         /* get the index offset */      while ((cnt > 0) && (*set < n)) {        cnt--; set++; }         /* skip items before first counter */      while (--cnt >= 0) {      /* traverse the transaction's items */        i = *set++ -n;          /* compute counter vector index */        if (i >= node->size) return;        node->cnts[i]++;        /* if the counter exists, */      } }                       /* count the transaction */    else if (node->chcnt > 0) { /* if there are child nodes */      vec = (ISNODE**)(node->cnts +node->size);      n   = ID(vec[0]);         /* get the child node vector */      min--;                    /* one item less to the deepest nodes */      while ((cnt > min) && (*set < n)) {        cnt--; set++; }         /* skip items before first child */      while (--cnt >= min) {    /* traverse the transaction's items */        i = *set++ -n;          /* compute child vector index */        if (i >= node->chcnt) return;        if (vec[i]) _count(vec[i], set, cnt, min);      }                         /* if the child exists, */    } }                         /* count the transaction recursively */  else {                        /* if an identifer map is used */    map = node->cnts +(n = node->size);    if (node->chcnt == 0) {     /* if this is a new node */      while (--cnt >= 0) {      /* traverse the transaction's items */        if (*set > map[n-1]) return;  /* if beyond last item, abort */        i = _bsearch(map, n, *set++);        if (i >= 0) node->cnts[i]++;      } }                       /* find index and count transaction */    else if (node->chcnt > 0) { /* if there are child nodes */      vec = (ISNODE**)(map +n); /* get id. map and child vector */      if (node->chcnt < n)      /* if a secondary id. map exists */        map = (int*)(vec +(n = node->chcnt));      min--;                    /* one item less to the deepest nodes */      while (--cnt >= min) {    /* traverse the transaction's items */        if (*set > map[n-1]) return;  /* if beyond last item, abort */        i = _bsearch(map, n, *set++);        if ((i >= 0) && vec[i]) _count(vec[i], set, cnt, min);      }                         /* search for the proper index */    }                           /* and if the child exists, */  }                             /* count the transaction recursively */}  /* _count() *//*--------------------------------------------------------------------*/static void _countx (ISNODE *node, TATREE *tat, int min){                               /* --- count t.a. tree recursively */  int    i, k;                  /* vector index, loop variable */  int    *map, n;               /* identifier map and its size */  ISNODE **vec;                 /* child node vector */  assert(node && tat);          /* check the function arguments */  if (tat_max(tat) < min)       /* if the transactions are too short, */    return;                     /* abort the recursion */  k = tat_size(tat);            /* get the number of children */  if (k <= 0) {                 /* if there are no children */    if (k < 0) _count(node, tat_items(tat), -k, min);    return;                     /* count the normal transaction */  }                             /* and abort the function */  while (--k >= 0)              /* count the transactions recursively */    _countx(node, tat_child(tat, k), min);  if (node->offset >= 0) {      /* if a pure vector is used */    if (node->chcnt == 0) {     /* if this is a new node */      n = node->offset;         /* get the index offset */      for (k = tat_size(tat); --k >= 0; ) {        i = tat_item(tat,k) -n; /* traverse the items */        if (i < 0) return;      /* if before first item, abort */        if (i < node->size)     /* if inside the counter range */          node->cnts[i] += tat_cnt(tat_child(tat, k));      } }                       /* count the transaction */    else if (node->chcnt > 0) { /* if there are child nodes */      vec = (ISNODE**)(node->cnts +node->size);      n   = ID(vec[0]);         /* get the child node vector */      min--;                    /* one item less to the deepest nodes */      for (k = tat_size(tat); --k >= 0; ) {        i = tat_item(tat,k) -n; /* traverse the items */        if (i < 0) return;      /* if before first item, abort */        if ((i < node->chcnt) && vec[i])          _countx(vec[i], tat_child(tat, k), min);      }                         /* if the child exists, */    } }                         /* count the transaction recursively */  else {                        /* if an identifer map is used */    map = node->cnts +(n = node->size);    if (node->chcnt == 0) {     /* if this is a new node */      for (k = tat_size(tat); --k >= 0; ) {        i = tat_item(tat, k);   /* get the next item */        if (i < map[0]) return; /* if before first item, abort */        i = _bsearch(map, n, i);        if (i >= 0) node->cnts[i] += tat_cnt(tat_child(tat, k));      } }                       /* find index and count transaction */    else if (node->chcnt > 0) { /* if there are child nodes */      vec = (ISNODE**)(map +n); /* get id. map and child vector */      if (node->chcnt < n)      /* if a secondary id. map exists */        map = (int*)(vec +(n = node->chcnt));      min--;                    /* one item less to the deepest nodes */      for (k = tat_size(tat); --k >= 0; ) {        i = tat_item(tat, k);   /* get the next item */        if (i < map[0]) return; /* if before first item, abort */        i = _bsearch(map, n, i);        if ((i >= 0) && vec[i]) _countx(vec[i], tat_child(tat, k), min);      }                         /* search for the proper index */    }                           /* and if the child exists, */  }                             /* count the transaction recursively */}  /* _countx() *//*--------------------------------------------------------------------*/static int _stskip (ISNODE *node){                               /* --- set subtree skip flags */  int    i, r;                  /* vector index, result */  ISNODE **vec;                 /* child node vector */  assert(node);                 /* check the function argument */  if (node->chcnt  == 0) return  0;  /* do not skip new leaves */  if (node->chcnt  <  0) return -1;  /* skip marked subtrees */  if (node->offset >= 0)        /* if a pure vector is used */    vec = (ISNODE**)(node->cnts +node->size);  else                          /* if an identifer map is used */    vec = (ISNODE**)(node->cnts +node->size +node->size);  for (r = -1, i = node->chcnt; --i >= 0; )    if (vec[i]) r &= _stskip(vec[i]);  if (!r) return 0;             /* recursively check all children */  node->chcnt |= F_SKIP;        /* set the skip flag if possible */  return -1;                    /* return 'skip subtree' */}  /* _stskip() *//*--------------------------------------------------------------------*/static int _check (ISNODE *node, char *marks, int supp){                               /* --- recursively check item usage */  int    i, r = 0;              /* vector index, result of check */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -