📄 istree.c
字号:
/*---------------------------------------------------------------------- File : istree.c Contents: item set tree management Author : Christian Borgelt History : 1996.01.22 file created 1996.02.07 _child, _count, ist_addlvl, and ist_count 1996.02.09 ist_rule programmed and debugged 1996.02.10 empty rule bodies made optional 1996.03.28 support made relative to number of item sets 1996.06.25 function _count optimized 1996.11.23 rule extraction redesigned 1996.11.24 rule selection criteria added 1997.08.18 normalized chi^2 measure added parameter minlen added to function ist_init() 1998.01.15 confidence comparison changed to >= 1998.01.23 integer support computation changed (ceil) 1998.01.26 condition added to set extension in _child 1998.02.10 bug in computation of EM_INFO fixed 1998.02.11 parameter 'minval' added to function ist_init() 1998.05.14 item set tree navigation functions added 1998.08.08 item appearances considered for rule selection 1998.08.20 deferred child node vector allocation added 1998.09.02 several assertions added 1998.09.05 bug concerning node id fixed 1998.09.07 function ist_hedge added 1998.09.22 bug in rule extraction (item appearances) fixed 1998.09.23 computation of chi^2 measure simplified 1999.02.05 long int changed to int 1999.08.25 rule extraction simplified 1999.11.05 rule evaluation measure EM_AIMP added 1999.11.08 parameter 'aval' added to function ist_rule 1999.11.11 rule consequents moved to first field 1999.12.01 bug in node reallocation fixed 2001.04.01 functions ist_set and ist_getcntx added, functions _count and _getsupp improved 2001.12.28 sort function moved to module tract 2002.02.07 tree clearing removed, counting improved 2002.02.08 child creation improved (check of body support) 2002.02.10 IST_IGNORE bugs fixed (ist_set and ist_hedge) 2002.02.11 memory usage minimization option added 2002.02.12 ist_first and ist_last replaced by ist_next 2002.02.19 transaction tree functions added 2002.10.09 bug in function ist_hedge fixed (conf. comp.) 2003.03.12 parameter lift added to function ist_rule 2003.07.17 check of item usage added (function ist_check) 2003.07.18 maximally frequent item set filter added 2003.08.11 item set filtering generalized (ist_filter) 2003.08.15 renamed new to cur in ist_addlvl (C++ compat.) 2003.11.14 definition of F_HDONLY changed to INT_MIN 2003.12.02 skipping unnecessary subtrees added (_checksub) 2003.12.03 bug in ist_check for rule mining fixed 2003.12.12 padding for 64 bit architecture added 2004.05.09 additional selection measure for sets added 2004.12.09 bug in add. evaluation measure for sets fixed 2006.11.26 support parameter changed to an absolute value 2007.02.07 bug in function ist_addlvl / _child fixed 2008.01.25 bug in filtering closed/maximal item sets fixed 2008.03.13 additional rule evaluation redesigned 2008.03.24 creation based on ITEMSET structure----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <float.h>#include <math.h>#include <assert.h>#include "istree.h"#include "chi2.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define LN_2 0.69314718055994530942 /* ln(2) */#define EPSILON 1e-12 /* to cope with roundoff errors */#define BLKSIZE 32 /* block size for level vector */#define F_HDONLY INT_MIN /* flag for head only item in path */#define F_SKIP INT_MIN /* flag for subtree skipping */#define ID(n) ((int)((n)->id & ~F_HDONLY))#define HDONLY(n) ((int)((n)->id & F_HDONLY))#define COUNT(n) ((n) & ~F_SKIP)/*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef double EVALFN (int set, int body, int head, int n);/* function to compute an additional evaluation measure *//*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static int _bsearch (int *vec, int n, int id){ /* --- binary search for an item */ int i, k; /* left and middle index */ assert(vec && (n > 0)); /* check the function arguments */ for (i = 0; i < n; ) { /* while the range is not empty */ k = (i + n) >> 1; /* get index of middle element */ if (vec[k] > id) n = k; else if (vec[k] < id) i = k+1; else return k; /* adapt range boundaries or return */ } /* the index the id. was found at */ return -1; /* return 'not found' */} /* _bsearch() *//*--------------------------------------------------------------------*/static void _count (ISNODE *node, int *set, int cnt, int min){ /* --- count transaction recursively */ int i; /* vector index */ int *map, n; /* identifier map and its size */ ISNODE **vec; /* child node vector */ assert(node /* check the function arguments */ && (cnt >= 0) && (set || (cnt <= 0))); if (node->offset >= 0) { /* if a pure vector is used */ if (node->chcnt == 0) { /* if this is a new node */ n = node->offset; /* get the index offset */ while ((cnt > 0) && (*set < n)) { cnt--; set++; } /* skip items before first counter */ while (--cnt >= 0) { /* traverse the transaction's items */ i = *set++ -n; /* compute counter vector index */ if (i >= node->size) return; node->cnts[i]++; /* if the counter exists, */ } } /* count the transaction */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(node->cnts +node->size); n = ID(vec[0]); /* get the child node vector */ min--; /* one item less to the deepest nodes */ while ((cnt > min) && (*set < n)) { cnt--; set++; } /* skip items before first child */ while (--cnt >= min) { /* traverse the transaction's items */ i = *set++ -n; /* compute child vector index */ if (i >= node->chcnt) return; if (vec[i]) _count(vec[i], set, cnt, min); } /* if the child exists, */ } } /* count the transaction recursively */ else { /* if an identifer map is used */ map = node->cnts +(n = node->size); if (node->chcnt == 0) { /* if this is a new node */ while (--cnt >= 0) { /* traverse the transaction's items */ if (*set > map[n-1]) return; /* if beyond last item, abort */ i = _bsearch(map, n, *set++); if (i >= 0) node->cnts[i]++; } } /* find index and count transaction */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(map +n); /* get id. map and child vector */ if (node->chcnt < n) /* if a secondary id. map exists */ map = (int*)(vec +(n = node->chcnt)); min--; /* one item less to the deepest nodes */ while (--cnt >= min) { /* traverse the transaction's items */ if (*set > map[n-1]) return; /* if beyond last item, abort */ i = _bsearch(map, n, *set++); if ((i >= 0) && vec[i]) _count(vec[i], set, cnt, min); } /* search for the proper index */ } /* and if the child exists, */ } /* count the transaction recursively */} /* _count() *//*--------------------------------------------------------------------*/static void _countx (ISNODE *node, TATREE *tat, int min){ /* --- count transa. tree recursively */ int i, k; /* vector index, loop variable */ int *map, n; /* identifier map and its size */ ISNODE **vec; /* child node vector */ assert(node && tat); /* check the function arguments */ if (tat_max(tat) < min) /* if the transactions are too short, */ return; /* abort the recursion */ k = tat_size(tat); /* get the number of children */ if (k <= 0) { /* if there are no children */ if (k < 0) _count(node, tat_items(tat), -k, min); return; /* count the normal transaction */ } /* and abort the function */ while (--k >= 0) /* count the transactions recursively */ _countx(node, tat_child(tat, k), min); if (node->offset >= 0) { /* if a pure vector is used */ if (node->chcnt == 0) { /* if this is a new node */ n = node->offset; /* get the index offset */ for (k = tat_size(tat); --k >= 0; ) { i = tat_item(tat,k) -n; /* traverse the items */ if (i < 0) return; /* if before first item, abort */ if (i < node->size) /* if inside the counter range */ node->cnts[i] += tat_cnt(tat_child(tat, k)); } } /* count the transaction */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(node->cnts +node->size); n = ID(vec[0]); /* get the child node vector */ min--; /* one item less to the deepest nodes */ for (k = tat_size(tat); --k >= 0; ) { i = tat_item(tat,k) -n; /* traverse the items */ if (i < 0) return; /* if before first item, abort */ if ((i < node->chcnt) && vec[i]) _countx(vec[i], tat_child(tat, k), min); } /* if the child exists, */ } } /* count the transaction recursively */ else { /* if an identifer map is used */ map = node->cnts +(n = node->size); if (node->chcnt == 0) { /* if this is a new node */ for (k = tat_size(tat); --k >= 0; ) { i = tat_item(tat, k); /* get the next item */ if (i < map[0]) return; /* if before first item, abort */ i = _bsearch(map, n, i); if (i >= 0) node->cnts[i] += tat_cnt(tat_child(tat, k)); } } /* find index and count transaction */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(map +n); /* get id. map and child vector */ if (node->chcnt < n) /* if a secondary id. map exists */ map = (int*)(vec +(n = node->chcnt)); min--; /* one item less to the deepest nodes */ for (k = tat_size(tat); --k >= 0; ) { i = tat_item(tat, k); /* get the next item */ if (i < map[0]) return; /* if before first item, abort */ i = _bsearch(map, n, i); if ((i >= 0) && vec[i]) _countx(vec[i], tat_child(tat, k), min); } /* search for the proper index */ } /* and if the child exists, */ } /* count the transaction recursively */} /* _countx() *//*--------------------------------------------------------------------*/static int _checksub (ISNODE *node){ /* --- recursively check subtrees */ int i, r; /* vector index, result */ ISNODE **vec; /* child node vector */ assert(node); /* check the function argument */ if (node->chcnt == 0) return 0; /* do not skip new leaves */ if (node->chcnt < 0) return -1; /* skip marked subtrees */ if (node->offset >= 0) /* if a pure vector is used */ vec = (ISNODE**)(node->cnts +node->size); else /* if an identifer map is used */ vec = (ISNODE**)(node->cnts +node->size +node->size); for (r = -1, i = node->chcnt; --i >= 0; ) if (vec[i]) r &= _checksub(vec[i]); if (!r) return 0; /* recursively check all children */ node->chcnt |= F_SKIP; /* set the skip flag if possible */ return -1; /* return 'subtree can be skipped' */} /* _checksub() *//*--------------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -