📄 istree.c
字号:
/*---------------------------------------------------------------------- File : istree.c Contents: item set tree management Author : Christian Borgelt History : 22.01.1996 file created 07.02.1996 _child, _count, ist_addlvl, and ist_count 09.02.1996 ist_rule programmed and debugged 10.02.1996 empty rule bodies made optional 28.03.1996 support made relative to number of item sets 25.06.1996 function _count optimized 23.11.1996 rule extraction redesigned 24.11.1996 rule selection criteria added 18.08.1997 normalized chi^2 measure added parameter minlen added to function ist_init() 15.01.1998 confidence comparison changed to >= 23.01.1998 integer support computation changed (ceil) 26.01.1998 condition added to set extension in _child 10.02.1998 bug in computation of EM_INFO fixed 11.02.1998 parameter 'minval' added to function ist_init() 14.05.1998 item set tree navigation functions added 08.08.1998 item appearances considered for rule selection 20.08.1998 deferred child node vector allocation added 02.09.1998 several assertions added 05.09.1998 bug concerning node id fixed 07.09.1998 function ist_hedge added 22.09.1998 bug in rule extraction (item appearances) fixed 23.09.1998 computation of chi^2 measure simplified 05.02.1999 long int changed to int 25.08.1999 rule extraction simplified 05.11.1999 rule evaluation measure EM_AIMP added 08.11.1999 parameter 'aval' added to function ist_rule 11.11.1999 rule consequents moved to first field 01.12.1999 bug in node reallocation fixed 01.04.2001 functions ist_set and ist_getcntx added, functions _count and _getsupp improved 28.12.2001 sort function moved to module tract 07.02.2002 tree clearing removed, counting improved 08.02.2002 child creation improved (check of body support) 10.02.2002 IST_IGNORE bugs fixed (ist_set and ist_hedge) 11.02.2002 memory usage minimization option added 12.02.2002 ist_first and ist_last replaced by ist_next----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <float.h>#include <math.h>#include <assert.h>#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define LN_2 0.69314718055994530942 /* ln(2) */#define EPSILON 1e-12 /* to cope with roundoff errors */#define BLKSIZE 32 /* block size for level vector */#define F_HDONLY 0x80000000 /* flag for head only item in path */#define ID(n) ((int)((n)->id & ~F_HDONLY))#define HDONLY(n) ((int)((n)->id & F_HDONLY))/*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef double EVALFN (double head, double body, double post);/* function to compute an additional evaluation measure *//*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static int _bsearch (int *vec, int n, int id){ /* --- binary search for an item */ int i, k; /* left and middle index */ assert(vec && (n > 0)); /* check the function arguments */ for (i = 0; i < n; ) { /* while the range is not empty */ k = (i + n) >> 1; /* get index of middle element */ if (vec[k] > id) n = k; else if (vec[k] < id) i = k+1; else return k; /* adapt range boundaries or return */ } /* the index the id. was found at */ return -1; /* return 'not found' */} /* _bsearch() *//*--------------------------------------------------------------------*/static void _count (ISNODE *node, int *set, int cnt, int min){ /* --- count transaction recursively */ int i; /* vector index */ int *map, n; /* identifier map and its size */ ISNODE **vec; /* child node vector */ assert(node /* check the function arguments */ && (cnt >= 0) && (set || (cnt <= 0))); if (node->offset >= 0) { /* if a pure vector is used */ if (node->chcnt < 0) { /* if this is a new node */ n = node->offset; /* get the index offset */ while ((cnt > 0) && (*set < n)) { cnt--; set++; } /* skip items before first counter */ while (--cnt >= 0) { /* traverse the transaction's items */ i = *set++ -n; /* compute counter vector index */ if (i >= node->size) return; node->cnts[i]++; /* if the counter exists, */ } } /* count the transaction */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(node->cnts +node->size); n = ID(vec[0]); /* get the child node vector */ min--; /* one item less to the deepest nodes */ while ((cnt > min) && (*set < n)) { cnt--; set++; } /* skip items before first child */ while (--cnt >= min) { /* traverse the transaction's items */ i = *set++ -n; /* compute child vector index */ if (i >= node->chcnt) return; if (vec[i]) _count(vec[i], set, cnt, min); } /* if the child exists, */ } } /* count the transaction recursively */ else { /* if an identifer map is used */ map = node->cnts +(n = node->size); if (node->chcnt < 0) { /* if this is a new node */ while (--cnt >= 0) { /* traverse the transaction's items */ if (*set > map[n-1]) return; /* if beyond last item, abort */ i = _bsearch(map, n, *set++); if (i >= 0) node->cnts[i]++; } } /* find index and count transaction */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(map +n); /* get id. map and child vector */ if (node->chcnt < n) /* if a secondary id. map exists */ map = (int*)(vec +(n = node->chcnt)); min--; /* one item less to the deepest nodes */ while (--cnt >= min) { /* traverse the transaction's items */ if (*set > map[n-1]) return; /* if beyond last item, abort */ i = _bsearch(map, n, *set++); if ((i >= 0) && vec[i]) _count(vec[i], set, cnt, min); } /* search for the proper index */ } /* and if the child exists, */ } /* count the transaction recursively */} /* _count() *//*--------------------------------------------------------------------*/static int _getsupp (ISNODE *node, int *set, int cnt){ /* --- get support of an item set */ int i, n; /* vector index, buffer */ int *map; /* identifier map */ ISNODE **vec; /* vector of child nodes */ assert(node && set && (cnt >= 0)); /* check the function arguments */ while (--cnt > 0) { /* follow the set/path from the node */ if (node->chcnt <= 0) /* if there are no children, */ return -1; /* the support is less than minsupp */ if (node->offset >= 0) { /* if a pure vector is used */ vec = (ISNODE**)(node->cnts +node->size); i = *set++ -ID(vec[0]); /* compute the child vector index */ if (i >= node->chcnt) return -1; } else { /* if an identifier map is used */ map = node->cnts +(n = node->size); vec = (ISNODE**)(map +n); /* get id. map and child vector */ if (node->chcnt < n) /* if a secondary id. map exists */ map = (int*)(vec +(n = node->chcnt)); i = _bsearch(map, n, *set++); } /* search for the proper index */ if (i < 0) return -1; /* abort if index is out of range */ node = vec[i]; /* go to the corresponding child */ if (!node) return -1; /* if the child does not exists, */ } /* the support is less than minsupp */ if (node->offset >= 0) { /* if a pure vector is used, */ i = *set -node->offset; /* compute the counter index */ if (i >= node->size) return -1; } else { /* if an identifier map is used */ map = node->cnts +(n = node->size); i = _bsearch(map, n, *set); } /* search for the proper index */ if (i < 0) return -1; /* abort if index is out of range */ return node->cnts[i]; /* return the item set support */} /* _getsupp() *//*--------------------------------------------------------------------*/static ISNODE* _child (ISTREE *ist, ISNODE *node, int index, int s_min, int s_sub){ /* --- create child node (extend set) */ int i, k, n; /* loop variables, counters */ ISNODE *curr; /* to traverse the path to the root */ int item, cnt; /* item identifier, number of items */ int *set; /* next (partial) item set to check */ int body; /* enough support for a rule body */ int hdonly; /* whether head only item on path */ int app; /* appearance flags of an item */ int s_set; /* support of an item set */ assert(ist && node /* check the function arguments */ && (index >= 0) && (index < node->size)); if (node->offset >= 0) item = node->offset +index; else item = node->cnts[node->size +index]; app = ist->apps[item]; /* get item id. and appearance flag */ if ((app == IST_IGNORE) /* do not extend an item to ignore */ || ((HDONLY(node) && (app == IST_HEAD)))) return NULL; /* nor a set with two head only items */ hdonly = HDONLY(node) || (app == IST_HEAD); /* --- initialize --- */ s_set = node->cnts[index]; /* get support of item set to extend */ if (s_set < s_min) /* if set support is insufficient, */ return NULL; /* no child is needed, so abort */ body = (s_set >= s_sub) /* if the set has enough support for */ ? 1 : 0; /* a rule body, set the body flag */ ist->buf[ist->lvlvsz -2] = item; /* init. set for support checks */ /* --- check candidates --- */ for (n = 0, i = index +1; i < node->size; i++) { if (node->offset >= 0) k = node->offset +i; else k = node->cnts[node->size +i]; app = ist->apps[k]; /* traverse the candidate items */ if ((app == IST_IGNORE) || (hdonly && (app == IST_HEAD))) continue; /* skip sets with two head only items */ s_set = node->cnts[i]; /* traverse the candidate items */ if (s_set < s_min) /* if set support is insufficient, */ continue; /* ignore the corresponding candidate */ body &= 1; /* restrict body flags to the set S */ if (s_set >= s_sub) /* if set support is sufficient for */ body |= 2; /* a rule body, set the body flag */ set = ist->buf +ist->lvlvsz -(cnt = 2); set[1] = k; /* add the candidate item to the set */ for (curr = node; curr->parent; curr = curr->parent) { s_set = _getsupp(curr->parent, set, cnt); if (s_set < s_min) /* get the item set support and */ break; /* if it is too low, abort the loop */ if (s_set >= s_sub) /* if some subset has enough support */ body |= 4; /* for a rule body, set the body flag */ *--set = ID(curr); cnt++; /* add id of current node to the set */ } /* and adapt the number of items */ if (!curr->parent && body) /* if subset support is high enough */ ist->map[n++] = k; /* for a full rule and a rule body, */ } /* note the item identifier */ if (n <= 0) return NULL; /* if no child is needed, abort */ /* --- decide on node structure --- */ k = ist->map[n-1] -ist->map[0] +1; if (!ist->memopt) n = k; /* check the size of a pure vector */ else if (3*n >= 2*k) n = k; /* use a pure vector if it is small */ else k = n+n; /* enough, otherwise use an id. map */ /* --- create child --- */ curr = (ISNODE*)malloc(sizeof(ISNODE) +(k-1) *sizeof(int)); if (!curr) return (void*)-1; /* create a child node */ curr->parent = node; /* set pointer to parent node */ curr->succ = NULL; /* and clear successor pointer */ curr->id = item; /* initialize the item id. and */ if (hdonly) curr->id |= F_HDONLY; /* set the head only flag */ curr->chcnt = -1; /* there are no children yet */ curr->size = n; /* set size of counter vector */ if (n == k) /* if to use a pure vector, */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -