📄 istree.c
字号:
curr->offset = ist->map[0]; /* note the first item as an offset */ else { /* if to use an identifier map, */ curr->offset = -1; /* use the offset as an indicator */ for (set = curr->cnts +n +(i = n); --i >= 0; ) *--set = ist->map[i]; /* copy the identifier map */ } /* from the buffer to the node */ for (set = curr->cnts +(i = n); --i >= 0; ) *--set = 0; /* clear all counters of the node */ return curr; /* return pointer to created child */} /* _child() *//*---------------------------------------------------------------------- In the above function the set S represented by the index-th vectorelement of the current node is extended only by combining it with thesets represented by the fields that follow it in the node vector,i.e. by the sets represented by vec[index+1] to vec[size-1]. The setsthat can be formed by combining the set S and the sets represented byvec[0] to vec[index-1] are processed in the branches for these sets. In the 'check candidates' loop it is checked for each set representedby vec[index+1] to vec[size-1] whether this set and all other subsetsof the same size, which can be formed from the union of this set andthe set S, have enough support, so that a child node is necessary. Note that i +offset is the identifier of the item that has to beadded to set S to form the union of the set S and the set T representedby vec[i], since S and T have the same path with the exception of theindex in the current node. Hence we can speak of candidate items thatare added to S. Checking the support of the other subsets of the union of S and Tthat have the same size as S and T is done with the aid of a pathvariable. The items in this variable combined with the items on thepath to the current node always represent the subset currently tested.That is, the path variable holds the path to be followed from thecurrent node to arrive at the support counter for the subset. The pathvariable is initialized to [0]: <item>, [1]: <offset+i>, since thesupport counters for S and T can be inspected directly. Then thispath is followed from the parent node of the current node, which isequivalent to checking the subset that can be obtained by removingfrom the union of S and T the item that corresponds to the parent node(in the path to S or T, resp.). Iteratively making the parent node the current node, adding itscorresponding item to the path and checking the support counter at theend of the path variable when starting from its (the new current node's)parent node tests all other subsets. Another criterion is that the extended set must not contain two itemswhich may appear only in the head of a rule. If two such items arecontained in a set, neither can a rule be formed from its items nor canit be the antecedent of a rule. Whether a set contains two head onlyitems is determined from the nodes 'hdonly' flag and the appearanceflags of the items.----------------------------------------------------------------------*/static void _cleanup (ISTREE *ist){ /* --- clean up on error */ ISNODE *node, *t; /* to traverse the nodes */ assert(ist); /* check the function argument */ for (node = ist->levels[ist->lvlcnt]; node; ) { t = node; node = node->succ; free(t); } ist->levels[ist->lvlcnt] = NULL; /* delete all created nodes */ for (node = ist->levels[ist->lvlcnt -1]; node; node = node->succ) node->chcnt = -1; /* clear the child node counters */} /* _cleanup() */ /* of the deepest nodes in the tree *//*---------------------------------------------------------------------- Additional Evaluation Measure Functions----------------------------------------------------------------------*/static double _none (double head, double body, double post){ return 1; } /* --- no add. evaluation measure *//*--------------------------------------------------------------------*/static double _diff (double head, double body, double post){ return fabs(post -head); } /* --- absolute confidence difference *//*--------------------------------------------------------------------*/static double _quot (double head, double body, double post){ /* --- diff. of conf. quotient to 1 */ if (post > head) return 1 -head/post; return (head <= 0) ? 0 : (1 -post/head);} /* _quot() *//*--------------------------------------------------------------------*/static double _aimp (double head, double body, double post){ /* --- abs. diff. of improvement to 1 */ return (head <= 0) ? 0 : fabs(1 -post/head);} /* _aimp() *//*--------------------------------------------------------------------*/static double _info (double head, double body, double post){ /* --- information diff. to prior */ double res, t; /* result, temporary buffer */ if ((head < EPSILON) || (1-head < EPSILON) || (body < EPSILON) || (1-body < EPSILON)) return 0; /* check for strict positivity */ post *= body; res = 0; /* support of head and body */ if (post > 0) res += post *log(post /( head * body)); t = body -post; /* support of not head and body */ if (t > 0) res += t *log(t /((1-head) * body)); t = head -post; /* support of head and not body */ if (t > 0) res += t *log(t /( head *(1-body))); t = 1-head -body +post; /* support of not head and not body */ if (t > 0) res += t *log(t /((1-head) *(1-body))); return res/LN_2; /* return information gain in bits */} /* _info() *//*--------------------------------------------------------------------*/static double _chi2 (double head, double body, double post){ /* --- normalized chi^2 measure */ double t; /* temporary buffer */ if ((head < EPSILON) || (1-head < EPSILON) || (body < EPSILON) || (1-body < EPSILON)) return 0; /* check for strict positivity */ t = head *body -post *body; /* compute and return chi^2 measure */ return (t*t) / (head *(1-head) *body *(1-body));} /* _chi2() *//*--------------------------------------------------------------------*/static EVALFN *_evalfns[EM_UNKNOWN] = { /* EM_NONE 0 */ _none, /* no additional evaluation measure */ /* EM_DIFF 1 */ _diff, /* absolute confidence difference */ /* EM_QUOT 2 */ _quot, /* difference of conf. quotient to 1 */ /* EM_AIMP 3 */ _aimp, /* abs. diff. of improvement to 1 */ /* EM_INFO 4 */ _info, /* information difference to prior */ /* EM_CHI2 5 */ _chi2, /* normalized chi^2 measure */}; /* table of evaluation functions *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/ISTREE* ist_create (int itemcnt, double supp, double conf, int rsdef, const char *apps, int memopt){ /* --- create an item set tree */ ISTREE *ist; /* created item set tree */ ISNODE **lvl; /* vector of tree levels */ ISNODE *root; /* root node of the tree */ int *buf, *map; /* buffer vector, identifier map */ char *a; /* to traverse appearances vector */ assert((itemcnt >= 0) /* check the function arguments */ && (supp >= 0) && (supp <= 1) && (conf >= 0) && (conf <= 1)); /* --- allocate memory --- */ ist = (ISTREE*)malloc(sizeof(ISTREE) +(itemcnt-1) *sizeof(char)); if (!ist) return NULL; /* allocate the tree body */ ist->levels = lvl = (ISNODE**)malloc(BLKSIZE *sizeof(ISNODE*)); if (!lvl) { free(ist); return NULL; } ist->buf = buf = (int*) malloc(BLKSIZE *sizeof(int)); if (!buf) { free(lvl); free(ist); return NULL; } ist->map = map = (int*) malloc(itemcnt *sizeof(int)); if (!map) { free(buf); free(lvl); free(ist); return NULL; } lvl[0] = ist->curr = root = /* allocate a root node */ (ISNODE*)calloc(1, sizeof(ISNODE) +(itemcnt-1) *sizeof(int)); if (!root){ free(map); free(buf); free(lvl); free(ist); return NULL; } /* --- initialize structures --- */ ist->lvlvsz = BLKSIZE; /* copy parameters to the structure */ ist->lvlcnt = 1; ist->tacnt = 0; ist->supp = supp; ist->conf = conf; ist->rsdef = rsdef & IST_BOTH; ist->memopt = memopt; ist_init(ist, 1, EM_NONE, 1); /* initialize rule extractoin */ root->parent = root->succ = NULL; root->offset = root->id = 0; root->chcnt = -1; /* initialize the root node */ root->size = itemcnt; a = ist->apps; /* copy item appearances */ if (apps) { while (--itemcnt >= 0) *a++ = *apps++ & IST_BOTH; } else { while (--itemcnt >= 0) *a++ = IST_BOTH; } return ist; /* return created item set tree */} /* ist_create() *//*--------------------------------------------------------------------*/void ist_delete (ISTREE *ist){ /* --- delete an item set tree */ int i; /* loop variables */ ISNODE *node, *t; /* to traverse the nodes */ assert(ist); /* check the function argument */ for (i = ist->lvlcnt; --i >= 0; ) { for (node = ist->levels[i]; node; ) { t = node; node = node->succ; free(t); } } /* delete all nodes, */ free(ist->levels); /* the level vector, */ free(ist->map); /* the identifier map, */ free(ist->buf); /* the path buffer, */ free(ist); /* and the tree body */} /* ist_delete() *//*--------------------------------------------------------------------*/void ist_count (ISTREE *ist, int *set, int cnt){ /* --- count transaction in tree */ assert(ist /* check the function arguments */ && (cnt >= 0) && (set || (cnt <= 0))); if (cnt >= ist->lvlcnt) /* recursively count transaction */ _count(ist->levels[0], set, cnt, ist->lvlcnt); ist->tacnt++; /* increment the transaction counter */} /* ist_count() *//*--------------------------------------------------------------------*/int ist_addlvl (ISTREE *ist){ /* --- add a level to item set tree */ int i, n; /* loop variable, counter */ ISNODE **ndp; /* to traverse the nodes */ ISNODE *node; /* new (reallocated) node */ ISNODE **end; /* end of new level node list */ ISNODE *new; /* current node in new level */ ISNODE *frst; /* first child of current node */ ISNODE *last; /* last child of current node */ ISNODE **vec; /* child node vector */ int *map; /* identifier map */ int s_min; /* minimal support of a set */ int s_sub; /* minimal support of a subset */ void *p; /* temporary buffer */ assert(ist); /* check the function arguments */ /* --- enlarge level vector --- */ if (ist->lvlcnt >= ist->lvlvsz) { /* if the level vector is full */ n = ist->lvlvsz +BLKSIZE; /* compute new vector size */ p = realloc(ist->levels, n *sizeof(ISNODE*)); if (!p) return -1; /* enlarge the level vector */ ist->levels = (ISNODE**)p; /* and set the new vector */ p = realloc(ist->buf, n *sizeof(int)); if (!p) return -1; /* enlarge the buffer vector */ ist->buf = (int*)p; /* and set the new vector */ ist->lvlvsz = n; /* set the new vector size */ } /* (applies to buf and levels) */ end = ist->levels +ist->lvlcnt; *end = NULL; /* start a new tree level */ /* --- add tree level --- */ s_sub = (int)ceil(ist->supp *ist->tacnt); /* minimal subset support */ s_min = (ist->rsdef == IST_BOTH) ? s_sub /* minimal rule support */ : (int)ceil(ist->tacnt *ist->supp *ist->conf); for (ndp = ist->levels +ist->lvlcnt -1; *ndp; ndp = &(*ndp)->succ) { frst = last = NULL; /* traverse the deepest nodes */ for (i = n = 0; i < (*ndp)->size; i++) { new = _child(ist, *ndp, i, s_min, s_sub); if (!new) continue; /* create a child if necessary */ if (new == (void*)-1) { _cleanup(ist); return -1; } if (!frst) frst = new; /* note first and last child node */ *end = last = new; /* add node at the end of the list */ end = &new->succ; n++; /* that contains the new level */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -