📄 istree.c
字号:
static int _checkuse (ISNODE *node, char *marks, int supp){ /* --- recursively check item usage */ int i, r = 0; /* vector index, result of check */ int *map, n; /* identifier map and its size */ ISNODE **vec; /* child node vector */ assert(node && marks); /* check the function arguments */ if (node->offset >= 0) { /* if a pure vector is used */ if (node->chcnt == 0) { /* if this is a new node */ n = node->offset; /* get the index offset */ for (i = node->size; --i >= 0; ) { if (node->cnts[i] >= supp) marks[n+i] = r = 1; /* mark items in set that satisfies */ } } /* the minimum support criterion */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(node->cnts +node->size); for (i = node->chcnt; --i >= 0; ) if (vec[i]) r |= _checkuse(vec[i], marks, supp); } } /* recursively process all children */ else { /* if an identifer map is used */ map = node->cnts +node->size; if (node->chcnt == 0) { /* if this is a new node */ for (i = node->size; --i >= 0; ) { if (node->cnts[i] >= supp) marks[map[i]] = r = 1;/* mark items in set that satisfies */ } } /* the minimum support criterion */ else if (node->chcnt > 0) { /* if there are child nodes */ vec = (ISNODE**)(map +node->size); for (i = node->chcnt; --i >= 0; ) if (vec[i]) r |= _checkuse(vec[i], marks, supp); } /* get the child vector and */ } /* recursively process all children */ if ((r != 0) && node->parent) /* if the check succeeded, mark */ marks[ID(node)] = 1; /* the item associated with the node */ return r; /* return the check result */} /* _checkuse() *//*--------------------------------------------------------------------*/static int _getsupp (ISNODE *node, int *set, int cnt){ /* --- get support of an item set */ int i, n, c; /* vector index, buffers */ int *map; /* identifier map */ ISNODE **vec; /* vector of child nodes */ assert(node && set && (cnt >= 0)); /* check the function arguments */ while (--cnt > 0) { /* follow the set/path from the node */ c = node->chcnt & ~F_SKIP; /* if there are no children, */ if (c <= 0) return -1; /* the support is less than minsupp */ if (node->offset >= 0) { /* if a pure vector is used */ vec = (ISNODE**)(node->cnts +node->size); i = *set++ -ID(vec[0]); /* compute the child vector index and */ if (i >= c) return -1; } /* abort if the child does not exist */ else { /* if an identifier map is used */ map = node->cnts +(n = node->size); vec = (ISNODE**)(map +n); /* get id. map and child vector */ if (c < n) /* if a secondary id. map exists, */ map = (int*)(vec +(n = c)); /* get this identifier map */ i = _bsearch(map, n, *set++); } /* search for the proper index */ if (i < 0) return -1; /* abort if index is out of range */ node = vec[i]; /* go to the corresponding child */ if (!node) return -1; /* if the child does not exists, */ } /* the support is less than minsupp */ if (node->offset >= 0) { /* if a pure vector is used, */ i = *set -node->offset; /* compute the counter index */ if (i >= node->size) return -1; } else { /* if an identifier map is used */ map = node->cnts +(n = node->size); i = _bsearch(map, n, *set); } /* search for the proper index */ if (i < 0) return -1; /* abort if index is out of range */ return node->cnts[i]; /* return the item set support */} /* _getsupp() *//*--------------------------------------------------------------------*/static void _marksupp (ISNODE *node, int *set, int cnt, int supp){ /* --- mark support of an item set */ int i, n, c; /* vector index, buffers */ int *map; /* identifier map */ ISNODE **vec; /* vector of child nodes */ assert(node && set && (cnt >= 0)); /* check the function arguments */ while (--cnt > 0) { /* follow the set/path from the node */ if (node->offset >= 0) { /* if a pure vector is used */ vec = (ISNODE**)(node->cnts +node->size); i = *set++ -ID(vec[0]);}/* compute the child vector index */ else { /* if an identifier map is used */ map = node->cnts +(n = node->size); vec = (ISNODE**)(map +n); /* get id. map, child vector and */ c = node->chcnt & ~F_SKIP; /* the number of children */ if (c < n) /* if a secondary id. map exists, */ map = (int*)(vec +(n = c)); /* get this identifier map */ i = _bsearch(map, n, *set++); } /* search for the proper index */ node = vec[i]; /* go to the corresponding child */ } if (node->offset >= 0) /* if a pure vector is used, */ i = *set -node->offset; /* compute the counter index */ else { /* if an identifier map is used */ map = node->cnts +(n = node->size); i = _bsearch(map, n, *set); } /* search for the proper index */ if ((supp < 0) /* if to clear unconditionally */ || (node->cnts[i] == supp)) /* or the support is the same */ node->cnts[i] |= F_SKIP; /* mark support as cleared */} /* _marksupp() *//*--------------------------------------------------------------------*/static void _marksub (ISTREE *ist, ISNODE *node, int index, int supp){ /* --- mark all n-1 subsets */ int i; /* next item, loop variable */ int *set; /* (partial) item set */ if (node->offset >= 0) i = node->offset +index; else i = node->cnts[node->size +index]; set = ist->buf +ist->vsz; /* get and store the first two items */ *--set = i; _marksupp(node->parent, set, 1, supp); *--set = ID(node); _marksupp(node->parent, set, 1, supp); i = 2; /* mark counters in parent node */ for (node = node->parent; node->parent; node = node->parent) { _marksupp(node->parent, set, i, supp); *--set = ID(node); i++; /* climb up the tree and mark */ } /* counters for all n-1 subsets */} /* _marksub() *//*--------------------------------------------------------------------*/static ISNODE* _child (ISTREE *ist, ISNODE *node, int index, int s_min, int s_body){ /* --- create child node (extend set) */ int i, k, n; /* loop variables, counters */ ISNODE *curr; /* to traverse the path to the root */ int item, cnt; /* item identifier, number of items */ int *set; /* next (partial) item set to check */ int body; /* enough support for a rule body */ int hdonly; /* whether head only item on path */ int app; /* appearance flags of an item */ int s_set; /* support of an item set */ assert(ist && node /* check the function arguments */ && (index >= 0) && (index < node->size)); if (node->offset >= 0) item = node->offset +index; else item = node->cnts[node->size +index]; app = is_getapp(ist->set, item); /* get item id. and app. flag */ if ((app == IST_IGNORE) /* do not extend an item to ignore */ || ((HDONLY(node) && (app == IST_HEAD)))) return NULL; /* nor a set with two head only items */ hdonly = HDONLY(node) || (app == IST_HEAD); /* --- initialize --- */ s_set = node->cnts[index]; /* get support of item set to extend */ if (s_set < s_min) /* if set support is insufficient, */ return NULL; /* no child is needed, so abort */ body = (s_set >= s_body) /* if the set has enough support for */ ? 1 : 0; /* a rule body, set the body flag */ ist->buf[ist->vsz -2] = item; /* init. set for support checks */ /* --- check candidates --- */ for (n = 0, i = index; ++i < node->size; ) { if (node->offset >= 0) k = node->offset +i; else k = node->cnts[node->size +i]; app = is_getapp(ist->set, k); /* traverse the candidate items */ if ((app == IST_IGNORE) || (hdonly && (app == IST_HEAD))) continue; /* skip sets with two head only items */ s_set = node->cnts[i]; /* traverse the candidate items */ if (s_set < s_min) /* if set support is insufficient, */ continue; /* ignore the corresponding candidate */ body &= 1; /* restrict body flags to the set S */ if (s_set >= s_body) /* if set support is sufficient for */ body |= 2; /* a rule body, set the body flag */ set = ist->buf +ist->vsz -(cnt = 2); set[1] = k; /* add the candidate item to the set */ for (curr = node; curr->parent; curr = curr->parent) { s_set = _getsupp(curr->parent, set, cnt); if (s_set < s_min) /* get the item set support and */ break; /* if it is too low, abort the loop */ if (s_set >= s_body) /* if some subset has enough support */ body |= 4; /* for a rule body, set the body flag */ *--set = ID(curr); cnt++; /* add id of current node to the set */ } /* and adapt the number of items */ if (!curr->parent && body) /* if subset support is high enough */ ist->map[n++] = k; /* for a full rule and a rule body, */ } /* note the item identifier */ if (n <= 0) return NULL; /* if no child is needed, abort */ #ifdef BENCH /* if benchmark version: */ ist->scnec += n; /* sum the necessary counters */ #endif /* --- decide on node structure --- */ k = ist->map[n-1] -ist->map[0] +1; if (!(ist->mode & IST_MEMOPT)) n = k; else if (3*n >= 2*k) n = k; /* use a pure vector if it is small */ else k = n+n; /* enough, otherwise use an id. map */ #ifdef ARCH64 /* if 64 bit architecture */ if ((n == k) && (k & 1)) n = ++k; #endif /* pad to even number of counters */ #ifdef BENCH /* if benchmark version */ ist->sccnt += n; /* sum the number of counters */ ist->bytes += sizeof(ISNODE) +(k-1) *sizeof(int) +8; #endif /* determine the memory usage */ /* --- create child --- */ curr = (ISNODE*)malloc(sizeof(ISNODE) +(k-1) *sizeof(int)); if (!curr) return (void*)-1; /* create a child node */ curr->parent = node; /* set pointer to parent node */ curr->succ = NULL; /* and clear successor pointer */ curr->id = item; /* initialize the item id. and */ if (hdonly) curr->id |= F_HDONLY; /* set the head only flag */ curr->chcnt = 0; /* there are no children yet */ curr->size = n; /* set size of counter vector */ if (n == k) /* if to use a pure vector, */ curr->offset = ist->map[0]; /* note the first item as an offset */ else { /* if to use an identifier map, */ curr->offset = -1; /* use the offset as an indicator */ for (set = curr->cnts +n +(i = n); --i >= 0; ) *--set = ist->map[i]; /* copy the identifier map */ } /* from the buffer to the node */ for (set = curr->cnts +(i = n); --i >= 0; ) *--set = 0; /* clear all counters of the node */ return curr; /* return pointer to created child */} /* _child() *//*---------------------------------------------------------------------- In the above function the set S represented by the index-th vectorelement of the current node is extended only by combining it with thesets represented by the fields that follow it in the node vector,i.e. by the sets represented by vec[index+1] to vec[size-1]. The setsthat can be formed by combining the set S and the sets represented byvec[0] to vec[index-1] are processed in the branches for these sets. In the 'check candidates' loop it is checked for each set representedby vec[index+1] to vec[size-1] whether this set and all other subsetsof the same size, which can be formed from the union of this set andthe set S, have enough support, so that a child node is necessary. Note that i +offset is the identifier of the item that has to beadded to set S to form the union of the set S and the set T representedby vec[i], since S and T have the same path with the exception of theindex in the current node. Hence we can speak of candidate items thatare added to S. Checking the support of the other subsets of the union of S and Tthat have the same size as S and T is done with the aid of a pathvariable. The items in this variable combined with the items on thepath to the current node always represent the subset currently tested.That is, the path variable holds the path to be followed from the
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -