📄 istree.c
字号:
ist->node = ist->head = NULL; ist->size = minlen; /* initialize rule extraction */ if ((arem < EM_NONE) || (arem >= EM_UNKNOWN)) arem = EM_NONE; /* check, adapt, and note */ ist->arem = arem; /* additional evaluation measure */ ist->minval = minval; /* and its minimal value */} /* ist_init() *//*--------------------------------------------------------------------*/int ist_set (ISTREE *ist, int *set, double *supp, double *aval){ /* --- extract next frequent item set */ int i; /* loop variable */ int item; /* an item identifier */ ISNODE *node, *tmp; /* current item set node, buffer */ int *cnts; /* to access the item frequencies */ int s_min; /* minimal support of a set */ int s_set; /* support of the current set */ double dev; /* deviation from indep. occurrence */ double nrm; /* freq. normalization factor */ assert(ist && set && supp); /* check the function arguments */ /* --- initialize --- */ if (ist->size > ist->lvlcnt) /* if the tree is not high enough */ return -1; /* for the item set size, abort */ s_min = (int)ceil(ist->tacnt *ist->supp); /* get minimal support */ if (!ist->node) /* on first item set, initialize */ ist->node = ist->levels[ist->size-1]; /* the current node */ node = ist->node; /* get the current item set node */ nrm = (ist->tacnt > 0) /* compute the normalization factor */ ? 1.0/ist->tacnt : 1.0; /* for the item set support and */ cnts = ist->levels[0]->cnts; /* get the item frequency vector */ /* --- find frequent item set --- */ while (1) { /* search for a frequent item set */ if (++ist->index >= node->size) { /* if all subsets have been */ node = node->succ; /* processed, go to the successor */ if (!node) { /* if at the end of a level, go down */ if (++ist->size > ist->lvlcnt) return -1; /* if beyond the deepest level, abort */ node = ist->levels[ist->size -1]; } /* get the 1st node of the new level */ ist->node = node; /* note the new item set node */ ist->index = 0; /* start with the first item set */ } /* of the new item set node */ if (node->offset >= 0) item = node->offset +ist->index; else item = node->cnts[node->size +ist->index]; if (ist->apps[item] == IST_IGNORE) continue; /* skip items to ignore */ s_set = node->cnts[ist->index]; if (s_set < s_min) /* if the support is not sufficient, */ continue; /* go to the next item set */ if (ist->arem == EM_LOGQ){ /* if logarithm of supp. quotient */ dev = log(abs(cnts[item]) /(double)ist->tacnt); for (tmp = node; tmp->parent; tmp = tmp->parent) dev += log(abs(cnts[ID(tmp)]) /(double)ist->tacnt); dev = (log(s_set /(double)ist->tacnt) -dev) /(100*LN_2); } else if (ist->arem == EM_QUOT) { /* if measure is the quotient */ dev = abs(cnts[item]); /* compute the expected support */ for (tmp = node; tmp->parent; tmp = tmp->parent) dev *= abs(cnts[ID(tmp)]) *nrm; dev = s_set /dev -1.0; } /* compute the support quotient -1 */ else { /* if no add. evaluation measure, */ dev = 0; break; } /* abort the search loop */ if (dev >= ist->minval) /* if the value of the additional */ break; /* evaluation measure is high enough, */ } /* abort the search loop */ *supp = s_set *nrm; /* compute the item set support */ /* --- build frequent item set --- */ i = ist->size; /* get the current item set size */ set[--i] = item; /* and store the first item */ while (node->parent) { /* while not at the root node */ set[--i] = ID(node); /* add item to the item set */ node = node->parent; /* and go to the parent node */ } if (aval) *aval = dev; /* set the add. evaluation measure */ return ist->size; /* return the item set size */} /* ist_set() *//*--------------------------------------------------------------------*/int ist_rule (ISTREE *ist, int *rule, double *supp, double *conf, double *lift, double *aval){ /* --- extract next rule */ int i; /* loop variable */ int item; /* an item identifier */ ISNODE *node; /* current item set node */ ISNODE *parent; /* parent of the item set node */ ISNODE **vec; /* child node vector */ int *map, n; /* identifier map and its size */ int s_rule; /* minimal support of a rule */ int s_min; /* minimal support of a set */ int s_set; /* support of set (body & head) */ int s_sub; /* support of subset (body) */ double p_body, p_head; /* prior confidences/probabilities */ double c, v; /* confidence and measure value */ int app; /* appearance flag of head item */ assert(ist && rule && supp && conf); /* check function arguments */ /* --- initialize --- */ if (ist->size > ist->lvlcnt) /* if the tree is not high enough */ return -1; /* for the rule length, abort */ s_rule = (int)ceil(ist->tacnt *ist->supp); /* minimal rule support */ s_min = (ist->rsdef == IST_BOTH) ? s_rule /* minimal set support */ : (int)ceil(ist->tacnt *ist->supp *ist->conf); if (ist->node) /* if this is not the first rule, */ node = ist->node; /* get the buffered item set node */ else { /* if this is the first rule */ node = ist->node = ist->levels[ist->size -1]; ist->index = ist->item = -1;/* initialize the */ } /* rule extraction variables */ /* --- find rule --- */ while (1) { /* search for a rule */ if (ist->item >= 0) { /* --- select next item subset */ *--ist->path = ist->item; /* add previous head to the path */ ist->plen++; /* and get the next head item */ ist->item = ID(ist->head); ist->head = ist->head->parent; if (!ist->head) /* if all subsets have been processed */ ist->item = -1; /* clear the head item to trigger the */ } /* selection of a new item set */ if (ist->item < 0) { /* --- select next item set */ if (++ist->index >= node->size){/* if all subsets have been */ node = node->succ; /* processed, go to the successor */ if (!node) { /* if at the end of a level, go down */ if (++ist->size > ist->lvlcnt) return -1; /* if beyond the deepest level, abort */ node = ist->levels[ist->size -1]; } /* get the 1st node of the new level */ ist->node = node; /* note the new item set node and */ ist->index = 0; /* start with the first item set */ } /* of the new item set node */ if (node->offset >= 0) item = node->offset +ist->index; else item = node->cnts[node->size +ist->index]; if ((ist->apps[item] == IST_IGNORE) || (HDONLY(node) && (ist->apps[item] == IST_HEAD))) continue; /* skip sets with two head only items */ ist->item = item; /* set the head item identifier */ ist->hdonly = HDONLY(node) || (ist->apps[item] == IST_HEAD); ist->head = node; /* set the new head item node */ ist->path = ist->buf +ist->lvlvsz; ist->plen = 0; /* clear the path */ } app = ist->apps[ist->item]; /* get head item appearance */ if (!(app & IST_HEAD) || (ist->hdonly && (app != IST_HEAD))) continue; /* if rule is not allowed, skip it */ s_set = node->cnts[ist->index]; /* get the item set support */ if (s_set < s_min) { /* if the set support is too low, */ ist->item = -1; continue; } /* skip this item set */ parent = node->parent; /* get the parent node */ if (ist->plen > 0) /* if there is a path, use it */ s_sub = _getsupp(ist->head, ist->path, ist->plen); else if (!parent) /* if there is no parent (root node), */ s_sub = ist->tacnt; /* get the number of transactions */ else if (parent->offset >= 0) /* if a pure vector is used */ s_sub = parent->cnts[ID(node) -parent->offset]; else { /* if an identifier map is used */ map = parent->cnts +(n = parent->size); vec = (ISNODE**)(map +n); /* get id. map and child vector */ s_sub = parent->cnts[_bsearch(map, n, ID(node))]; } /* find vector index and get support */ if (s_sub < s_rule) /* if the subset support is too low, */ continue; /* get the next subset/next set */ c = (s_sub > 0) /* compute the rule confidence */ ? (double)s_set/s_sub : 1; if (c < ist->conf -EPSILON) /* if the confidence is too low, */ continue; /* get the next item subset/item set */ if (ist->arem == EM_NONE) { /* if no add. eval. measure given, */ v = 0; break; } /* abort the loop (select the rule) */ if (ist->size < 2) { /* if rule has an empty antecedent, */ v = 0; break; } /* abort the loop (select the rule) */ if (ist->tacnt <= 0) /* if there are no transactions, */ p_body = p_head = 1; /* all probabilities are 1 */ else { /* if there are transactions */ p_body = (double)s_sub /ist->tacnt; p_head = (double)ist->levels[0]->cnts[ist->item] /ist->tacnt; } /* estimate prior probabilities */ v = _evalfns[ist->arem](p_head, p_body, c); if (v >= ist->minval) /* if rule value exceeds the minimal */ break; /* of the add. rule eval. measure, */ } /* while (1) */ /* abort the loop (select rule) */ *supp = (ist->tacnt <= 0) ? 1 /* compute the rule support */ : ((ist->rsdef == IST_BODY) ? s_sub : s_set) / (double)ist->tacnt; /* (respect the rule support def.) */ if (lift) /* compute and store the lift value */ *lift = (c *ist->tacnt)/(double)ist->levels[0]->cnts[ist->item]; /* --- build rule --- */ if (node->offset >= 0) item = node->offset +ist->index; else item = node->cnts[node->size +ist->index]; i = ist->size; /* get the current item and */ if (item != ist->item) /* if this item is not the head, */ rule[--i] = item; /* add it to the rule body */ while (node->parent) { /* traverse the path to the root */ if (ID(node) != ist->item) /* and add all items on this */ rule[--i] = ID(node); /* path to the rule body */ node = node->parent; /* (except the head of the rule) */ } rule[0] = ist->item; /* set the head of the rule, */ *conf = c; /* the rule confidence, and */ if (aval) *aval = v; /* the value of the add. measure */ return ist->size; /* return the rule size */} /* ist_rule() *//*--------------------------------------------------------------------*/int ist_hedge (ISTREE *ist, int *hedge, double *supp, double *conf){ /* --- extract next hyperedge */ int i; /* loop variable */ int item; /* an item identifier */ ISNODE *node; /* current item set node */ ISNODE *head; /* node containing the rule head */ ISNODE **vec; /* child node vector of head node */ int *map, n; /* identifier map and its size */ int *path, plen; /* path in tree and its length */ int s_min; /* minimal support of a hyperedge */ int s_set; /* support of set (body & head) */ int s_sub; /* support of subset (body) */ assert(ist && hedge && supp && conf); /* check function arguments */ /* --- initialize --- */ if (ist->size > ist->lvlcnt) /* if the tree is not high enough */ return -1; /* for the hyperedge size, abort */ s_min = (int)ceil(ist->tacnt *ist->supp); /* get minimal support */ if (!ist->node) /* on first hyperedge, initialize */ ist->node = ist->levels[ist->size -1]; /* the current node */ node = ist->node; /* get the current item set node */ /* --- find hyperedge --- */ while (1) { /* search for a hyperedge */ if (++ist->index >= node->size) { /* if all subsets have been */ node = node->succ; /* processed, go to the successor */ if (!node) { /* if at the end of a level, go down */ if (++ist->size > ist->lvlcnt) return -1; /
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -