⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 istree.c

📁 apriori算法是数据挖掘的经典算法之1,其基于关联规则的思想.这是我的第2个收藏算法
💻 C
📖 第 1 页 / 共 4 页
字号:
/*--------------------------------------------------------------------*/int ist_rule (ISTREE *ist, int *rule,              double *supp, double *conf, double *aval){                               /* --- extract next rule */  int    i;                     /* loop variable */  int    item;                  /* an item identifier */  ISNODE *node;                 /* current item set node */  ISNODE *parent;               /* parent of the item set node */  ISNODE **vec;                 /* child node vector */  int    *map, n;               /* identifier map and its size */  int    s_rule;                /* minimal support of a rule */  int    s_min;                 /* minimal support of a set */  int    s_set;                 /* support of set    (body & head) */  int    s_sub;                 /* support of subset (body) */  double p_body, p_head;        /* prior confidences/probabilities */  double c, v;                  /* confidence and measure value */  int    app;                   /* appearance flag of head item */  assert(ist && rule && supp && conf);  /* check function arguments */  /* --- initialize --- */  if (ist->size > ist->lvlcnt)  /* if the tree is not high enough */    return -1;                  /* for the rule length, abort */  s_rule = (int)ceil(ist->tacnt *ist->supp);  /* minimal rule support */  s_min  = (ist->rsdef == IST_BOTH) ? s_rule  /* minimal set  support */         : (int)ceil(ist->tacnt *ist->supp *ist->conf);  if (ist->node)                /* if this is not the first rule, */    node = ist->node;           /* get the buffered item set node */  else {                        /* if this is the first rule */    node = ist->node = ist->levels[ist->size -1];    ist->index = ist->item = -1;/* initialize the */  }                             /* rule extraction variables */  /* --- find rule --- */  while (1) {                   /* search for a rule */    if (ist->item >= 0) {       /* --- select next item subset */      *--ist->path = ist->item; /* add previous head to the path */      ist->plen++;              /* and get the next head item */      ist->item = ID(ist->head);      ist->head = ist->head->parent;      if (!ist->head)           /* if all subsets have been processed */        ist->item = -1;         /* clear the head item to trigger the */    }                           /* selection of a new item set */    if (ist->item < 0) {        /* --- select next item set */      if (++ist->index >= node->size){/* if all subsets have been */        node = node->succ;      /* processed, go to the successor */        if (!node) {            /* if at the end of a level, go down */          if (++ist->size > ist->lvlcnt)            return -1;          /* if beyond the deepest level, abort */          node = ist->levels[ist->size -1];        }                       /* get the 1st node of the new level */        ist->node = node;       /* note the new item set node and */        ist->index  = 0;        /* start with the first item set */      }                         /* of the new item set node */      if (node->offset >= 0) item = node->offset +ist->index;      else                   item = node->cnts[node->size +ist->index];      if ((ist->apps[item] == IST_IGNORE)      ||  (HDONLY(node) && (ist->apps[item] == IST_HEAD)))        continue;               /* skip sets with two head only items */      ist->item   = item;       /* set the head item identifier */      ist->hdonly = HDONLY(node) || (ist->apps[item] == IST_HEAD);      ist->head   = node;       /* set the new head item node */      ist->path   = ist->buf +ist->lvlvsz;      ist->plen   = 0;          /* clear the path */    }    app = ist->apps[ist->item]; /* get head item appearance */    if (!(app & IST_HEAD) || (ist->hdonly && (app != IST_HEAD)))      continue;                 /* if rule is not allowed, skip it */    s_set = node->cnts[ist->index]; /* get the item set support */    if (s_set < s_min) {        /* if the set support is too low, */      ist->item = -1; continue; }   /* skip this item set */    parent = node->parent;      /* get the parent node */    if (ist->plen > 0)          /* if there is a path, use it */      s_sub = _getsupp(ist->head, ist->path, ist->plen);    else if (!parent)           /* if there is no parent (root node), */      s_sub = ist->tacnt;       /* get the number of transactions */    else if (parent->offset >= 0)   /* if a pure vector is used */      s_sub = parent->cnts[ID(node) -parent->offset];    else {                      /* if an identifier map is used */      map = parent->cnts +(n = parent->size);      vec = (ISNODE**)(map +n); /* get id. map and child vector */      s_sub = parent->cnts[_bsearch(map, n, ID(node))];    }                           /* find vector index and get support */    if (s_sub < s_rule)         /* if the subset support is too low, */      continue;                 /* get the next subset/next set */    c = (s_sub > 0)             /* compute the rule confidence */      ? (double)s_set/s_sub : 1;    if (c < ist->conf -EPSILON) /* if the confidence is too low, */      continue;                 /* get the next item subset/item set */    if (ist->arem == EM_NONE) { /* if no add. eval. measure given, */      v = 0; break; }           /* abort the loop (select the rule) */    if (ist->size < 2) {        /* if rule has an empty antecedent, */      v = 0; break; }           /* abort the loop (select the rule) */    if (ist->tacnt <= 0)        /* if there are no transactions, */      p_body = p_head = 1;      /* all probabilities are 1 */    else {                      /* if there are transactions */      p_body = (double)s_sub                           /ist->tacnt;      p_head = (double)ist->levels[0]->cnts[ist->item] /ist->tacnt;    }                           /* estimate prior probabilities */    v = _evalfns[ist->arem](p_head, p_body, c);    if (v >= ist->minval)       /* if rule value exceeds the minimal */      break;                    /* of the add. rule eval. measure, */  }  /* while (1) */            /* abort the loop (select rule) */  *supp = (ist->tacnt <= 0) ? 1 /* compute the rule support */        : ((ist->rsdef == IST_BODY) ? s_sub : s_set)        / (double)ist->tacnt;   /* (respect the rule support def.) */  /* --- build rule --- */  if (node->offset >= 0) item = node->offset +ist->index;  else                   item = node->cnts[node->size +ist->index];  i = ist->size;                /* get the current item and */  if (item != ist->item)        /* if this item is not the head, */    rule[--i] = item;           /* add it to the rule body */  while (node->parent) {        /* traverse the path to the root */    if (ID(node) != ist->item)  /* and add all items on this */      rule[--i] = ID(node);     /* path to the rule body */    node = node->parent;        /* (except the head of the rule) */  }  rule[0] = ist->item;          /* set the head of the rule, */  *conf   = c;                  /* the rule confidence, and */  if (aval) *aval = v;          /* the value of the add. measure */  return ist->size;             /* return the rule size */}  /* ist_rule() *//*--------------------------------------------------------------------*/int ist_hedge (ISTREE *ist, int *hedge, double *supp, double *conf){                               /* --- extract next hyperedge */  int    i;                     /* loop variable */  int    item;                  /* an item identifier */  ISNODE *node;                 /* current item set node */  ISNODE *head;                 /* node containing the rule head */  ISNODE **vec;                 /* child node vector of head node */  int    *map, n;               /* identifier map and its size */  int    *path, plen;           /* path in tree and its length */  int    s_min;                 /* minimal support of a hyperedge */  int    s_set;                 /* support of set    (body & head) */  int    s_sub;                 /* support of subset (body) */  assert(ist && hedge && supp && conf);  /* check function arguments */  /* --- initialize --- */  if (ist->size > ist->lvlcnt)  /* if the tree is not high enough */    return -1;                  /* for the hyperedge size, abort */  s_min = (int)ceil(ist->tacnt *ist->supp); /* get minimal support */  if (!ist->node)               /* on first hyperedge, initialize */    ist->node = ist->levels[ist->size -1];    /* the current node */  node = ist->node;             /* get the current item set node */  /* --- find hyperedge --- */  while (1) {                   /* search for a hyperedge */    if (++ist->index >= node->size) { /* if all subsets have been */      node = node->succ;        /* processed, go to the successor */      if (!node) {              /* if at the end of a level, go down */        if (++ist->size > ist->lvlcnt)          return -1;            /* if beyond the deepest level, abort */        node = ist->levels[ist->size -1];      }                         /* get the 1st node of the new level */      ist->node  = node;        /* note the new item set node and */      ist->index = 0;           /* start with the first item set */    }                           /* of the new item set node */    if (node->offset >= 0) item = node->offset +ist->index;    else                   item = node->cnts[node->size +ist->index];    if (ist->apps[item] == IST_IGNORE)      continue;                 /* skip items to ignore */    s_set = node->cnts[ist->index];    if (s_set < s_min)          /* if the set support is too low, */      continue;                 /* skip this item set */    head = node->parent;        /* get subset support from parent */    if (!head)                  /* if there is no parent (root node), */      s_sub = ist->tacnt;       /* get the total number of sets */    else if (head->offset >= 0) /* if pure vectors are used */      s_sub = head->cnts[ID(node) -head->offset];    else {                      /* if an identifier map is used */      map = head->cnts +(n = head->size);      vec = (ISNODE**)(map +n); /* get id. map and child vector */      s_sub = head->cnts[_bsearch(map, n, ID(node))];    }                           /* find index and get the support */    *conf   = (s_sub > 0)       /* compute confidence of first rule */            ? (double)s_set/s_sub : 1;    path    = ist->buf +ist->lvlvsz;    *--path = ist->index +node->offset;    plen    = 1;                /* initialize the path and */    while (head) {              /* traverse the path up to root */      s_sub = _getsupp(head, path, plen);      *conf += (s_sub > 0)      /* sum the rule confidences */             ? (double)s_set/s_sub : 1;      *--path = ID(head);       /* store the previous head item */      plen++;                   /* in the path (extend path) */      head = head->parent;      /* and go to the parent node */    }                           /* (get the next rule head) */    *conf /= ist->size;         /* average the rule confidences */    if (*conf >= ist->minval) break;  }  /* while(1) */             /* if confidence suffices, abort loop */  *supp = (ist->tacnt > 0)      /* compute the hyperedge support */        ? (double)s_set/ist->tacnt : 1;  /* --- build hyperedge --- */  i = ist->size -1;             /* store the first item */  if (node->offset >= 0) hedge[i] = ist->index +node->offset;  else                   hedge[i] = node->cnts[node->size +ist->index];  while (node->parent) {        /* while not at the root node */    hedge[--i] = ID(node);      /* add item to the hyperedge */    node = node->parent;        /* and go to the parent node */  }  return ist->size;             /* return the hyperedge size */}  /* ist_hedge() *//*--------------------------------------------------------------------*/#ifndef NDEBUGstatic void _showtree (ISNODE *node, int level){                               /* --- show subtree */  int    i, k;                  /* loop variables, buffer */  int    *map, n;               /* identifier map and its size */  ISNODE **vec;                 /* vector of child nodes */  assert(node && (level >= 0)); /* check the function arguments */  if      (node->chcnt  <= 0)   /* if there are no children, */    vec = NULL;                 /* clear the child vector variable */  else if (node->offset >= 0)   /* if a pure vector is used */    vec = (ISNODE**)(node->cnts +node->size);  else {                        /* if an identifier map is used */    map = node->cnts +(n = node->size);    vec = (ISNODE**)(map +n);   /* get id. map and child vector */    if (node->chcnt < n)        /* if a secondary id. map exists */      map = (int*)(vec +(n = node->chcnt));  }                             /* get child access variables */  for (i = 0; i < node->size; i++) {    for (k = level; --k >= 0; ) /* indent and print */      printf("   ");            /* item identifier and counter */    if (node->offset >= 0) k = node->offset +i;    else                   k = node->cnts[node->size +i];    printf("%d: %d\n", k, node->cnts[i]);    if (!vec) continue;         /* check whether there are children */    if (node->offset >= 0) k -= ID(vec[0]);    else                   k = _bsearch(map, n, k);    if ((k >= 0) && (k < node->chcnt) && vec[k])      _showtree(vec[k], level +1);  }                             /* show subtree recursively */}  /* _showtree() *//*--------------------------------------------------------------------*/void ist_show (ISTREE *ist){                               /* --- show an item set tree */  assert(ist);                  /* check the function argument */  _showtree(ist->levels[0], 0); /* show nodes recursively */  printf("total: %d\n", ist->tacnt);}  /* ist_show() */             /* print number of transactions */#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -