⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 istree.c

📁 数据挖掘中的关联规则算法
💻 C
📖 第 1 页 / 共 5 页
字号:
  ist->node  = ist->head = NULL;  ist->size  = minlen;          /* initialize rule extraction */  if ((arem < EM_NONE) || (arem >= EM_UNKNOWN))    arem = EM_NONE;             /* check, adapt, and note */  ist->arem   = arem;           /* additional evaluation measure */  ist->minval = minval;         /* and its minimal value */}  /* ist_init() *//*--------------------------------------------------------------------*/int ist_set (ISTREE *ist, int *set, double *supp, double *aval){                               /* --- extract next frequent item set */  int    i;                     /* loop variable */  int    item;                  /* an item identifier */  ISNODE *node, *tmp;           /* current item set node, buffer */  int    *cnts;                 /* to access the item frequencies */  int    s_min;                 /* minimal support of a set */  int    s_set;                 /* support of the current set */  double dev;                   /* deviation from indep. occurrence */  double nrm;                   /* freq. normalization factor */  assert(ist && set && supp);   /* check the function arguments */  /* --- initialize --- */  if (ist->size > ist->lvlcnt)  /* if the tree is not high enough */    return -1;                  /* for the item set size, abort */  s_min = (int)ceil(ist->tacnt *ist->supp); /* get minimal support */  if (!ist->node)               /* on first item set, initialize */    ist->node = ist->levels[ist->size-1];    /* the current node */  node = ist->node;             /* get the current item set node */  nrm  = (ist->tacnt > 0)       /* compute the normalization factor */       ? 1.0/ist->tacnt : 1.0;  /* for the item set support and */  cnts = ist->levels[0]->cnts;  /* get the item frequency vector */  /* --- find frequent item set --- */  while (1) {                   /* search for a frequent item set */    if (++ist->index >= node->size) { /* if all subsets have been */      node = node->succ;        /* processed, go to the successor */      if (!node) {              /* if at the end of a level, go down */        if (++ist->size > ist->lvlcnt)          return -1;            /* if beyond the deepest level, abort */        node = ist->levels[ist->size -1];      }                         /* get the 1st node of the new level */      ist->node  = node;        /* note the new item set node */      ist->index = 0;           /* start with the first item set */    }                           /* of the new item set node */    if (node->offset >= 0) item = node->offset +ist->index;    else                   item = node->cnts[node->size +ist->index];    if (ist->apps[item] == IST_IGNORE)      continue;                 /* skip items to ignore */    s_set = node->cnts[ist->index];    if (s_set < s_min)          /* if the support is not sufficient, */      continue;                 /* go to the next item set */    if      (ist->arem == EM_LOGQ){ /* if logarithm of supp. quotient */      dev = log(abs(cnts[item]) /(double)ist->tacnt);      for (tmp = node; tmp->parent; tmp = tmp->parent)        dev += log(abs(cnts[ID(tmp)]) /(double)ist->tacnt);      dev = (log(s_set /(double)ist->tacnt) -dev) /(100*LN_2); }    else if (ist->arem == EM_QUOT) { /* if measure is the quotient */      dev = abs(cnts[item]);    /* compute the expected support */      for (tmp = node; tmp->parent; tmp = tmp->parent)        dev *= abs(cnts[ID(tmp)]) *nrm;      dev = s_set /dev -1.0;  } /* compute the support quotient -1 */    else {                      /* if no add. evaluation measure, */      dev = 0; break; }         /* abort the search loop */    if (dev >= ist->minval)     /* if the value of the additional */      break;                    /* evaluation measure is high enough, */  }                             /* abort the search loop */  *supp = s_set *nrm;           /* compute the item set support */  /* --- build frequent item set --- */  i        = ist->size;         /* get the current item set size */  set[--i] = item;              /* and store the first item */  while (node->parent) {        /* while not at the root node */    set[--i] = ID(node);        /* add item to the item set */    node = node->parent;        /* and go to the parent node */  }  if (aval) *aval = dev;        /* set the add. evaluation measure */  return ist->size;             /* return the item set size */}  /* ist_set() *//*--------------------------------------------------------------------*/int ist_rule (ISTREE *ist, int *rule,              double *supp, double *conf, double *lift, double *aval){                               /* --- extract next rule */  int    i;                     /* loop variable */  int    item;                  /* an item identifier */  ISNODE *node;                 /* current item set node */  ISNODE *parent;               /* parent of the item set node */  ISNODE **vec;                 /* child node vector */  int    *map, n;               /* identifier map and its size */  int    s_rule;                /* minimal support of a rule */  int    s_min;                 /* minimal support of a set */  int    s_set;                 /* support of set    (body & head) */  int    s_sub;                 /* support of subset (body) */  double p_body, p_head;        /* prior confidences/probabilities */  double c, v;                  /* confidence and measure value */  int    app;                   /* appearance flag of head item */  assert(ist && rule && supp && conf);  /* check function arguments */  /* --- initialize --- */  if (ist->size > ist->lvlcnt)  /* if the tree is not high enough */    return -1;                  /* for the rule length, abort */  s_rule = (int)ceil(ist->tacnt *ist->supp);  /* minimal rule support */  s_min  = (ist->rsdef == IST_BOTH) ? s_rule  /* minimal set  support */         : (int)ceil(ist->tacnt *ist->supp *ist->conf);  if (ist->node)                /* if this is not the first rule, */    node = ist->node;           /* get the buffered item set node */  else {                        /* if this is the first rule */    node = ist->node = ist->levels[ist->size -1];    ist->index = ist->item = -1;/* initialize the */  }                             /* rule extraction variables */  /* --- find rule --- */  while (1) {                   /* search for a rule */    if (ist->item >= 0) {       /* --- select next item subset */      *--ist->path = ist->item; /* add previous head to the path */      ist->plen++;              /* and get the next head item */      ist->item = ID(ist->head);      ist->head = ist->head->parent;      if (!ist->head)           /* if all subsets have been processed */        ist->item = -1;         /* clear the head item to trigger the */    }                           /* selection of a new item set */    if (ist->item < 0) {        /* --- select next item set */      if (++ist->index >= node->size){/* if all subsets have been */        node = node->succ;      /* processed, go to the successor */        if (!node) {            /* if at the end of a level, go down */          if (++ist->size > ist->lvlcnt)            return -1;          /* if beyond the deepest level, abort */          node = ist->levels[ist->size -1];        }                       /* get the 1st node of the new level */        ist->node = node;       /* note the new item set node and */        ist->index  = 0;        /* start with the first item set */      }                         /* of the new item set node */      if (node->offset >= 0) item = node->offset +ist->index;      else                   item = node->cnts[node->size +ist->index];      if ((ist->apps[item] == IST_IGNORE)      ||  (HDONLY(node) && (ist->apps[item] == IST_HEAD)))        continue;               /* skip sets with two head only items */      ist->item   = item;       /* set the head item identifier */      ist->hdonly = HDONLY(node) || (ist->apps[item] == IST_HEAD);      ist->head   = node;       /* set the new head item node */      ist->path   = ist->buf +ist->lvlvsz;      ist->plen   = 0;          /* clear the path */    }    app = ist->apps[ist->item]; /* get head item appearance */    if (!(app & IST_HEAD) || (ist->hdonly && (app != IST_HEAD)))      continue;                 /* if rule is not allowed, skip it */    s_set = node->cnts[ist->index]; /* get the item set support */    if (s_set < s_min) {        /* if the set support is too low, */      ist->item = -1; continue; }   /* skip this item set */    parent = node->parent;      /* get the parent node */    if (ist->plen > 0)          /* if there is a path, use it */      s_sub = _getsupp(ist->head, ist->path, ist->plen);    else if (!parent)           /* if there is no parent (root node), */      s_sub = ist->tacnt;       /* get the number of transactions */    else if (parent->offset >= 0)   /* if a pure vector is used */      s_sub = parent->cnts[ID(node) -parent->offset];    else {                      /* if an identifier map is used */      map = parent->cnts +(n = parent->size);      vec = (ISNODE**)(map +n); /* get id. map and child vector */      s_sub = parent->cnts[_bsearch(map, n, ID(node))];    }                           /* find vector index and get support */    if (s_sub < s_rule)         /* if the subset support is too low, */      continue;                 /* get the next subset/next set */    c = (s_sub > 0)             /* compute the rule confidence */      ? (double)s_set/s_sub : 1;    if (c < ist->conf -EPSILON) /* if the confidence is too low, */      continue;                 /* get the next item subset/item set */    if (ist->arem == EM_NONE) { /* if no add. eval. measure given, */      v = 0; break; }           /* abort the loop (select the rule) */    if (ist->size < 2) {        /* if rule has an empty antecedent, */      v = 0; break; }           /* abort the loop (select the rule) */    if (ist->tacnt <= 0)        /* if there are no transactions, */      p_body = p_head = 1;      /* all probabilities are 1 */    else {                      /* if there are transactions */      p_body = (double)s_sub                           /ist->tacnt;      p_head = (double)ist->levels[0]->cnts[ist->item] /ist->tacnt;    }                           /* estimate prior probabilities */    v = _evalfns[ist->arem](p_head, p_body, c);    if (v >= ist->minval)       /* if rule value exceeds the minimal */      break;                    /* of the add. rule eval. measure, */  }  /* while (1) */            /* abort the loop (select rule) */  *supp = (ist->tacnt <= 0) ? 1 /* compute the rule support */        : ((ist->rsdef == IST_BODY) ? s_sub : s_set)        / (double)ist->tacnt;   /* (respect the rule support def.) */  if (lift)                     /* compute and store the lift value */    *lift = (c *ist->tacnt)/(double)ist->levels[0]->cnts[ist->item];  /* --- build rule --- */  if (node->offset >= 0) item = node->offset +ist->index;  else                   item = node->cnts[node->size +ist->index];  i = ist->size;                /* get the current item and */  if (item != ist->item)        /* if this item is not the head, */    rule[--i] = item;           /* add it to the rule body */  while (node->parent) {        /* traverse the path to the root */    if (ID(node) != ist->item)  /* and add all items on this */      rule[--i] = ID(node);     /* path to the rule body */    node = node->parent;        /* (except the head of the rule) */  }  rule[0] = ist->item;          /* set the head of the rule, */  *conf   = c;                  /* the rule confidence, and */  if (aval) *aval = v;          /* the value of the add. measure */  return ist->size;             /* return the rule size */}  /* ist_rule() *//*--------------------------------------------------------------------*/int ist_hedge (ISTREE *ist, int *hedge, double *supp, double *conf){                               /* --- extract next hyperedge */  int    i;                     /* loop variable */  int    item;                  /* an item identifier */  ISNODE *node;                 /* current item set node */  ISNODE *head;                 /* node containing the rule head */  ISNODE **vec;                 /* child node vector of head node */  int    *map, n;               /* identifier map and its size */  int    *path, plen;           /* path in tree and its length */  int    s_min;                 /* minimal support of a hyperedge */  int    s_set;                 /* support of set    (body & head) */  int    s_sub;                 /* support of subset (body) */  assert(ist && hedge && supp && conf);  /* check function arguments */  /* --- initialize --- */  if (ist->size > ist->lvlcnt)  /* if the tree is not high enough */    return -1;                  /* for the hyperedge size, abort */  s_min = (int)ceil(ist->tacnt *ist->supp); /* get minimal support */  if (!ist->node)               /* on first hyperedge, initialize */    ist->node = ist->levels[ist->size -1];    /* the current node */  node = ist->node;             /* get the current item set node */  /* --- find hyperedge --- */  while (1) {                   /* search for a hyperedge */    if (++ist->index >= node->size) { /* if all subsets have been */      node = node->succ;        /* processed, go to the successor */      if (!node) {              /* if at the end of a level, go down */        if (++ist->size > ist->lvlcnt)          return -1;            /

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -