⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fptree.c

📁 Data Minig中的FP GROWTH 算法,附带test实例及实验数据分析
💻 C
字号:
/*----------------------------------------------------------------------  文件    : fpgrowth.c  内容: fptree 生成程序   作者  : 杨明智,黄昊          ----------------------------------------------------------------------*/ #include <stdio.h>#include <stdlib.h>#include <assert.h>#include "fptree.h"#ifdef STORAGE#include "storage.h"#endif#define BLKSIZE    6553   typedef FPTREE* PROJFN (FPTREE *fpt, int item);typedef struct {                /* REC.SEARCH的结构定义*/  int      min;                 /* 最小项数 */  int      max;                 /* 最大项数 */  int      supp;                /* 最小支持度 */  int      bonsai;              /* 使用bonsai算法剪枝的标志 */  PROJFN   *proj;               /* 投影函数 */  FPTREPFN *report;             /* 结果汇报函数 */  void     *data;               /* 汇报函数的使用数据 */  int      cnt;                 /* 频繁项集中的项数 */  int      items[1];            /* 汇报项的向量 */} FPRS;                         /*---------------------------------------------------------------------- 主函数----------------------------------------------------------------------*/static FPTREE* _create (MEMSYS *mem, int cnt){                               /* 创造一个新的FP树 */  FPTREE  *fpt;                 /* 频繁模式树 */  FPTLIST *list;                /* 节点链表 */  assert(cnt > 0);                fpt = (FPTREE*)malloc(sizeof(FPTREE) +(cnt-1) *sizeof(FPTLIST));  if (!fpt) return NULL;          fpt->cnt = cnt;                if (mem) fpt->mem = mem;      /* 如果有存储管理器,简单地进行存放 */  else {                        /*否则安排一个存储系统*/     fpt->mem = ms_create(sizeof(FPTNODE), BLKSIZE);    if (!fpt->mem) { free(fpt); return NULL; }  }                             for (list = fpt->lists +cnt; --cnt >= 0; ) {    (--list)->cnt = 0; list->node = NULL; }  return fpt;                   /* 初始化节点链表 */}                               /* 返回初始FP树 *//*--------------------------------------------------------------------*/static int _build (FPTREE *fpt, FPTNODE *parent,                   TASET *taset, int lft, int rgt, int pos){                               /* 递归建立FP树 */  int     i, k;                 /* loop 变量, buffer */  int     item;                   FPTNODE *node;                /* 创造频繁模式树节点 */  assert(fpt && taset && (pos >= 0));   while ((lft <= rgt) && (tas_tsize(taset, lft) <= pos))    lft++;                        if (lft > rgt) return 0;        item = k = tas_tract(taset, i = rgt)[pos];    /* 获取第一个项 */  do {                              while (--i >= lft) {              k = tas_tract(taset, i)[pos];      if (k != item) break;     /* 寻找含有不同item的transaction*/    }                               node = ms_alloc(fpt->mem);  /* 为当前item创建新的结点并在结点中保存item*/    if (!node) return -1;           node->item   = item;            node->succ   = fpt->lists[item].node;    fpt->lists[item].node = node;    node->parent = parent;      /* 把新建结点加入到结点列表中并计算支持度*/    node->copy   = NULL;            fpt->lists[item].cnt += node->cnt = rgt -i;    if (_build(fpt, node, taset, i+1, rgt, pos+1) != 0)      return -1;                /* 递归建立子结点*/    item = k; rgt = i;           } while (lft <= rgt);           return 0;                   }  /*--------------------------------------------------------------------*/FPTREE* fpt_create (TASET *taset){                               /* 建立FP树*/  FPTREE *fpt;               assert(taset);                /* 检查参数是否合法 */  fpt = _create(NULL, is_cnt(tas_itemset(taset)));  if (!fpt) return NULL;          fpt->itemset = tas_itemset(taset);  fpt->tra     = tas_cnt(taset);  if ((fpt->tra > 0)            /* 至少需要有一个transaction*/  &&  (_build(fpt, NULL, taset, 0, fpt->tra -1, 0) != 0)) {    fpt_delete(fpt); return NULL; }  return fpt;                   /* 递归构建FP树*/}  /* 删除FP树*/void fpt_delete (FPTREE *fpt){                                 assert(fpt);                    ms_delete(fpt->mem);            free(fpt);                    } static void _prune (FPTREE *fpt){                               /* 剪枝函数 */  FPTNODE *node, *buf;          assert(fpt);                  /* 检查参数是否合法 */  for (node = fpt->lists[--fpt->cnt].node; node; ) {    buf  = node;                /* 如果存在另外一个结点,则标记现有结点并从表中删除 */    node = node->succ;             ms_free(fpt->mem, buf);       }                             } /* 分离投影 */static void _detach (FPTREE *fpt){                                 FPTNODE *node, *anc;         assert(fpt);                  /* 检查参数合法性 */  node = fpt->lists[--fpt->cnt].node;  while (node) {                   for (anc = node->parent; anc && anc->copy; anc = anc->parent)      anc->copy = NULL;             anc  = node;                    node = node->succ;              ms_free(fpt->mem, anc);       }                             }  /*如果失败则处理清除工作*/static void _cleanup (FPTREE *fpt, FPTREE *proj){                                 assert(fpt && proj);           _detach(fpt);                  while (proj->cnt > 0) _prune(proj);  free(proj);                 }          /* 投影FP树 */static FPTREE* _project1 (FPTREE *fpt, int item){                                int     i;                    /* 循环标记位*/  FPTREE  *proj;                  FPTNODE *node, *anc, *copy;     FPTNODE **prev;               /* 祖先结点指针 */  FPTLIST *lists;                 assert(fpt);                  /* 检查参数合法性 */  proj = _create(fpt->mem, item);  if (!proj) return NULL;       /* 建立基础FP树*/  proj->itemset = fpt->itemset;   lists = proj->lists;          /* 获得FP树投影的结点列表 */  for (node = fpt->lists[item].node; node; node = node->succ) {    prev = NULL;                    for (anc = node->parent; anc && !anc->copy; anc = anc->parent) {      anc->copy =                    copy = ms_alloc(fpt->mem);             if (!copy) { _cleanup(fpt, proj); return NULL; }      if (prev) *prev = copy;         copy->item = i = anc->item;      copy->succ = lists[i].node;      lists[i].node = copy;           lists[i].cnt += copy->cnt = node->cnt;      copy->copy = NULL;        /* 设立节点的支持度并记录父结点指针*/      prev = &copy->parent;        }                              if (prev)                        *prev = (anc) ? anc->copy : NULL;    for ( ; anc; anc = anc->parent) {      anc->copy->cnt       += node->cnt;      lists[anc->item].cnt += node->cnt;    }                           /* 向根遍历祖先结点并计算支持度总和*/  }                               _detach(fpt);                   return proj;                  }           /* 将投影剪枝到bonsai函数 */static void _bonsai (FPTREE *fpt, int supp){                                 int     i, k;                   FPTLIST *lists;               /* 跟踪层列表*/  FPTNODE *node, *anc, *prev;   /* 跟踪FP树结点*/  lists = fpt->lists;             for (i = fpt->cnt; (--i >= 0) && (lists[i].cnt < supp); )    _prune(fpt);                /* 把最深的那层剪枝*/  for (k = 0; k < fpt->cnt; k++)    if ((lists[k].cnt < supp) && (lists[k].cnt > 0))      break;                    /* 找到第一个非频繁item*/  for (i = k; ++i < fpt->cnt;){     if (lists[i].cnt < supp)          continue;                     prev = NULL;                    for (node = lists[i].node; node; node = node->succ) {      for (anc = node->parent; anc && (lists[anc->item].cnt < supp); )        anc = anc->parent;      /* 跳过非频繁item的祖先item*/      if (anc && anc->copy)            anc = anc->copy;             if (prev && (anc == prev->parent)) {        prev->cnt += node->cnt; /* 如果两个连续的节点有共同的父结点则合并这两个节点 */        node->copy = prev; }          else {                    /* 如果没有父结点或者有不同的父结点则设置新的父结点*/        node->parent = anc;            prev = node;                  }                             }                             }                               for ( ; k < fpt->cnt; k++) {      node = lists[k].node;           if (!node) continue;        /* 跳过空的层*/    if (lists[k].cnt < supp) {        do {                              anc  = node;                    node = node->succ;             ms_free(fpt->mem, anc);       } while (node);                 lists[k].node = NULL; lists[k].cnt = 0;      continue;                     }    while (node->succ) {              if (!node->succ->copy) {          node = node->succ; continue; }        anc = node->succ;               node->succ = anc->succ;        ms_free(fpt->mem, anc);      }                            }}   /* 搜索FP*/static int _search (FPTREE *fpt, int d, FPRS *rs){                                int     i, k;                   FPTREE  *proj;                  FPTLIST *list;                  assert(fpt);                    d++;                            for (i = fpt->cnt; --i >= 0; ) {    list = fpt->lists +i;            if (list->cnt < rs->supp) {       _prune(fpt); continue; }      rs->items[d-1] = i;             if (d >= rs->min)                 rs->cnt += rs->report(rs->items, d, list->cnt, rs->data);    if (!fpt->lists[i].node         ||  (d >= rs->max)              ||  (i <= 0)) {                   _prune(fpt); continue; }      proj = rs->proj(fpt, i);        if (rs->bonsai)                   _bonsai(proj, rs->supp);      k = _search(proj, d, rs);       free(proj);                    if (k) return -1;             }  return 0;     } /*--------------------------------------------------------------------*/int fpt_search (FPTREE *fpt, int supp, int min, int max, int mode,                FPTREPFN report, void *data){                                 int  n;                         FPRS *rs;                       assert(fpt                    	 && (supp > 0) && (max >= min) && (min >= 0) && report);  rs = (FPRS*)malloc(sizeof(FPRS) +(fpt->cnt -1) *sizeof(int));  if (!rs) return -1;             rs->supp   = (supp > 0) ? supp : 1;  rs->min    = min;                rs->max    = max;               rs->bonsai = (mode & FPT_BONSAI)  ? 1 : 0;  rs->proj   = _project1;  rs->report = report;  rs->data   = data;  rs->cnt    = 0;                 if (_search(fpt, 0, rs) != 0)     return -1;                    n = rs->cnt; free(rs);          return n;                     } /*--------------------------------------------------------------------*/#ifndef NDEBUGvoid fpt_show (FPTREE *fpt, const char *title){                               int     i;                      FPTNODE *node;                                                                printf("\n%s\n", title);              for (i = 0; i < fpt->cnt; i++) {             printf("%s ", is_name(fpt->itemset, i));     printf("(%d):", fpt->lists[i].cnt);          for (node = fpt->lists[i].node; node; node = node->succ) {      printf(" %d", node->cnt);       if (node->parent)                 printf("[%s]", is_name(fpt->itemset, node->parent->item));    }                           printf("\n");                 }                             }  #endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -