📄 fptree.c
字号:
/*---------------------------------------------------------------------- 文件 : fpgrowth.c 内容: fptree 生成程序 作者 : 杨明智,黄昊 ----------------------------------------------------------------------*/ #include <stdio.h>#include <stdlib.h>#include <assert.h>#include "fptree.h"#ifdef STORAGE#include "storage.h"#endif#define BLKSIZE 6553 typedef FPTREE* PROJFN (FPTREE *fpt, int item);typedef struct { /* REC.SEARCH的结构定义*/ int min; /* 最小项数 */ int max; /* 最大项数 */ int supp; /* 最小支持度 */ int bonsai; /* 使用bonsai算法剪枝的标志 */ PROJFN *proj; /* 投影函数 */ FPTREPFN *report; /* 结果汇报函数 */ void *data; /* 汇报函数的使用数据 */ int cnt; /* 频繁项集中的项数 */ int items[1]; /* 汇报项的向量 */} FPRS; /*---------------------------------------------------------------------- 主函数----------------------------------------------------------------------*/static FPTREE* _create (MEMSYS *mem, int cnt){ /* 创造一个新的FP树 */ FPTREE *fpt; /* 频繁模式树 */ FPTLIST *list; /* 节点链表 */ assert(cnt > 0); fpt = (FPTREE*)malloc(sizeof(FPTREE) +(cnt-1) *sizeof(FPTLIST)); if (!fpt) return NULL; fpt->cnt = cnt; if (mem) fpt->mem = mem; /* 如果有存储管理器,简单地进行存放 */ else { /*否则安排一个存储系统*/ fpt->mem = ms_create(sizeof(FPTNODE), BLKSIZE); if (!fpt->mem) { free(fpt); return NULL; } } for (list = fpt->lists +cnt; --cnt >= 0; ) { (--list)->cnt = 0; list->node = NULL; } return fpt; /* 初始化节点链表 */} /* 返回初始FP树 *//*--------------------------------------------------------------------*/static int _build (FPTREE *fpt, FPTNODE *parent, TASET *taset, int lft, int rgt, int pos){ /* 递归建立FP树 */ int i, k; /* loop 变量, buffer */ int item; FPTNODE *node; /* 创造频繁模式树节点 */ assert(fpt && taset && (pos >= 0)); while ((lft <= rgt) && (tas_tsize(taset, lft) <= pos)) lft++; if (lft > rgt) return 0; item = k = tas_tract(taset, i = rgt)[pos]; /* 获取第一个项 */ do { while (--i >= lft) { k = tas_tract(taset, i)[pos]; if (k != item) break; /* 寻找含有不同item的transaction*/ } node = ms_alloc(fpt->mem); /* 为当前item创建新的结点并在结点中保存item*/ if (!node) return -1; node->item = item; node->succ = fpt->lists[item].node; fpt->lists[item].node = node; node->parent = parent; /* 把新建结点加入到结点列表中并计算支持度*/ node->copy = NULL; fpt->lists[item].cnt += node->cnt = rgt -i; if (_build(fpt, node, taset, i+1, rgt, pos+1) != 0) return -1; /* 递归建立子结点*/ item = k; rgt = i; } while (lft <= rgt); return 0; } /*--------------------------------------------------------------------*/FPTREE* fpt_create (TASET *taset){ /* 建立FP树*/ FPTREE *fpt; assert(taset); /* 检查参数是否合法 */ fpt = _create(NULL, is_cnt(tas_itemset(taset))); if (!fpt) return NULL; fpt->itemset = tas_itemset(taset); fpt->tra = tas_cnt(taset); if ((fpt->tra > 0) /* 至少需要有一个transaction*/ && (_build(fpt, NULL, taset, 0, fpt->tra -1, 0) != 0)) { fpt_delete(fpt); return NULL; } return fpt; /* 递归构建FP树*/} /* 删除FP树*/void fpt_delete (FPTREE *fpt){ assert(fpt); ms_delete(fpt->mem); free(fpt); } static void _prune (FPTREE *fpt){ /* 剪枝函数 */ FPTNODE *node, *buf; assert(fpt); /* 检查参数是否合法 */ for (node = fpt->lists[--fpt->cnt].node; node; ) { buf = node; /* 如果存在另外一个结点,则标记现有结点并从表中删除 */ node = node->succ; ms_free(fpt->mem, buf); } } /* 分离投影 */static void _detach (FPTREE *fpt){ FPTNODE *node, *anc; assert(fpt); /* 检查参数合法性 */ node = fpt->lists[--fpt->cnt].node; while (node) { for (anc = node->parent; anc && anc->copy; anc = anc->parent) anc->copy = NULL; anc = node; node = node->succ; ms_free(fpt->mem, anc); } } /*如果失败则处理清除工作*/static void _cleanup (FPTREE *fpt, FPTREE *proj){ assert(fpt && proj); _detach(fpt); while (proj->cnt > 0) _prune(proj); free(proj); } /* 投影FP树 */static FPTREE* _project1 (FPTREE *fpt, int item){ int i; /* 循环标记位*/ FPTREE *proj; FPTNODE *node, *anc, *copy; FPTNODE **prev; /* 祖先结点指针 */ FPTLIST *lists; assert(fpt); /* 检查参数合法性 */ proj = _create(fpt->mem, item); if (!proj) return NULL; /* 建立基础FP树*/ proj->itemset = fpt->itemset; lists = proj->lists; /* 获得FP树投影的结点列表 */ for (node = fpt->lists[item].node; node; node = node->succ) { prev = NULL; for (anc = node->parent; anc && !anc->copy; anc = anc->parent) { anc->copy = copy = ms_alloc(fpt->mem); if (!copy) { _cleanup(fpt, proj); return NULL; } if (prev) *prev = copy; copy->item = i = anc->item; copy->succ = lists[i].node; lists[i].node = copy; lists[i].cnt += copy->cnt = node->cnt; copy->copy = NULL; /* 设立节点的支持度并记录父结点指针*/ prev = ©->parent; } if (prev) *prev = (anc) ? anc->copy : NULL; for ( ; anc; anc = anc->parent) { anc->copy->cnt += node->cnt; lists[anc->item].cnt += node->cnt; } /* 向根遍历祖先结点并计算支持度总和*/ } _detach(fpt); return proj; } /* 将投影剪枝到bonsai函数 */static void _bonsai (FPTREE *fpt, int supp){ int i, k; FPTLIST *lists; /* 跟踪层列表*/ FPTNODE *node, *anc, *prev; /* 跟踪FP树结点*/ lists = fpt->lists; for (i = fpt->cnt; (--i >= 0) && (lists[i].cnt < supp); ) _prune(fpt); /* 把最深的那层剪枝*/ for (k = 0; k < fpt->cnt; k++) if ((lists[k].cnt < supp) && (lists[k].cnt > 0)) break; /* 找到第一个非频繁item*/ for (i = k; ++i < fpt->cnt;){ if (lists[i].cnt < supp) continue; prev = NULL; for (node = lists[i].node; node; node = node->succ) { for (anc = node->parent; anc && (lists[anc->item].cnt < supp); ) anc = anc->parent; /* 跳过非频繁item的祖先item*/ if (anc && anc->copy) anc = anc->copy; if (prev && (anc == prev->parent)) { prev->cnt += node->cnt; /* 如果两个连续的节点有共同的父结点则合并这两个节点 */ node->copy = prev; } else { /* 如果没有父结点或者有不同的父结点则设置新的父结点*/ node->parent = anc; prev = node; } } } for ( ; k < fpt->cnt; k++) { node = lists[k].node; if (!node) continue; /* 跳过空的层*/ if (lists[k].cnt < supp) { do { anc = node; node = node->succ; ms_free(fpt->mem, anc); } while (node); lists[k].node = NULL; lists[k].cnt = 0; continue; } while (node->succ) { if (!node->succ->copy) { node = node->succ; continue; } anc = node->succ; node->succ = anc->succ; ms_free(fpt->mem, anc); } }} /* 搜索FP*/static int _search (FPTREE *fpt, int d, FPRS *rs){ int i, k; FPTREE *proj; FPTLIST *list; assert(fpt); d++; for (i = fpt->cnt; --i >= 0; ) { list = fpt->lists +i; if (list->cnt < rs->supp) { _prune(fpt); continue; } rs->items[d-1] = i; if (d >= rs->min) rs->cnt += rs->report(rs->items, d, list->cnt, rs->data); if (!fpt->lists[i].node || (d >= rs->max) || (i <= 0)) { _prune(fpt); continue; } proj = rs->proj(fpt, i); if (rs->bonsai) _bonsai(proj, rs->supp); k = _search(proj, d, rs); free(proj); if (k) return -1; } return 0; } /*--------------------------------------------------------------------*/int fpt_search (FPTREE *fpt, int supp, int min, int max, int mode, FPTREPFN report, void *data){ int n; FPRS *rs; assert(fpt && (supp > 0) && (max >= min) && (min >= 0) && report); rs = (FPRS*)malloc(sizeof(FPRS) +(fpt->cnt -1) *sizeof(int)); if (!rs) return -1; rs->supp = (supp > 0) ? supp : 1; rs->min = min; rs->max = max; rs->bonsai = (mode & FPT_BONSAI) ? 1 : 0; rs->proj = _project1; rs->report = report; rs->data = data; rs->cnt = 0; if (_search(fpt, 0, rs) != 0) return -1; n = rs->cnt; free(rs); return n; } /*--------------------------------------------------------------------*/#ifndef NDEBUGvoid fpt_show (FPTREE *fpt, const char *title){ int i; FPTNODE *node; printf("\n%s\n", title); for (i = 0; i < fpt->cnt; i++) { printf("%s ", is_name(fpt->itemset, i)); printf("(%d):", fpt->lists[i].cnt); for (node = fpt->lists[i].node; node; node = node->succ) { printf(" %d", node->cnt); if (node->parent) printf("[%s]", is_name(fpt->itemset, node->parent->item)); } printf("\n"); } } #endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -