📄 fpgrowth.c
字号:
/*----------------------------------------------------------------------
文件 : fpgrowth.c
内容: fpgrowth 算法主程序
作者 : 杨明智,黄昊
----------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <float.h>
#include <math.h>
#include <time.h>
#include <assert.h>
#include "scan.h"
#include "fptree.h"
#ifdef STORAGE
#include "storage.h"
#endif
/*----------------------------------------------------------------------
程序预处理定义部分
----------------------------------------------------------------------*/
#define PRGNAME "FP_05213149_05213084"
#define DESCRIPTION "使用 fpgrowth 算法" \
"查找频繁集。"
#define VERSION "version 1.0 (2006.6.25)" \
"实现:杨明智(05213149),黄昊(05213084)"
#define OF_REL 0x01
#define OF_ABS 0x02
#define OF_DEV 0x04
#define OF_SCANF 0x08
/*定义错误代码*/
#define E_OPTION (-5)
#define E_OPTARG (-6)
#define E_ARGCNT (-7)
#define E_STDIN (-8)
#define E_SUPP (-9)
#define E_ITEMCNT (-10)
#define E_NOTAS (-11)
#define E_UNKNOWN (-18)
#ifndef QUIET
/*输出信息预定义*/
#define MSG(x) x
#else
#define MSG(x)
#endif
#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)
#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \
+ ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))
#define BUFFER(s) tfs_buf(is_tfscan(s))
/*----------------------------------------------------------------------
常量定义部分
----------------------------------------------------------------------*/
#define LN_2 0.69314718055994530942
#ifndef QUIET
/*错误信息输出定义*/
static const char *errmsgs[] = { "no error\n", "not enough memory\n","cannot open file %s\n",
"read error on file %s\n", "write error on file %s\n","unknown option -%c\n",
"missing option argument\n", "wrong number of arguments\n","double assignment of standard input\n",
"invalid minimal support %g%%\n","invalid number of items %d\n", "no items or transactions to work on\n",
NULL, NULL, NULL, NULL, "file %s, record %d: item expected\n","file %s, record %d: duplicate item %s\n",
"unknown error\n"};
#endif
/*----------------------------------------------------------------------
全局变量定义
----------------------------------------------------------------------*/
#ifndef QUIET
static char *prgname;
#endif
static ITEMSET *itemset = NULL; /* item集 */
static TASET *taset = NULL; /* transaction集 */
static FPTREE *fptree = NULL;
static FILE *in = NULL; /* 输入文件名 */
static FILE *out = NULL; /* 输出文件名 */
static int tacnt = 0; /* transaction 的数目 */
static double mindev = -DBL_MAX;
static double *logfs = NULL;
static int flags = OF_REL; /*输出标志位 */
static char *fmt = "%.1f";
static char buf[4*TFS_SIZE+4];
/*----------------------------------------------------------------------
项集结果输出函数
----------------------------------------------------------------------*/
static int _report (int *ids, int cnt, int supp, void *data)
{
int i;
const char *name;
double dev = 0;
assert(ids); /* 检查参数是否正确 */
if (flags & OF_DEV) {
for (i = cnt; --i >= 0; )
dev += logfs[ids[i]];
dev = (log(supp/(double)tacnt) -dev) /LN_2;
if (dev < mindev) return 0;
}
for (i = 0; i < cnt; i++) {
name = is_name(itemset, ids[i]);
if (flags & OF_SCANF) {
sc_format(buf, name, 0); name = buf; }
fputs(name, out); fputc(' ', out);
} /* 输出item的名*/
fputs(" (", out); /* 输出支持度*/
if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100);
fputs((flags & OF_ABS) ? "%/" : "%", out); }
if (flags & OF_ABS) { fprintf(out, "%d", supp); }
if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); }
fputs(")\n", out);
return 1;
}
/*----------------------------------------------------------------------
错误处理函数
----------------------------------------------------------------------*/
static void error (int code, ...)
{
#ifndef QUIET
va_list args; /* 参数表*/
const char *msg; /* 错误信息变量 */
assert(prgname);
if (code < E_UNKNOWN) code = E_UNKNOWN;
if (code < 0) { /* 如果出现错误(错误代码为负)*/
msg = errmsgs[-code]; /* 获取具体的错误信息 */
if (!msg) msg = errmsgs[-E_UNKNOWN];
fprintf(stderr, "\n%s: ", prgname);
va_start(args, code);
vfprintf(stderr, msg, args);/* 输出错误信息 */
va_end(args);
}
#endif
#ifndef NDEBUG
if (fptree) fpt_delete(fptree);
if (taset) tas_delete(taset, 0);
if (itemset) is_delete(itemset);
if (in && (in != stdin)) fclose(in);
if (out && (out != stdout)) fclose(out);
#endif
#ifdef STORAGE
showmem("at end of program");
#endif
exit(code);
}
/*--------------------------------------------------------------------
程序主函数
--------------------------------------------------------------------*/
int main (int argc, char *argv[])
{
int i, k = 0, n; /* 循环计数标志 */
char *s = NULL;
char **optarg = NULL;
char *fn_in = NULL; /* 输入文件名 */
char *fn_out = NULL; /* 输出文件名 */
char *blanks = NULL;
char *fldseps = NULL;
char *recseps = NULL;
char *cominds = NULL;
char *transactionThreshold = NULL;
double supp = 0.1; /* 默认最小支持度 */
int min = 1;
int max = 5;
int sort = -2; /* 排序标志位 */
int mode = FPT_BONSAI;
int heap = 1;
int *map;
clock_t t; /* 计时器 */
double totalTime=0;
#ifndef QUIET
prgname = argv[0];
/* 使用方法介绍 */
if (argc > 1) {
fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
fprintf(stderr, VERSION); }
else {
printf("用法: %s Transaction_file Transaction_number Item_number Support_threshold\n", argv[0]);
printf("%s\n", DESCRIPTION);
printf("%s\n", VERSION);
return 0;
}
#endif
fn_in=argv[1];
fn_out="pattern.txt";
transactionThreshold = argv[4];
supp = 0.01*strtod(transactionThreshold, &transactionThreshold);
/* 建立item集和transaction集 */
itemset = is_create();
if (!itemset) error(E_NOMEM);
is_chars(itemset, blanks, fldseps, recseps, cominds);
taset = tas_create(itemset);
if (!taset) error(E_NOMEM);
MSG(fprintf(stderr, "\n"));
/* 读取输入文件,获得并计算transaction和item*/
t = clock(); /* 开始计时器*/
if (fn_in && *fn_in)
in = fopen(fn_in, "r");
else {
in = stdin; fn_in = "<stdin>"; }
MSG(fprintf(stderr, "读取文件 %s : ", fn_in));
if (!in) error(E_FOPEN, fn_in);
for (tacnt = 0; 1; tacnt++) { /* 循环读取transaction */
k = is_read(itemset, in);
if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));
if (k > 0) break; /* 如果出现文件读取错误或者文件结束,则退出循环 */
if (tas_add(taset, NULL, 0) != 0)
error(E_NOMEM);
}
if (in != stdin) fclose(in);
in = NULL;
n = is_cnt(itemset);
MSG(fprintf(stderr, "[%d item,", n));
MSG(fprintf(stderr, " %d transaction] 用时: ", tacnt));
MSG(fprintf(stderr, "[%.2fs].", SEC_SINCE(t)));
totalTime+=SEC_SINCE(t);
if ((n <= 0) || (tacnt <= 0)) error(E_NOTAS);
MSG(fprintf(stderr, "\n"));
if (supp < 0) {
if (!(flags & OF_ABS)) flags = (flags & ~OF_REL) | OF_ABS;
supp = (-100 *supp -0.5) /tacnt;
if (supp < 0) supp = 0; /* 计算支持度 */
}
/* 把item按降序排序 */
supp = ceil(tacnt *supp);
MSG(fprintf(stderr, "Item 排序和编码: "));
t = clock(); /* 开始计时器 */
map = (int*)malloc(is_cnt(itemset) *sizeof(int));
if (!map) error(E_NOMEM);
n = is_recode(itemset, (int)supp, sort, map);
is_trunc(itemset, n);
tas_recode(taset, map, n);
free(map);
MSG(fprintf(stderr, "[%d item] ", n));
MSG(fprintf(stderr, "用时: [%.2fs].", SEC_SINCE(t)));
totalTime+=SEC_SINCE(t);
if (n <= 0) error(E_NOTAS);
MSG(fprintf(stderr, "\n")); /* 检查Item数目 */
if (flags & OF_DEV) {
logfs = (double*)malloc(n *sizeof(double));
if (!logfs) error(E_NOMEM);
for (i = n; --i >= 0; ) {
k = is_getfrq(itemset, i);
logfs[i] = (k > 0) ? log(k/(double)tacnt) : 0;
}
}
/* 建立FP树*/
MSG(fprintf(stderr, "建立FP树"));
t = clock(); /* 开始计时器 */
tas_sort(taset, heap); /* 排序transaction */
fptree = fpt_create(taset); /* 建立FP树 */
if (!fptree) error(E_NOMEM);
tas_delete(taset, 0); /* 删除transaction集并打印用时信息 */
taset = NULL;
MSG(fprintf(stderr, "用时: [%.2fs].\n", SEC_SINCE(t)));
totalTime+=SEC_SINCE(t);
/* 找到频繁集 */
t = clock(); /* 开始计时器 */
if (fn_out && *fn_out)
out = fopen(fn_out, "w");
else {
out = stdout; fn_out = "<stdout>"; }
MSG(fprintf(stderr, "写结果文件 %s ", fn_out));
if (!out) error(E_FOPEN, fn_out);
k = fpt_search(fptree, (int)supp, min, max, mode, _report, NULL);
if (k < 0) error(E_NOMEM); /* 搜索频繁集 */
if (fflush(out) != 0) error(E_FWRITE, fn_out);
if (out != stdout) fclose(out);
out = NULL; /* 关闭输出文件 */
MSG(fprintf(stderr, "[%d set] 用时: ", k));
MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));
totalTime+=SEC_SINCE(t);
MSG(fprintf(stderr, "\n总用时:[%.2fs].\n", totalTime));
#ifndef NDEBUG
if (logfs) free(logfs);
fpt_delete(fptree);
is_delete(itemset);
#endif
#ifdef STORAGE
showmem("FP算法程序结束");
#endif
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -