⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fpgrowth.c

📁 Data Minig中的FP GROWTH 算法,附带test实例及实验数据分析
💻 C
字号:
/*----------------------------------------------------------------------
  文件    : fpgrowth.c
  内容: fpgrowth 算法主程序 
  作者  : 杨明智,黄昊          
----------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <float.h>
#include <math.h>
#include <time.h>
#include <assert.h>
#include "scan.h"
#include "fptree.h"
#ifdef STORAGE
#include "storage.h"
#endif

/*----------------------------------------------------------------------
 程序预处理定义部分 
----------------------------------------------------------------------*/
#define PRGNAME     "FP_05213149_05213084"
#define DESCRIPTION "使用 fpgrowth 算法" \
                    "查找频繁集。"
#define VERSION     "version 1.0 (2006.6.25)" \
                    "实现:杨明智(05213149),黄昊(05213084)"

#define OF_REL      0x01        
#define OF_ABS      0x02        
#define OF_DEV      0x04        
#define OF_SCANF    0x08        
/*定义错误代码*/
#define E_OPTION    (-5)        
#define E_OPTARG    (-6)        
#define E_ARGCNT    (-7)        
#define E_STDIN     (-8)        
#define E_SUPP      (-9)        
#define E_ITEMCNT  (-10)        
#define E_NOTAS    (-11)        
#define E_UNKNOWN  (-18)        

#ifndef QUIET 
/*输出信息预定义*/
#define MSG(x)        x         
#else                           
#define MSG(x)                  
#endif

#define SEC_SINCE(t)  ((clock()-(t)) /(double)CLOCKS_PER_SEC)
#define RECCNT(s)     (tfs_reccnt(is_tfscan(s)) \
                      + ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))
#define BUFFER(s)     tfs_buf(is_tfscan(s))
/*----------------------------------------------------------------------
 常量定义部分 
----------------------------------------------------------------------*/
#define LN_2       0.69314718055994530942  

#ifndef QUIET                   
/*错误信息输出定义*/
static const char *errmsgs[] = {  "no error\n", "not enough memory\n","cannot open file %s\n",
 "read error on file %s\n", "write error on file %s\n","unknown option -%c\n",
"missing option argument\n", "wrong number of arguments\n","double assignment of standard input\n",
"invalid minimal support %g%%\n","invalid number of items %d\n", "no items or transactions to work on\n",
 NULL, NULL, NULL, NULL, "file %s, record %d: item expected\n","file %s, record %d: duplicate item %s\n",
 "unknown error\n"};
#endif

/*----------------------------------------------------------------------
  全局变量定义
----------------------------------------------------------------------*/
#ifndef QUIET
static char    *prgname;        
#endif
static ITEMSET *itemset = NULL; /* item集 */
static TASET   *taset   = NULL; /* transaction集 */
static FPTREE  *fptree  = NULL; 
static FILE    *in      = NULL; /* 输入文件名 */
static FILE    *out     = NULL; /* 输出文件名 */
static int     tacnt    = 0;    /* transaction 的数目 */
static double  mindev   = -DBL_MAX; 
static double  *logfs   = NULL;    
static int     flags    = OF_REL;   /*输出标志位 */
static char    *fmt     = "%.1f";    
static char    buf[4*TFS_SIZE+4];   

/*----------------------------------------------------------------------
  项集结果输出函数 
----------------------------------------------------------------------*/
static int _report (int *ids, int cnt, int supp, void *data)
{    
  int        i; 
  const char *name; 
  double     dev = 0; 
  assert(ids);                  /* 检查参数是否正确 */
  if (flags & OF_DEV) {         
    for (i = cnt; --i >= 0; )   
      dev += logfs[ids[i]];     
    dev = (log(supp/(double)tacnt) -dev) /LN_2;
    if (dev < mindev) return 0; 
  }                             
  for (i = 0; i < cnt; i++) {   
    name = is_name(itemset, ids[i]);
    if (flags & OF_SCANF) {     
      sc_format(buf, name, 0); name = buf; }
    fputs(name, out); fputc(' ', out);
  }                             /* 输出item的名*/
  fputs(" (", out);             /* 输出支持度*/
  if (flags & OF_REL) { fprintf(out, fmt, (supp/(double)tacnt) *100);
                        fputs((flags & OF_ABS) ? "%/" : "%", out); }
  if (flags & OF_ABS) { fprintf(out, "%d", supp); }
  if (flags & OF_DEV) { fputs(", ", out); fprintf(out, fmt, dev); }
  fputs(")\n", out);            
  return 1;   
} 

/*----------------------------------------------------------------------
  错误处理函数 
----------------------------------------------------------------------*/
static void error (int code, ...)
{  
  #ifndef QUIET 
  va_list    args;              /* 参数表*/
  const char *msg;              /* 错误信息变量 */

  assert(prgname);              
  if (code < E_UNKNOWN) code = E_UNKNOWN;
  if (code < 0) {               /* 如果出现错误(错误代码为负)*/
    msg = errmsgs[-code];       /* 获取具体的错误信息 */
    if (!msg) msg = errmsgs[-E_UNKNOWN];
    fprintf(stderr, "\n%s: ", prgname);
    va_start(args, code);      
    vfprintf(stderr, msg, args);/* 输出错误信息 */
    va_end(args);  
  }
  #endif
  #ifndef NDEBUG                
  if (fptree)  fpt_delete(fptree);    
  if (taset)   tas_delete(taset, 0);  
  if (itemset) is_delete(itemset);
  if (in  && (in  != stdin))  fclose(in);
  if (out && (out != stdout)) fclose(out);
  #endif
  #ifdef STORAGE                
  showmem("at end of program"); 
  #endif
  exit(code);   
} 

/*--------------------------------------------------------------------
 程序主函数 
--------------------------------------------------------------------*/
int main (int argc, char *argv[])
{                              
  int     i, k = 0, n;          /* 循环计数标志 */
  char    *s       =  NULL;                   
  char    **optarg = NULL;      
  char    *fn_in   = NULL;      /* 输入文件名 */
  char    *fn_out  = NULL;      /* 输出文件名 */
  char    *blanks  = NULL;      
  char    *fldseps = NULL;      
  char    *recseps = NULL;      
  char    *cominds = NULL;   
  char    *transactionThreshold = NULL;
  double  supp     = 0.1;       /* 默认最小支持度 */
  int     min      =  1;        
  int     max      =  5;        
  int     sort     = -2;        /* 排序标志位 */
  int     mode     = FPT_BONSAI;
  int     heap     =  1;       
  int     *map;                 
  clock_t t;                    /* 计时器 */
  double  totalTime=0;
  #ifndef QUIET   
  prgname = argv[0]; 

  /* 使用方法介绍 */
  if (argc > 1) {      
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } 
  else {    
    printf("用法: %s Transaction_file Transaction_number Item_number Support_threshold\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    return 0;   
  }                             
  #endif

  fn_in=argv[1];
  fn_out="pattern.txt";
  transactionThreshold = argv[4];
  supp   = 0.01*strtod(transactionThreshold, &transactionThreshold);  

  /* 建立item集和transaction集 */
  itemset = is_create();        
  if (!itemset) error(E_NOMEM); 
  is_chars(itemset, blanks, fldseps, recseps, cominds);
  taset = tas_create(itemset); 
  if (!taset) error(E_NOMEM);  
  MSG(fprintf(stderr, "\n"));  

  /* 读取输入文件,获得并计算transaction和item*/
  t = clock();                  /* 开始计时器*/
  if (fn_in && *fn_in)          
    in = fopen(fn_in, "r");  
  else {   
    in = stdin; fn_in = "<stdin>"; } 
  MSG(fprintf(stderr, "读取文件 %s : ", fn_in));
  if (!in) error(E_FOPEN, fn_in);
  for (tacnt = 0; 1; tacnt++) { /* 循环读取transaction */
    k = is_read(itemset, in);  
    if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));
    if (k > 0) break;           /* 如果出现文件读取错误或者文件结束,则退出循环 */
    if (tas_add(taset, NULL, 0) != 0)
      error(E_NOMEM);          
  }                            
  if (in != stdin) fclose(in); 
  in = NULL;                    
  n  = is_cnt(itemset);         
  MSG(fprintf(stderr, "[%d item,", n));
  MSG(fprintf(stderr, " %d transaction] 用时: ", tacnt));
  MSG(fprintf(stderr, "[%.2fs].", SEC_SINCE(t)));
  totalTime+=SEC_SINCE(t);
  if ((n <= 0) || (tacnt <= 0)) error(E_NOTAS);
  MSG(fprintf(stderr, "\n"));   
  if (supp < 0) {      
    if (!(flags & OF_ABS)) flags = (flags & ~OF_REL) | OF_ABS;
    supp = (-100 *supp -0.5) /tacnt;
    if (supp < 0) supp = 0;     /* 计算支持度 */
  }    

  /* 把item按降序排序 */
  supp = ceil(tacnt *supp);    
  MSG(fprintf(stderr, "Item 排序和编码: "));
  t   = clock();                /* 开始计时器 */
  map = (int*)malloc(is_cnt(itemset) *sizeof(int));
  if (!map) error(E_NOMEM);     
  n = is_recode(itemset, (int)supp, sort, map);
  is_trunc(itemset, n);         
  tas_recode(taset, map, n);    
  free(map);                    
  MSG(fprintf(stderr, "[%d item] ", n));
  MSG(fprintf(stderr, "用时: [%.2fs].", SEC_SINCE(t)));
  totalTime+=SEC_SINCE(t);
  if (n <= 0) error(E_NOTAS);   
  MSG(fprintf(stderr, "\n"));   /* 检查Item数目 */
  if (flags & OF_DEV) {        
    logfs = (double*)malloc(n *sizeof(double));
    if (!logfs) error(E_NOMEM); 
    for (i = n; --i >= 0; ) {  
      k = is_getfrq(itemset, i);
      logfs[i] = (k > 0) ? log(k/(double)tacnt) : 0;
    }                           
  }                        

  /* 建立FP树*/
  MSG(fprintf(stderr, "建立FP树"));
  t = clock();                  /* 开始计时器 */
  tas_sort(taset, heap);        /* 排序transaction */
  fptree = fpt_create(taset);   /* 建立FP树 */
  if (!fptree) error(E_NOMEM);  
  tas_delete(taset, 0);         /* 删除transaction集并打印用时信息 */
  taset = NULL;               
  MSG(fprintf(stderr, "用时: [%.2fs].\n", SEC_SINCE(t)));
  totalTime+=SEC_SINCE(t);

  /* 找到频繁集 */
  t = clock();                  /* 开始计时器 */
  if (fn_out && *fn_out)        
    out = fopen(fn_out, "w");   
  else {                        
    out = stdout; fn_out = "<stdout>"; }   
  MSG(fprintf(stderr, "写结果文件 %s ", fn_out));
  if (!out) error(E_FOPEN, fn_out);
  k = fpt_search(fptree, (int)supp, min, max, mode, _report, NULL);
  if (k < 0) error(E_NOMEM);    /* 搜索频繁集 */
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
  if (out != stdout) fclose(out);
  out = NULL;                   /* 关闭输出文件 */
  MSG(fprintf(stderr, "[%d set] 用时: ", k));
  MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));
  totalTime+=SEC_SINCE(t);
  MSG(fprintf(stderr, "\n总用时:[%.2fs].\n", totalTime));

  #ifndef NDEBUG                
  if (logfs) free(logfs);       
  fpt_delete(fptree);           
  is_delete(itemset);           
  #endif
  #ifdef STORAGE                
  showmem("FP算法程序结束"); 
  #endif
  return 0;                     
} 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -