⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assoc.cc,v

📁 charm是基于垂直数据集挖掘关联规则的一个著名算法
💻 CC,V
字号:
head	1.3;access;symbols;locks	zaki:1.3; strict;comment	@// @;1.3date	2001.09.22.19.35.24;	author zaki;	state Exp;branches;next	1.2;1.2date	2001.06.24.21.17.20;	author zaki;	state Exp;branches;next	1.1;1.1date	2001.06.12.16.41.58;	author zaki;	state Exp;branches;next	;desc@Charm with Hashing.@1.3log@*** empty log message ***@text@#ifdef __GNUC__#pragma implementation "Array.h"#pragma implementation "Util.h"#pragma implementation "List.h"#endif#include <iostream.h>#include <fstream.h>#include <errno.h>#include <stdio.h>#include <stdlib.h>#include <sys/resource.h>#include <unistd.h>#include <math.h>#include <fcntl.h>  //file io#include "assoc.h"#include "eclat.h"#include "extl2.h"#include "partition.h"#include "Graph.h"#include "Itemset.h"#include "timetrack.h"#include "Count.h"#include "calcdb.h"//--------------------------------------#include "stats.h" //MJZdouble Stats::tottime = 0;int Stats::totcand = 0;int Stats::totlarge = 0;Stats stats;ofstream summary("summary.out", ios::app);//--------------------------------------unsigned long int DB_size=0;unsigned long int total_scan=0;unsigned long int sumtidlist=0;struct timeval tp;char hdataf[300];char dataf[300];char idxf[300];char conf[300];char it2f[300];char tempf[300];boolean ext_l2_pass = FALSE;boolean use_char_extl2 = FALSE;boolean use_maximal = FALSE;boolean use_lb = FALSE;boolean use_diff = FALSE;boolean diff_input = FALSE;boolean use_output = FALSE;boolean use_diff_f2 = FALSE;boolean use_horizontal=FALSE;boolean use_hash=FALSE;boolean use_hash_map=FALSE;boolean memflg = FALSE; //compute tot mem usageint process_order = IN_SUP;int maxtidlistsz;ofstream FOUT;extern CountAry FreCount,CloCount;Dbase_Ctrl_Blk *DCB=NULL; //which algorithm to run and parameterschar *alg_name[] = {#define BFS 0   "bfs",#define EQC 1   "eqc",#define SEARCH 2   "search",   NULL};boolean sort_ascend = TRUE;int DBASE_NUM_TRANS;int DBASE_MAXITEM;float DBASE_AVG_TRANS_SZ;int DBASE_MINTRANS; //works only with 1 partitionint DBASE_MAXTRANS;int MINSUPPORT=-1;double MINSUP_PER;long AVAILMEM = 64*MBYTE;long TOTMEM=0;Graph *F2Graph;void parse_args(int argc, char **argv){   extern char * optarg;   char *options, *value;   extern int optind;   int c;   sprintf(tempf,"/tmp/tmp");   if (argc < 2)      cout << "usage: getsup -i<infile>\n";   else{      while ((c=getopt(argc,argv,"a:dDce:hH:i:lm:Mos:zZ:x:"))!=-1){         switch(c){         case 'a': //which algorithm to run            options = optarg;            //cout << ((ECLAT_eqc)?"eqc=1":"eqc=0") << " " <<            //   ((ECLAT_bfs)?"bfs=1":"bfs=0") << " "<<             //   ((ECLAT_search==BOTUP)?"bot":"hyb") << endl;            break;         case 'c':            use_char_extl2 = TRUE;            break;	 case 'd':	   use_diff = TRUE;	   break;         case 'D':            diff_input = TRUE;            use_diff = TRUE;                        break;                     case 'e': //calculate L2 from inverted dbase            num_partitions = atoi(optarg);            ext_l2_pass = TRUE;            break;         case 'h':            use_horizontal = TRUE;            break;         case 'H':            if (atoi(optarg) == 1) use_hash_map = TRUE;            else use_hash = TRUE;            break;         case 'i': //input files            sprintf(dataf,"%s.tpose", optarg);            sprintf(idxf,"%s.idx", optarg);            sprintf(conf,"%s.conf", optarg);            sprintf(it2f,"%s.2it", optarg);            sprintf(hdataf,"%s.data", optarg);            break;         case 'l':            use_diff_f2 = TRUE;            use_diff = TRUE;            break;                          case 'm': //amount of mem available            AVAILMEM = (long) atof(optarg)*MBYTE;            break;	 case 'M':	   memflg = TRUE;	   break; 	 case 'o':            use_output = TRUE;            break;         case 's': //min support            MINSUP_PER = atof(optarg);            break;         case 'z': //sort items in descending order            sort_ascend = FALSE;            break;         case 'Z': //processing order (lex=0, incr=1 (default) ,decr=2)            process_order = atoi(optarg);            break;         case 'x':            sprintf(tempf,"%s",optarg);            break;         }      }   }   if (diff_input) use_diff_f2 = 0;      if (process_order == IN_SUP) sort_ascend = TRUE;   else if (process_order == DE_SUP) sort_ascend = FALSE;      ifstream cfd(conf,ios::in);   if (!cfd){      perror("ERROR: invalid conf file\n");      exit(errno);   }   cfd.read((char *)&DBASE_NUM_TRANS,ITSZ);   if (MINSUPPORT == -1)      MINSUPPORT = (int) (MINSUP_PER*DBASE_NUM_TRANS+0.5);   //ensure that support is at least 2   if (MINSUPPORT < 1) MINSUPPORT = 1;   //cout << "MINSUPPORT " << MINSUPPORT << " " << " " <<      //MINSUP_PER << " " << DBASE_NUM_TRANS << endl;   cfd.read((char *)&DBASE_MAXITEM,ITSZ);   cfd.read((char *)&DBASE_AVG_TRANS_SZ,sizeof(float));   cfd.read((char *)&DBASE_MINTRANS,ITSZ);   cfd.read((char *)&DBASE_MAXTRANS,ITSZ);   //cout << "CONF " << DBASE_NUM_TRANS << " " << DBASE_MAXITEM << " "        //<< DBASE_AVG_TRANS_SZ << endl;   cfd.close();   if (use_horizontal){      diff_input = FALSE; //no diff input      ext_l2_pass = FALSE; //no vertical ext pass   }      if ((use_hash || use_hash_map) && diff_input){      cout << "HASH AND DIFFIN DO NOT WORK!\n";      exit(0);   }   }int main(int argc, char **argv){   TimeTracker tt,tt1;   tt.Start();   int i;   double te,ts;      seconds(ts);   parse_args(argc, argv);   if (use_horizontal) DCB =  new Dbase_Ctrl_Blk(hdataf);   else partition_alloc(dataf, idxf);   maxtidlistsz = make_l1_pass(*DCB);   seconds(te);      iterstat *is = new iterstat(DBASE_MAXITEM, 0, te-ts);   stats.add(is);      Itemset::alloc_tmpiset(maxtidlistsz);      if (use_horizontal) Eqclass::alloc_tmpiset(0);   else Eqclass::alloc_tmpiset(maxtidlistsz);   tt1.Start();   make_l2_pass(ext_l2_pass, it2f, *DCB);   double l2time=tt1.Stop();   is = new iterstat(DBASE_MAXITEM*(DBASE_MAXITEM-1)/2,0,l2time);   stats.add(is);            ECLAT_Find_Freq();      seconds(te);   cout << "TOT TIME " << te-ts << endl;      //cout << "LARGE PATTERNS ";   //for (i=0; i < NumLargeItemset.size(); i++)  //    cout << " " << NumLargeItemset[i];  // cout << endl;   // cout << "Total elapsed time " << te-ts << endl;   /*   fout << "ASSOC ";   if (ECLAT_search == HYBRID) fout << "MAX ";   if (ECLAT_eqc == 1) fout << "ECLAT ";   else fout << "CLIQUE ";   if (ECLAT_bfs == 0) fout << "DFS ";   fout << dataf << " " << MINSUP_PER << " ";   for (i=0; i < NumLargeItemset.size(); i++)      fout << " " << NumLargeItemset[i];   fout << " " << te-ts << endl;*/    double cputime1 = tt.Stop();   //cout << "NumIntersect=" << Itemset::NumIntersect    //   << " DB size:" << DB_size << " total scan:" << total_scan   //   << " scan#:" << (float)total_scan/DB_size << endl;    //cout << "cputime=" << cputime1 << "  L2Time=" << l2time    //   << "  CT-L2T=" << cputime1-l2time << endl;        cout << "CHARM ";   if (diff_input) cout << "DIFFIN ";   else if (use_diff) cout << "DIFF ";   if (process_order == IN_SUP) cout << "IN_SUP ";   else if (process_order == DE_SUP) cout << "DE_SUP ";   else cout << "LEX ";   cout << dataf << " " << MINSUP_PER << " " << DBASE_NUM_TRANS           << " " << MINSUPPORT << endl;   summary << "CHARM ";   if (diff_input) summary << "DIFFIN ";   else if (use_diff_f2) summary << "DIFF2 ";      else if (use_diff) summary << "DIFF ";   if (process_order == IN_SUP) summary << "IN_SUP ";   else if (process_order == DE_SUP) summary << "DE_SUP ";   else summary << "LEX ";   if (use_hash) summary << "USEHASH ";   if (use_hash_map) summary << "USEHASHMAP ";   if (use_horizontal) summary << "HORIZONTAL ";      summary << dataf << " " << MINSUP_PER << " " << DBASE_NUM_TRANS           << " " << MINSUPPORT << " ";      for (i=0; i < CloCount.ctr()->size(); i++){      stats.setlarge(i,(*CloCount.ctr())[i]);         stats[i]->avgtid /= (*FreCount.ctr())[i];      cout << i+1 << " " << (*CloCount.ctr())[i] << " "            << (*FreCount.ctr())[i] << flush << endl;   }      stats.tottime = cputime1;      summary << stats << " " << Itemset::NumIntersect << " "            << DB_size << " " << total_scan << " " << sumtidlist << " "            << (float)total_scan/DB_size << " " << te-ts << " "<< maxiter;      struct rusage ruse;     getrusage(RUSAGE_SELF,&ruse);      summary << " " << getsec(ruse.ru_utime) << " "            << getsec(ruse.ru_stime) << endl;      summary.close();      if (use_horizontal) DCB->delete_tidbuf();   else partition_dealloc();   //cout << sizeof(int) << "  "<< sizeof(long int) << endl;   exit(0);}@1.2log@added use_hash_map@text@d60 1d89 1d103 1a103 1      while ((c=getopt(argc,argv,"a:dDce:hH:i:lm:os:zZ:x:"))!=-1){a106 22            while (*options != '\0') {               switch(getsubopt(&options,alg_name,&value)) {               case EQC:                  if (value) ECLAT_eqc = (atoi(value) == 1) ? TRUE:FALSE;                  break;               case BFS:                  if (value) ECLAT_bfs = (atoi(value) == 1) ? TRUE:FALSE;                  break;               case SEARCH:                  if (value){                     switch (value[0]){                     case 'b':                        ECLAT_search = BOTUP;                        break;                     case 'h':                        ECLAT_search = HYBRID;                        break;                     }                     break;                  }               }            }d146 3@1.1log@Initial revision@text@a19 1#include "gbk.h"d59 1d101 1a101 1      while ((c=getopt(argc,argv,"a:dDce:hHi:lm:os:zZ:x:"))!=-1){d149 2a150 1            use_hash = TRUE;d215 1a215 1   if (use_hash && diff_input){d302 4@

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -