⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 treeminer.cpp,v

📁 关联规则中的频繁项集生成算法TreeMiner
💻 CPP,V
📖 第 1 页 / 共 2 页
字号:
head	1.2;access;symbols;locks	zaki:1.2; strict;comment	@// @;1.2date	2001.06.24.19.16.01;	author zaki;	state Exp;branches;next	1.1;1.1date	2001.06.06.00.04.45;	author zaki;	state Exp;branches;next	;desc@@1.2log@added pruning.@text@#include<iostream>#include <unistd.h>#include <algorithm>#include <stdio.h>#include <stl.h>#include <list>using namespace std;//headers#include "treeminer.h"#include "timetrack.h"#include "calcdb.h"#include "eqclass.h"#include "stats.h"#include "hashtable.h"//global varschar infile[300];Dbase_Ctrl_Blk *DCB;Stats stats;double MINSUP_PER;int MINSUPPORT=-1;int DBASE_MAXITEM;int DBASE_NUM_TRANS;//default flagsbool output = false; //don't print freq subtreesbool output_idlist = false; //don't print idlistbool count_unique = true; //count support only once per treebool use_fullpath = false; //use reduced scope to conserve memsort_vals sort_type = nosort; //default is to sort in increasing orderalg_vals alg_type = treeminer; //default is to find all freq patternsprune_vals prune_type = prune; //pruneFreqHT FK; //to store freq subtrees for pruningvector<int> *ITCNT = NULL; //used for sorting F1bool F1cmp(int x, int y){   bool res = false;   if ((*ITCNT)[x] < (*ITCNT)[y]) res = true;   if (sort_type == incr) return res;   else return !res;}   void parse_args(int argc, char **argv){   extern char * optarg;   int c;   if (argc < 2)     cout << "usage: treeminer -i<infile> -s <support>\n";   else{     while ((c=getopt(argc,argv,"a:fi:lop:s:S:uz:"))!=-1){       switch(c){       case 'a':          alg_type = (alg_vals) atoi(optarg);          break;        case 'f':          use_fullpath = true;          break;       case 'i': //input files	 sprintf(infile,"%s",optarg);	 break;       case 'l': //print idlists along with freq subtrees          output=true;          output_idlist = true;          break;       case 'o': //print freq subtrees	 output = true;	 break;       case 'p':          prune_type = (prune_vals) atoi(optarg);          break;        case 's': //support value for L2	 MINSUP_PER = atof(optarg);	 break;       case 'S': //absolute support	 MINSUPPORT = atoi(optarg);	 break;       case 'u': //count support multiple times per tree	 count_unique = false;	 break;       case 'z':          sort_type = (sort_vals) atoi(optarg);          break;       }                    }   }}void get_F1(){  TimeTracker tt;  double te;  int i, j, it;  const int arysz = 10;    vector<int> itcnt(arysz,0); //count item frequency  vector<int> flgs(arysz,-1);  tt.Start();  DBASE_MAXITEM=0;  DBASE_NUM_TRANS = 0;     while(DCB->get_next_trans())   {      //cout << "TRANS " << DCB->Cid << " " << DCB->Tid      //     << " " << DCB->TransSz << " -- ";      //for (i=0; i < DCB->TransSz; i++)      //   cout << " " << DCB->TransAry[i];      //cout << endl;            for (i=0; i < DCB->TransSz; i++){         it = DCB->TransAry[i];         if (it == BranchIt) continue;                  if (it >= DBASE_MAXITEM){            itcnt.resize(it+1,0);            flgs.resize(it+1,-1);            DBASE_MAXITEM = it+1;            //cout << "IT " << DBASE_MAXITEM << endl;         }                  if (count_unique){            if(flgs[it] == DCB->Cid) continue;            else flgs[it] = DCB->Cid;         }         itcnt[it]++;      }            if (DCB->MaxTransSz < DCB->TransSz) DCB->MaxTransSz = DCB->TransSz;           DBASE_NUM_TRANS++;   }      //for (i=0; i < DCB->TransSz; i++){   //   it = DCB->TransAry[i];   //   if (it != BranchIt){   //      cout << it << " " << itcnt[it] << endl;   //   }   //}   //set the value of MINSUPPORT   if (MINSUPPORT == -1)     MINSUPPORT = (int) (MINSUP_PER*DBASE_NUM_TRANS+0.5);      if (MINSUPPORT<1) MINSUPPORT=1;   cout<<"DBASE_NUM_TRANS : "<< DBASE_NUM_TRANS << endl;   cout<<"DBASE_MAXITEM : "<< DBASE_MAXITEM << endl;   cout<<"MINSUPPORT : "<< MINSUPPORT << " (" << MINSUP_PER << ")" << endl;   //count number of frequent items   DCB->NumF1 = 0;   for (i=0; i < DBASE_MAXITEM; i++)     if (itcnt[i] >= MINSUPPORT)       DCB->NumF1++;   int *it_order = new int[DBASE_MAXITEM];   for (i=0; i < DBASE_MAXITEM; i++)      it_order[i] = i;      if (sort_type != nosort){      ITCNT = &itcnt;      sort(&it_order[0], &it_order[DBASE_MAXITEM], F1cmp);   }   //construct forward and reverse mapping from items to freq items   DCB->FreqIdx = new int [DCB->NumF1];   DCB->FreqMap = new int [DBASE_MAXITEM];   for (i=0,j=0; i < DBASE_MAXITEM; i++) {      if (itcnt[it_order[i]] >= MINSUPPORT) {         if (output) cout << i << " " << it_order[i]                           << " - " << itcnt[it_order[i]] << endl;         DCB->FreqIdx[j] = it_order[i];         DCB->FreqMap[it_order[i]] = j;         j++;      }      else DCB->FreqMap[it_order[i]] = -1;   }      cout<< "F1 - " << DCB->NumF1 << " " << DBASE_MAXITEM << endl;        if (sort_type != nosort){      ITCNT = NULL;      delete [] it_order;   }      te = tt.Stop();   stats.add(DBASE_MAXITEM, DCB->NumF1, te);}list<Eqclass *> * get_F2(){  int i,j;  int it1, it2;  int scnt;    TimeTracker tt;  double te;  tt.Start();  list<Eqclass *> *F2list = new list<Eqclass *>;  //itcnt2 is a matrix of pairs p, p.first is count, p.second is flag  int **itcnt2 = new int*[DCB->NumF1];  int **flgs = new int*[DCB->NumF1];  //unsigned int **itcnt2 = new unsigned int *[DCB->NumF1];  for (i=0; i < DCB->NumF1; i++){    itcnt2[i] = new int [DCB->NumF1];    flgs[i] = new int [DCB->NumF1];    //cout << "alloc " << i << " " << itcnt2[i] << endl;    for (j=0; j < DCB->NumF1; j++){      itcnt2[i][j] = 0;      flgs[i][j] = -1;    }  }       DCB->alloc_idlists();      while(DCB->get_next_trans())   {      DCB->get_valid_trans();      DCB->make_vertical();      //DCB->print_trans();      //count a pair only once per cid      for (i=0; i < DCB->TransSz; i++){         it1 = DCB->TransAry[i];         if (it1 != BranchIt){            scnt = 0;            for (j=i+1; scnt >= 0 && j < DCB->TransSz; j++){               it2 = DCB->TransAry[j];               if (it2 != BranchIt){		 scnt++;		 if (count_unique){		   if (flgs[it1][it2] == DCB->Cid) continue;		   else flgs[it1][it2] = DCB->Cid;		 }		 //cout << "cnt " << it1 << " " << it2 << endl;		 itcnt2[it1][it2]++;               }               else scnt--;            }         }      }   }                                 int F2cnt=0;   // count frequent patterns and generate eqclass   Eqclass *eq;   for (i=0; i < DCB->NumF1; i++) {      eq = NULL;      for (j=0; j < DCB->NumF1; j++) {	//cout << "access " << i << " " << j << endl;         if (itcnt2[i][j] >= MINSUPPORT){            F2cnt++;            if (eq == NULL){               eq = new Eqclass;               eq->prefix().push_back(i);            }	    eq->add_node(j,0);            	    if (output) 	      cout << DCB->FreqIdx[i] << " " << DCB->FreqIdx[j] 		   << " - " << itcnt2[i][j] << endl;         }      }         if (eq != NULL) F2list->push_back(eq);   }      for (i=0; i < DCB->NumF1; i++) {     //cout << "dealloc " << i << " " << itcnt2[i] << endl;     delete [] itcnt2[i];     //cout << "dealloc " << i << " " << flgs[i] << endl;     delete [] flgs[i];   }   delete [] itcnt2;   delete [] flgs;         cout << "F2 - " << F2cnt << " " << DCB->NumF1 * DCB->NumF1 << endl;   te = tt.Stop();   stats.add(DCB->NumF1 * DCB->NumF1, F2cnt, te);   return F2list;}static bool notfrequent (Eqnode &n){  //cout << "IN FREQ " << n.sup << endl;  if (n.sup >= MINSUPPORT) return false;  else return true;}void check_ins(idlist *l1, idlist *l2, Eqnode *ins,                 int st1, int st2, int en1, int en2){   static idnode *n1, *n2;   static ival_type cmpval;   bool found_flg = false;   int pos1 = st1; //temporary position holder for n1 ival   bool next2; //should we go to next n2 ival?   //for each ival in n2, find the closest parent in n1   int tpos = ins->tlist.size();   while(st2 < en2){      n1 = &(*l1)[pos1];      n2 = &(*l2)[st2];      next2 = true; //by default we go to next n2 ival      cmpval = ival::compare(n1->itscope, n2->itscope);      switch (cmpval){      case sup:          //n2 was under some n1         if (n1->path_equal(*n2)){            if (en1-st1 > 1 || use_fullpath){               ins->tlist.push_back(idnode(n2->cid, n1->parscope,                                            n1->itscope.lb, n2->itscope));            }            else{               ins->tlist.push_back(idnode(n2->cid,n2->itscope));            }            if (!count_unique) ins->sup++;            found_flg = true;         }                  next2 = false;         break;      case before:          //check next n1 ival for same n2 ival         next2 = false;          break;      }      if (next2 || pos1+1 == en1){ //go to next n2 ival         pos1 = st1;         st2++;      }      else pos1++; //go to next n1 ival   }      if (found_flg && count_unique) ins->sup++;}   void check_outs(idlist *l1, idlist *l2, Eqnode *outs,                int st1, int st2, int en1, int en2){   static idnode *n1, *n2;   static ival_type cmpval;   bool found_flg = false;   bool next2;   int pos1 = st1;      //for each n2 ival find if there is a sibling to the left   int tpos = outs->tlist.size();   while(st2 < en2){      n1 = &(*l1)[pos1];      n2 = &(*l2)[st2];      next2 = true;      cmpval = ival::compare(n1->itscope, n2->itscope);      switch (cmpval){      case sup:         next2 = false;         break;      case before:          //n1 is before n2. Check if n1.par is subset of or equal to n2.par         if (n1->path_equal(*n2)){            if (en1 - st1 > 1 || use_fullpath){               outs->tlist.push_back(idnode(n2->cid, n1->parscope,                                             n1->itscope.lb, n2->itscope));            }            else{               outs->tlist.push_back(idnode(n2->cid,n2->itscope));            }            if (!count_unique) outs->sup++;            found_flg = true;         }         next2 = false;         break;      }      if (next2 || pos1+1 == en1){ //go to next n2 ival         pos1 = st1;         st2++;      }      else pos1++; //go to next n1 ival   }   if (found_flg && count_unique) outs->sup++;}void get_intersect(idlist *l1, idlist *l2, Eqnode *ins, Eqnode *outs){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -