⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 extendf6.c

📁 关联规则中的频繁项集生成算法genmax
💻 C
📖 第 1 页 / 共 2 页
字号:
//This implementation use the idea of diff lists all the way#include <errno.h>#include <iostream.h>#include <fstream.h>#include <stdio.h>#include <stdlib.h>#include <math.h>#include <fcntl.h>#include <sys/stat.h>#include <sys/resource.h>#include <unistd.h>#include <sys/time.h>#include <time.h>#include <sys/types.h>#include <string.h>#include <strings.h>#include <sys/mman.h>#ifdef SGIf#endif#include "temlist.h"#include "memman.h"#include "itemset.h"#include "partition.h"#include "extl2.h"#include "eqgrnode.h"#include "assoc.h"#include "Graph.h" #define NONMAXFLG -2 struct timeval tp;   #define min(a, b) ((a) < (b) ? (a) : (b))#define max(a, b) ((a) > (b) ? (a) : (b))//--------------------------------------#include "stats.h" //MJZdouble Stats::tottime = 0;int Stats::totcand = 0;int Stats::totlarge = 0;Stats stats;ofstream summary("summary.out", ios::app);//--------------------------------------char dataf[300];char idxf[300];char conf[300];char tempf[300];char it2f[300];long TOTMEM=0;extern Graph *F2Graph;int data_fd, idx_fd, it2_fd;  int idxflen, it2flen;         //int *it2_cnt; //counts for 2-itemsetsint maxitemsup, min1, max1;//int *offsets;Itemset *item1, *item2; // for use in reading external dbaseList<int> **eqgraph2;double MAXIMAL_THRESHOLD = 1.0;int use_auto_maxthr = 1;boolean sort_ascend = TRUE, check_status;int num_intersect=0;double ts, te;FILE *out;int maxiter = 1;long tmpflen=0;char use_char_extl2=1;char ext_l2_pass=1;  char use_simple_l2=0;char print_output=0; char diff_input=0;char use_diff = 0;char use_diff_f2=0;double extend_time=0, list_intersect_time=0, read_dbase_time=0;double tid_difference_time=0, super_set_time=0, t11=0, t10=0;double t9=0, t8=0, t7=0, t6=0, t5=0, t50=0, t40=0;double L2TIME=0, find_time, copy_time=0, h1, h2, a1, a2;//int  INTERSECT_THRESHOLD;int DBASE_NUM_TRANS;int DBASE_MAXITEM;float DBASE_AVG_TRANS_SZ;int DBASE_MINTRANS; //works only with 1 partitionint DBASE_MAXTRANS;double MINSUP_PER;int MINSUPPORT;List<int> *F1;extern int num_partitions;void parse_args(int argc, char **argv){  extern char * optarg;   int c;      sprintf(tempf,"/tmp/tmp");      if (argc < 2)      cout << "usage: a.out -i<infile>  -s<support>\n";   else{      while ((c=getopt(argc,argv,"abcCdDe:fi:lors:St:w:mx:z"))!=-1){         switch(c){         case 'a':            use_auto_maxthr = 0;            break;         case 'C':            use_char_extl2 = 1;            break;         case 'd':            use_diff=1;            break;                     case 'D':            diff_input = 1;            use_diff = 1;            break;                     case 'e':            num_partitions = atoi(optarg);            ext_l2_pass = 1;            break;                     case 'i':            sprintf(dataf,"%s.tpose", optarg);            sprintf(idxf,"%s.idx", optarg);            sprintf(conf,"%s.conf", optarg);            sprintf(it2f,"%s.2it", optarg);            break;         case 'l':            use_diff_f2 = 1;            use_diff =1;                        break;                     case 'o':            print_output = 1;            break;         case 's':            MINSUP_PER = atof(optarg);            break;         case 'S':            use_simple_l2=1;            break;         case 't':            MAXIMAL_THRESHOLD = atof(optarg);            break;         case 'x':            sprintf(tempf, "%s", optarg);            break;         case 'z':            sort_ascend=FALSE;            break;                     }      }   }   if (diff_input) use_diff_f2 = 0;     c = open(conf, O_RDONLY);  if (c < 0){      perror("ERROR: invalid conf file\n");      exit(errno);  }  read(c,(char *)&DBASE_NUM_TRANS,ITSZ);  MINSUPPORT = (int)(MINSUP_PER*DBASE_NUM_TRANS+0.5);  //ensure that support is at least 2  if (MINSUPPORT < 2) MINSUPPORT = 2;  read(c,(char *)&DBASE_MAXITEM,ITSZ);  read(c,(char *)&DBASE_AVG_TRANS_SZ,sizeof(float));  read(c,(char *)&DBASE_MINTRANS,ITSZ);    read(c,(char *)&DBASE_MAXTRANS,ITSZ);  close(c);  cout << "CONF " << DBASE_NUM_TRANS << " " << DBASE_MAXITEM << " "        << DBASE_AVG_TRANS_SZ << endl;}// It is used for super set test.bool has_sup(Array *L, List<Array *> *U, int s){  Listnode<Array *> *temptr = U -> head();  while (temptr){      if ((temptr->getdata())->Isubset(L, s)) return true;      temptr = temptr -> getnext();  }  return false;}// This function finds all lists in the family M which contain the // element i and add them to the family Z.void find_sets(List<Array *> *Z, List<Array *> *M, int i ){  Listnode<Array *> *temptr = M ->head();  while (temptr){      if (temptr -> getdata() -> Ismember(i)){                          	                        Z -> addatback(temptr->getdata());      }      temptr = temptr -> getnext();  }}void get_intersect(Itemset *join, Itemset *it1, Itemset *it2){   int i,j;   num_intersect++;   if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize());      int dc1 = it1->support()-MINSUPPORT;   int dc2 = it2->support()-MINSUPPORT;   int df1=0;   int df2=0;   int breakflg=0;      for (i=0,j=0; i < it1->support() && j < it2->support();){      if (df1 > dc1 || df2 > dc2){         breakflg=1;                 break;      }            if (it1->tid(i) > it2->tid(j)){         j++;         df2++;      }      else if (it1->tid(i) == it2->tid(j)){         join->add_tid(it1->tid(i));         join->support()++;         j++;         i++;      }      else{         df1++;         i++;      }   }   if (breakflg == 1) join->set_support(0);   }void Diff1(Itemset *join, Itemset *it1, Itemset *it2){    num_intersect++;   if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize());   int i,j, f, k=0, breakflg=0;   int dc1 = it1 -> support()-  MINSUPPORT;      for (i=0,j=0; i < it1->support() && j < it2->support();){      if (k > dc1) {         breakflg = 1;         break;      }      if (it1->tid(i) > it2->tid(j)){         j++;      }      else if (it1->tid(i) == it2->tid(j)){         j++;         i++;      }      else{         join ->add_tid(it1->tid(i));         i++;         k++;      }   }    for (f=i; f < it1->support() && k <= dc1; f++){      join->add_tid(it1->tid(f));      k++;   }   if (k > dc1) join->set_support(0);   else{            join ->set_support(it1->support()- join->tidsize());   }   }void Diff2(Itemset *join, Itemset *it1, Itemset *it2){    num_intersect++;   //cout << "SZZ " << it1->itemsetsize() << " "<<it2->itemsetsize() << endl;      if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize());   int i,j, f, k=0, breakflg=0;   int dc1 = it2 -> support()-  MINSUPPORT;   for (i=0,j=0; i < it1->tidsize() && j < it2->tidsize();){      if (k > dc1) {breakflg = 1; break;}      if (it1->tid(i) > it2->tid(j)){         j++;      }      else if (it1->tid(i) == it2->tid(j)){         j++;         i++;      }      else{         k++;         join ->add_tid(it1->tid(i));         i++;      }   }    //if (breakflg == 1)   join->set_support(0);   //else {   //   for (f=i; f < it1->tidsize(); f++)  join->add_tid(it1->tid(f));   //   join ->set_support(it2->support()- join->tidsize());   // }   for (f=i; f < it1->tidsize() && k <= dc1; f++){      join->add_tid(it1->tid(f));      k++;    }   if (k > dc1) join->set_support(0);    else{            join ->set_support(it2->support()- join->tidsize());   }      }void clear_list_itemset(List<Itemset *> *X){      Listnode <Itemset *> *node = X->head();      while (node){         X->set_head(node -> getnext());          if (node -> getdata()) delete (node -> getdata());         delete node;         node = X->head();      }      delete X;}// Each node in the list it1 has two functions  comblnth() and sup().   //Sorting in increasing order of comblnth() and then in increasing order //of sup().//Radix sort on the key comblnth(i) and while doing this we insert a //new item in ascending order on the key sup(i).List<int> *sorting (List<int> *it1){   int digit;   int Minlevel =  min1;   int Maxlevel =  max1;   //List<int> **level;    //level = new List<int> *[Maxlevel+1];   List <int> *level[Maxlevel+1];   for (int i = Minlevel; i<= Maxlevel; i++)                    level[i]= new List<int>;   Listnode <int> *currentptr = it1 -> head();   Listnode <int> *ptr;   while (currentptr) {     ptr = currentptr -> getnext();     digit = currentptr -> comblnth();     if ( level[digit]->Isempty()){       level[digit]->set_head(currentptr);        level[digit]->set_last(currentptr);      }     else {           Listnode<int> *temptr = level[digit]->head();           if ((currentptr -> sup())<= (temptr -> sup())){             currentptr -> set_next(temptr);             level[digit]->set_head(currentptr);	   }           else {	     while (temptr != level[digit]->last()){                  if ((currentptr -> sup())<= (temptr -> getnext()->sup())){                     currentptr -> set_next (temptr -> getnext());                     temptr -> set_next(currentptr);                     break;		  }                  temptr = temptr -> getnext();	     }             if (temptr == level[digit]->last()){                 level[digit]->last()->set_next(currentptr);                level[digit]->set_last(currentptr);	     }	   }     }     currentptr = ptr;   }        //concatenate the lists level[i] to produce it1.   currentptr = NULL;    for (int i = Maxlevel; i>= Minlevel; i-- )       if (level[i]->head()){               level[i]->last() -> set_next(currentptr);              currentptr = level[i]->head();        }   it1->set_head(currentptr);   return it1;}//Sorting the combine lists in the order of F1// eqgraph2[i] is a pointer to the combine lists c(i).//eqgraph_temp is a pointer to the new sorted combine list eqgraph[i].void procces_comblists(List<int> *F1){   int a,c;   List <int> *eqgraph2_temp;   Listnode <int> *ptr1, *ptr2;   Listnode <int> *currentptr = F1 -> head();   while (currentptr){         ptr1 = F1 -> head();         c = currentptr -> getdata();         while (ptr1 != currentptr){                   a = ptr1->getdata();                   eqgraph2[c] -> deleted(a);                   ptr1 = ptr1 -> getnext();        	 }         ptr2 = currentptr -> getnext();          eqgraph2_temp = new List<int>;         while (ptr2){               a = ptr2 -> getdata();               if (eqgraph2[c]->Ismember(a)) eqgraph2_temp ->addatback(a);               ptr2 = ptr2 -> getnext(); 	 }         delete eqgraph2[c];             eqgraph2[c] = NULL;         eqgraph2[c] = eqgraph2_temp;           currentptr = currentptr -> getnext();   }}//void Extend(Itemset *I,List<Itemset *> *X, List<Array *> *Y, Array *t,  //          int iter)void Extend(Itemset *I, Listnode<Itemset *> *X, List<Array *> *Y, Array *t,            int iter, int l){      //cout << "ITER " << iter << " " << *I << endl;         Array  *G, *tail_list;         Listnode<int> *cptr;         Listnode<Itemset *> *temptr, *ptr, *xxptr;         List<Itemset *> *NewX, *Xtem;         List<Array *> *NewY;         Itemset *it1;         Listnode <Array *> *ttptr;         int m, f, j, size_old, s;          tail_list = new Array(t->size() + I->itemsetsize() + l + 1);         //tail_list = new Array(t->size() + I->itemsetsize() + X->size() + 1);         for (int g = 0; g < t -> size(); g++) tail_list -> add((*t)[g]);          seconds(t7);         Xtem = new List<Itemset *>;         ptr = X;         //ptr = X->head();         for (; ptr; ptr = ptr -> getnext()){                  m = ptr->getdata()->itemset()->head()->getdata();                                                      if (use_diff_f2 || diff_input || (use_diff && iter > 2)){                     it1 = new Itemset(min(I->support()-MINSUPPORT+1,                                           ptr->getdata()->tidsize()));                     Diff2(it1, ptr->getdata(), I);                  }                                    else if (use_diff && iter == 2){                           it1 = new Itemset(I->support()- MINSUPPORT +1);                     //cout << "DIF1 " << *I << endl;                     //cout << "\t " << *(ptr->getdata()) << endl;                                          Diff1(it1,I,ptr->getdata());                                    }                                    else{                     it1 = new Itemset(min(ptr->getdata()->support(),                                           I->support()));                     get_intersect(it1,ptr->getdata(), I);                                     }                                    if ((use_diff_f2 || diff_input ||                        (use_diff && I->itemsetsize() > 2))                        && it1->tidsize()== 0){                     tail_list -> add(m);                     delete it1;		  }                  else if ((!use_diff || (!use_diff_f2 && iter == 2))                           && it1->tidsize() == I->tidsize()){                     tail_list -> add(m);                     delete it1;                  }                                    else if (it1 -> support() >= MINSUPPORT){                                           it1 -> add_item(m);                     if (use_diff)                        Xtem -> sorted_descend(it1, it1->tidsize());                     else                         Xtem -> sorted_ascend(it1, it1->tidsize());                     //Xtem -> sorted_descend(it1, it1->tidsize());                                       }                  else delete it1; 	 }         seconds(t8);         tid_difference_time += t8 - t7;         if (Xtem ->size()< 2){                   if (Xtem ->size()== 1){                             tail_list ->add(Xtem ->head()->getdata()->itemset()->head()->getdata());

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -