📄 extendf6.c
字号:
//This implementation use the idea of diff lists all the way#include <errno.h>#include <iostream.h>#include <fstream.h>#include <stdio.h>#include <stdlib.h>#include <math.h>#include <fcntl.h>#include <sys/stat.h>#include <sys/resource.h>#include <unistd.h>#include <sys/time.h>#include <time.h>#include <sys/types.h>#include <string.h>#include <strings.h>#include <sys/mman.h>#ifdef SGIf#endif#include "temlist.h"#include "memman.h"#include "itemset.h"#include "partition.h"#include "extl2.h"#include "eqgrnode.h"#include "assoc.h"#include "Graph.h" #define NONMAXFLG -2 struct timeval tp; #define min(a, b) ((a) < (b) ? (a) : (b))#define max(a, b) ((a) > (b) ? (a) : (b))//--------------------------------------#include "stats.h" //MJZdouble Stats::tottime = 0;int Stats::totcand = 0;int Stats::totlarge = 0;Stats stats;ofstream summary("summary.out", ios::app);//--------------------------------------char dataf[300];char idxf[300];char conf[300];char tempf[300];char it2f[300];long TOTMEM=0;extern Graph *F2Graph;int data_fd, idx_fd, it2_fd; int idxflen, it2flen; //int *it2_cnt; //counts for 2-itemsetsint maxitemsup, min1, max1;//int *offsets;Itemset *item1, *item2; // for use in reading external dbaseList<int> **eqgraph2;double MAXIMAL_THRESHOLD = 1.0;int use_auto_maxthr = 1;boolean sort_ascend = TRUE, check_status;int num_intersect=0;double ts, te;FILE *out;int maxiter = 1;long tmpflen=0;char use_char_extl2=1;char ext_l2_pass=1; char use_simple_l2=0;char print_output=0; char diff_input=0;char use_diff = 0;char use_diff_f2=0;double extend_time=0, list_intersect_time=0, read_dbase_time=0;double tid_difference_time=0, super_set_time=0, t11=0, t10=0;double t9=0, t8=0, t7=0, t6=0, t5=0, t50=0, t40=0;double L2TIME=0, find_time, copy_time=0, h1, h2, a1, a2;//int INTERSECT_THRESHOLD;int DBASE_NUM_TRANS;int DBASE_MAXITEM;float DBASE_AVG_TRANS_SZ;int DBASE_MINTRANS; //works only with 1 partitionint DBASE_MAXTRANS;double MINSUP_PER;int MINSUPPORT;List<int> *F1;extern int num_partitions;void parse_args(int argc, char **argv){ extern char * optarg; int c; sprintf(tempf,"/tmp/tmp"); if (argc < 2) cout << "usage: a.out -i<infile> -s<support>\n"; else{ while ((c=getopt(argc,argv,"abcCdDe:fi:lors:St:w:mx:z"))!=-1){ switch(c){ case 'a': use_auto_maxthr = 0; break; case 'C': use_char_extl2 = 1; break; case 'd': use_diff=1; break; case 'D': diff_input = 1; use_diff = 1; break; case 'e': num_partitions = atoi(optarg); ext_l2_pass = 1; break; case 'i': sprintf(dataf,"%s.tpose", optarg); sprintf(idxf,"%s.idx", optarg); sprintf(conf,"%s.conf", optarg); sprintf(it2f,"%s.2it", optarg); break; case 'l': use_diff_f2 = 1; use_diff =1; break; case 'o': print_output = 1; break; case 's': MINSUP_PER = atof(optarg); break; case 'S': use_simple_l2=1; break; case 't': MAXIMAL_THRESHOLD = atof(optarg); break; case 'x': sprintf(tempf, "%s", optarg); break; case 'z': sort_ascend=FALSE; break; } } } if (diff_input) use_diff_f2 = 0; c = open(conf, O_RDONLY); if (c < 0){ perror("ERROR: invalid conf file\n"); exit(errno); } read(c,(char *)&DBASE_NUM_TRANS,ITSZ); MINSUPPORT = (int)(MINSUP_PER*DBASE_NUM_TRANS+0.5); //ensure that support is at least 2 if (MINSUPPORT < 2) MINSUPPORT = 2; read(c,(char *)&DBASE_MAXITEM,ITSZ); read(c,(char *)&DBASE_AVG_TRANS_SZ,sizeof(float)); read(c,(char *)&DBASE_MINTRANS,ITSZ); read(c,(char *)&DBASE_MAXTRANS,ITSZ); close(c); cout << "CONF " << DBASE_NUM_TRANS << " " << DBASE_MAXITEM << " " << DBASE_AVG_TRANS_SZ << endl;}// It is used for super set test.bool has_sup(Array *L, List<Array *> *U, int s){ Listnode<Array *> *temptr = U -> head(); while (temptr){ if ((temptr->getdata())->Isubset(L, s)) return true; temptr = temptr -> getnext(); } return false;}// This function finds all lists in the family M which contain the // element i and add them to the family Z.void find_sets(List<Array *> *Z, List<Array *> *M, int i ){ Listnode<Array *> *temptr = M ->head(); while (temptr){ if (temptr -> getdata() -> Ismember(i)){ Z -> addatback(temptr->getdata()); } temptr = temptr -> getnext(); }}void get_intersect(Itemset *join, Itemset *it1, Itemset *it2){ int i,j; num_intersect++; if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize()); int dc1 = it1->support()-MINSUPPORT; int dc2 = it2->support()-MINSUPPORT; int df1=0; int df2=0; int breakflg=0; for (i=0,j=0; i < it1->support() && j < it2->support();){ if (df1 > dc1 || df2 > dc2){ breakflg=1; break; } if (it1->tid(i) > it2->tid(j)){ j++; df2++; } else if (it1->tid(i) == it2->tid(j)){ join->add_tid(it1->tid(i)); join->support()++; j++; i++; } else{ df1++; i++; } } if (breakflg == 1) join->set_support(0); }void Diff1(Itemset *join, Itemset *it1, Itemset *it2){ num_intersect++; if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize()); int i,j, f, k=0, breakflg=0; int dc1 = it1 -> support()- MINSUPPORT; for (i=0,j=0; i < it1->support() && j < it2->support();){ if (k > dc1) { breakflg = 1; break; } if (it1->tid(i) > it2->tid(j)){ j++; } else if (it1->tid(i) == it2->tid(j)){ j++; i++; } else{ join ->add_tid(it1->tid(i)); i++; k++; } } for (f=i; f < it1->support() && k <= dc1; f++){ join->add_tid(it1->tid(f)); k++; } if (k > dc1) join->set_support(0); else{ join ->set_support(it1->support()- join->tidsize()); } }void Diff2(Itemset *join, Itemset *it1, Itemset *it2){ num_intersect++; //cout << "SZZ " << it1->itemsetsize() << " "<<it2->itemsetsize() << endl; if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize()); int i,j, f, k=0, breakflg=0; int dc1 = it2 -> support()- MINSUPPORT; for (i=0,j=0; i < it1->tidsize() && j < it2->tidsize();){ if (k > dc1) {breakflg = 1; break;} if (it1->tid(i) > it2->tid(j)){ j++; } else if (it1->tid(i) == it2->tid(j)){ j++; i++; } else{ k++; join ->add_tid(it1->tid(i)); i++; } } //if (breakflg == 1) join->set_support(0); //else { // for (f=i; f < it1->tidsize(); f++) join->add_tid(it1->tid(f)); // join ->set_support(it2->support()- join->tidsize()); // } for (f=i; f < it1->tidsize() && k <= dc1; f++){ join->add_tid(it1->tid(f)); k++; } if (k > dc1) join->set_support(0); else{ join ->set_support(it2->support()- join->tidsize()); } }void clear_list_itemset(List<Itemset *> *X){ Listnode <Itemset *> *node = X->head(); while (node){ X->set_head(node -> getnext()); if (node -> getdata()) delete (node -> getdata()); delete node; node = X->head(); } delete X;}// Each node in the list it1 has two functions comblnth() and sup(). //Sorting in increasing order of comblnth() and then in increasing order //of sup().//Radix sort on the key comblnth(i) and while doing this we insert a //new item in ascending order on the key sup(i).List<int> *sorting (List<int> *it1){ int digit; int Minlevel = min1; int Maxlevel = max1; //List<int> **level; //level = new List<int> *[Maxlevel+1]; List <int> *level[Maxlevel+1]; for (int i = Minlevel; i<= Maxlevel; i++) level[i]= new List<int>; Listnode <int> *currentptr = it1 -> head(); Listnode <int> *ptr; while (currentptr) { ptr = currentptr -> getnext(); digit = currentptr -> comblnth(); if ( level[digit]->Isempty()){ level[digit]->set_head(currentptr); level[digit]->set_last(currentptr); } else { Listnode<int> *temptr = level[digit]->head(); if ((currentptr -> sup())<= (temptr -> sup())){ currentptr -> set_next(temptr); level[digit]->set_head(currentptr); } else { while (temptr != level[digit]->last()){ if ((currentptr -> sup())<= (temptr -> getnext()->sup())){ currentptr -> set_next (temptr -> getnext()); temptr -> set_next(currentptr); break; } temptr = temptr -> getnext(); } if (temptr == level[digit]->last()){ level[digit]->last()->set_next(currentptr); level[digit]->set_last(currentptr); } } } currentptr = ptr; } //concatenate the lists level[i] to produce it1. currentptr = NULL; for (int i = Maxlevel; i>= Minlevel; i-- ) if (level[i]->head()){ level[i]->last() -> set_next(currentptr); currentptr = level[i]->head(); } it1->set_head(currentptr); return it1;}//Sorting the combine lists in the order of F1// eqgraph2[i] is a pointer to the combine lists c(i).//eqgraph_temp is a pointer to the new sorted combine list eqgraph[i].void procces_comblists(List<int> *F1){ int a,c; List <int> *eqgraph2_temp; Listnode <int> *ptr1, *ptr2; Listnode <int> *currentptr = F1 -> head(); while (currentptr){ ptr1 = F1 -> head(); c = currentptr -> getdata(); while (ptr1 != currentptr){ a = ptr1->getdata(); eqgraph2[c] -> deleted(a); ptr1 = ptr1 -> getnext(); } ptr2 = currentptr -> getnext(); eqgraph2_temp = new List<int>; while (ptr2){ a = ptr2 -> getdata(); if (eqgraph2[c]->Ismember(a)) eqgraph2_temp ->addatback(a); ptr2 = ptr2 -> getnext(); } delete eqgraph2[c]; eqgraph2[c] = NULL; eqgraph2[c] = eqgraph2_temp; currentptr = currentptr -> getnext(); }}//void Extend(Itemset *I,List<Itemset *> *X, List<Array *> *Y, Array *t, // int iter)void Extend(Itemset *I, Listnode<Itemset *> *X, List<Array *> *Y, Array *t, int iter, int l){ //cout << "ITER " << iter << " " << *I << endl; Array *G, *tail_list; Listnode<int> *cptr; Listnode<Itemset *> *temptr, *ptr, *xxptr; List<Itemset *> *NewX, *Xtem; List<Array *> *NewY; Itemset *it1; Listnode <Array *> *ttptr; int m, f, j, size_old, s; tail_list = new Array(t->size() + I->itemsetsize() + l + 1); //tail_list = new Array(t->size() + I->itemsetsize() + X->size() + 1); for (int g = 0; g < t -> size(); g++) tail_list -> add((*t)[g]); seconds(t7); Xtem = new List<Itemset *>; ptr = X; //ptr = X->head(); for (; ptr; ptr = ptr -> getnext()){ m = ptr->getdata()->itemset()->head()->getdata(); if (use_diff_f2 || diff_input || (use_diff && iter > 2)){ it1 = new Itemset(min(I->support()-MINSUPPORT+1, ptr->getdata()->tidsize())); Diff2(it1, ptr->getdata(), I); } else if (use_diff && iter == 2){ it1 = new Itemset(I->support()- MINSUPPORT +1); //cout << "DIF1 " << *I << endl; //cout << "\t " << *(ptr->getdata()) << endl; Diff1(it1,I,ptr->getdata()); } else{ it1 = new Itemset(min(ptr->getdata()->support(), I->support())); get_intersect(it1,ptr->getdata(), I); } if ((use_diff_f2 || diff_input || (use_diff && I->itemsetsize() > 2)) && it1->tidsize()== 0){ tail_list -> add(m); delete it1; } else if ((!use_diff || (!use_diff_f2 && iter == 2)) && it1->tidsize() == I->tidsize()){ tail_list -> add(m); delete it1; } else if (it1 -> support() >= MINSUPPORT){ it1 -> add_item(m); if (use_diff) Xtem -> sorted_descend(it1, it1->tidsize()); else Xtem -> sorted_ascend(it1, it1->tidsize()); //Xtem -> sorted_descend(it1, it1->tidsize()); } else delete it1; } seconds(t8); tid_difference_time += t8 - t7; if (Xtem ->size()< 2){ if (Xtem ->size()== 1){ tail_list ->add(Xtem ->head()->getdata()->itemset()->head()->getdata());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -