📄 nextendf5.c
字号:
//This implementation use the idea of diff lists all the way#include <errno.h>#include <iostream.h>#include <fstream.h>#include <stdio.h>#include <stdlib.h>#include <math.h>#include <fcntl.h>#include <sys/stat.h>#include <unistd.h>#include <sys/time.h>#include <time.h>#include <sys/types.h>#include <string.h>#include <strings.h>#include <sys/mman.h>#ifdef SGIf#endif#include "temlist.h"#include "memman.h"#include "itemset.h"#include "partition.h"#include "extl2.h"#include "eqgrnode.h"#include "assoc.h"#include "Graph.h" #define NONMAXFLG -2 struct timeval tp; #define min(a, b) ((a) < (b) ? (a) : (b))#define max(a, b) ((a) > (b) ? (a) : (b))//--------------------------------------#include "stats.h" //MJZdouble Stats::tottime = 0;int Stats::totcand = 0;int Stats::totlarge = 0;Stats stats;ofstream summary("summary.out", ios::app);//--------------------------------------char dataf[300];char idxf[300];char conf[300];char it2f[300];long TOTMEM=0;extern Graph *F2Graph;int data_fd, idx_fd, it2_fd; int idxflen, it2flen; //int *it2_cnt; //counts for 2-itemsetsint maxitemsup, min1, max1;//int *offsets;Itemset *item1, *item2; // for use in reading external dbaseList<int> **eqgraph2;double MAXIMAL_THRESHOLD = 1.0;int use_auto_maxthr = 1;boolean sort_ascend = TRUE;int num_intersect=0;double ts, te;FILE *out;int maxiter = 1;long tmpflen=0;char use_char_extl2=1;char ext_l2_pass=1; char use_simple_l2=0;char print_output=0; char diff_input=0;double extend_time=0, list_intersect_time=0, read_dbase_time=0;double tid_difference_time=0, super_set_time=0, t11=0, t10=0;double t9=0, t8=0, t7=0, t6=0, t5=0, t50=0, t40=0;double L2TIME=0, find_time;int INTERSECT_THRESHOLD;int DBASE_NUM_TRANS;int DBASE_MAXITEM, yesize_old;float DBASE_AVG_TRANS_SZ;int DBASE_MINTRANS; //works only with 1 partitionint DBASE_MAXTRANS;double MINSUP_PER;int MINSUPPORT;List<int> *F1;extern int num_partitions;void parse_args(int argc, char **argv){ extern char * optarg; int c; if (argc < 2) cout << "usage: a.out -i<infile> -s<support>\n"; else{ while ((c=getopt(argc,argv,"abcCe:fi:ors:St:w:m"))!=-1){ switch(c){ case 'a': use_auto_maxthr = 0; break; case 'C': use_char_extl2 = 1; break; case 'e': num_partitions = atoi(optarg); ext_l2_pass = 1; break; case 'i': sprintf(dataf,"%s.tpose", optarg); sprintf(idxf,"%s.idx", optarg); sprintf(conf,"%s.conf", optarg); sprintf(it2f,"%s.2it", optarg); break; case 'o': print_output = 1; break; case 's': MINSUP_PER = atof(optarg); break; case 'S': use_simple_l2=1; break; case 't': MAXIMAL_THRESHOLD = atof(optarg); break; } } } c = open(conf, O_RDONLY); if (c < 0){ perror("ERROR: invalid conf file\n"); exit(errno); } read(c,(char *)&DBASE_NUM_TRANS,ITSZ); MINSUPPORT = (int)(MINSUP_PER*DBASE_NUM_TRANS+0.5); //ensure that support is at least 2 if (MINSUPPORT < 2) MINSUPPORT = 2; read(c,(char *)&DBASE_MAXITEM,ITSZ); read(c,(char *)&DBASE_AVG_TRANS_SZ,sizeof(float)); read(c,(char *)&DBASE_MINTRANS,ITSZ); read(c,(char *)&DBASE_MAXTRANS,ITSZ); close(c); cout << "CONF " << DBASE_NUM_TRANS << " " << DBASE_MAXITEM << " " << DBASE_AVG_TRANS_SZ << endl;}// It is used for super set test.bool has_sup(List<int> *L, List<List<int> *> *U){ Listnode<List<int> *> *temptr = U -> head(); while (temptr){ if ((temptr->getdata())->Isubset(L)) return true; temptr = temptr -> getnext(); } return false;}// This function finds all lists in the family M which contain the // element i and add them to the family Z.void find_sets(List<List<int> *> *Z, List<List<int> *> *M, int i ){ Listnode<List<int> *> *temptr = M ->head(); while (temptr){ if ((temptr->getdata())-> Ismember(i)){ Z -> addatback(temptr->getdata()); } temptr = temptr -> getnext(); }}void Diff1(Itemset *join, Itemset *it1, Itemset *it2){ num_intersect++; int i,j, f; for (i=0,j=0; i < it1->support() && j < it2->support();){ if (it1->tid(i) > it2->tid(j)){ j++; } else if (it1->tid(i) == it2->tid(j)){ j++; i++; } else{ join ->add_tid(it1->tid(i)); i++; } } for (f=i; f < it1->support(); f++) join->add_tid(it1->tid(f)); join ->set_support(it1->support()- join->tidsize());}void Diff2(Itemset *join, Itemset *it1, Itemset *it2){ num_intersect++; //cout << "SZZ " << it1->itemsetsize() << " "<<it2->itemsetsize() << endl; if (it2->itemsetsize() > 1) stats.incrcand(it2->itemsetsize()); int i,j, f, k=0, breakflg=0; int dc1 = it2 -> support()- MINSUPPORT; for (i=0,j=0; i < it1->tidsize() && j < it2->tidsize();){ if (k > dc1) {breakflg = 1; break;} if (it1->tid(i) > it2->tid(j)){ j++; } else if (it1->tid(i) == it2->tid(j)){ j++; i++; } else{ k++; join ->add_tid(it1->tid(i)); i++; } } if (breakflg == 1) join->set_support(0); else { for (f=i; f < it1->tidsize(); f++) join->add_tid(it1->tid(f)); join ->set_support(it2->support()- join->tidsize()); }}void clear_list_itemset(List<Itemset *> *X){ Listnode <Itemset *> *node = X->head(); while (node){ X->set_head(node -> getnext()); if (node -> getdata()) delete (node -> getdata()); delete node; node = X->head(); } delete X;}// Each node in the list it1 has two functions comblnth() and sup(). //Sorting in increasing order of comblnth() and then in increasing order //of sup().//Radix sort on the key comblnth(i) and while doing this we insert a //new item in ascending order on the key sup(i).List<int> *sorting (List<int> *it1){ int digit; int Minlevel = min1; int Maxlevel = max1; //List<int> **level; //level = new List<int> *[Maxlevel+1]; List <int> *level[Maxlevel+1]; for (int i = Minlevel; i<= Maxlevel; i++) level[i]= new List<int>; Listnode <int> *currentptr = it1 -> head(); Listnode <int> *ptr; while (currentptr) { ptr = currentptr -> getnext(); digit = currentptr -> comblnth(); if ( level[digit]->Isempty()){ level[digit]->set_head(currentptr); level[digit]->set_last(currentptr); } else { Listnode<int> *temptr = level[digit]->head(); if ((currentptr -> sup())<= (temptr -> sup())){ currentptr -> set_next(temptr); level[digit]->set_head(currentptr); } else { while (temptr != level[digit]->last()){ if ((currentptr -> sup())<= (temptr -> getnext()->sup())){ currentptr -> set_next (temptr -> getnext()); temptr -> set_next(currentptr); break; } temptr = temptr -> getnext(); } if (temptr == level[digit]->last()){ level[digit]->last()->set_next(currentptr); level[digit]->set_last(currentptr); } } } currentptr = ptr; } //concatenate the lists level[i] to produce it1. currentptr = NULL; for (int i = Maxlevel; i>= Minlevel; i-- ) if (level[i]->head()){ level[i]->last() -> set_next(currentptr); currentptr = level[i]->head(); } it1->set_head(currentptr); return it1;}//Sorting the combine lists in the order of F1// eqgraph2[i] is a pointer to the combine lists c(i).//eqgraph_temp is a pointer to the new sorted combine list eqgraph[i].void procces_comblists(List<int> *F1){ int a,c; List <int> *eqgraph2_temp; Listnode <int> *ptr1, *ptr2; Listnode <int> *currentptr = F1 -> head(); while (currentptr){ ptr1 = F1 -> head(); c = currentptr -> getdata(); while (ptr1 != currentptr){ a = ptr1->getdata(); eqgraph2[c] -> deleted(a); ptr1 = ptr1 -> getnext(); } ptr2 = currentptr -> getnext(); eqgraph2_temp = new List<int>; while (ptr2){ a = ptr2 -> getdata(); if (eqgraph2[c]->Ismember(a)) eqgraph2_temp ->addatback(a); ptr2 = ptr2 -> getnext(); } delete eqgraph2[c]; eqgraph2[c] = NULL; eqgraph2[c] = eqgraph2_temp; currentptr = currentptr -> getnext(); }}void Extend(Itemset *I,List<Itemset *> *X, List<List<int> *> *Y, List<int> *t){ List<int> *G, *tail_list; Listnode<int> *cptr; Listnode<Itemset *> *temptr, *ptr, *xxptr; List<Itemset *> *NewX, *Xtem; List<List<int> *> *NewY; Itemset *NewI, *it1; Listnode <List<int> *> *ttptr; int m, f, j, size_old; tail_list = new List<int>; cptr = t -> head(); while (cptr){ tail_list -> addatback(cptr ->getdata()); cptr = cptr -> getnext(); } //seconds(t7); Xtem = new List<Itemset *>; ptr = X->head(); for (; ptr; ptr = ptr -> getnext()){ m= ptr->getdata()->itemset()->head()->getdata(); it1 = new Itemset(ptr->getdata()->tidtotsize()); Diff2(it1, ptr->getdata(), I); if (it1->tidsize()== 0){ tail_list -> addatfront(m); delete it1; } else if (it1 -> support() >= MINSUPPORT){ it1 -> add_item(m); Xtem -> sorted_descend(it1, it1->tidsize()); } else delete it1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -