📄 extl2.cc,v
字号:
head 1.2;access;symbols;locks zaki:1.2; strict;comment @// @;1.2date 2001.06.24.21.17.20; author zaki; state Exp;branches;next 1.1;1.1date 2001.06.12.16.41.58; author zaki; state Exp;branches;next ;desc@Charm with Hashing.@1.2log@added use_hash_map@text@#include <errno.h>#include <iostream.h>#include <stdio.h>#include <stdlib.h>#include <sys/types.h>#include <unistd.h>#include <sys/stat.h>#include <fcntl.h>#include <sys/mman.h>#include <strings.h>#include <limits.h>#include "extl2.h"#include "assoc.h"#include "partition.h"#include "Graph.h"#include "Util.h"#define isetbufsz 1024extern unsigned long int DB_size;int isetbuf[isetbufsz];int isetpos=0;int isetfd;unsigned char **cset_sup;unsigned int **set_sup;invdb *invDB;int EXTBLKSZ;int Graph::numF1=0;extern char tempf[];invdb::invdb(int sz): Array<Array<int> *>(sz){ for (int i=0; i < totsize(); i++){ (*this)[i] = NULL; } }invdb::~invdb(){ for (int i=0; i < totsize(); i++){ if ((*this)[i]) delete (*this)[i]; }}void invdb::add_db(int tid, int it){ if ((*this)[tid] == NULL){ (*this)[tid] = new Array<int>((int)DBASE_AVG_TRANS_SZ); } (*this)[tid]->add(it);}int cmp2it(const void *a, const void *b){ int *ary = (int *)a; int *bry = (int *)b; if (ary[0] < bry[0]) return -1; else if (ary[0] > bry[0]) return 1; else{ if (ary[1] < bry[1]) return -1; else if (ary[1] > bry[1]) return 1; else return 0; }}int make_horizontal_l1_pass(Dbase_Ctrl_Blk &DCB){ int i,j; int supsz; int maxsup = 0; int *itcnt = new int [DBASE_MAXITEM]; bzero((char *)itcnt, DBASE_MAXITEM*sizeof(int)); F2Graph = new Graph(100); Graph::numF1 = 0; int *buf, custid, tid, numitem; DCB.get_first_blk(); DCB.get_next_trans(buf, numitem, tid, custid); while (!DCB.eof()){ for (j=0; j < numitem; j++){ itcnt[buf[j]]++; } DCB.get_next_trans(buf, numitem, tid, custid); } DCB.freqidx = new int[DBASE_MAXITEM]; DCB.tidbuflen=0; for (i=0; i < DBASE_MAXITEM; i++){ supsz = itcnt[i]; DB_size+=supsz; if (diff_input) supsz = DBASE_NUM_TRANS - supsz; if (supsz >= MINSUPPORT){ //if (diff_input) stats[0]->avgtid += DBASE_NUM_TRANS - supsz; //else stats[0]->avgtid += supsz; F2Graph->add_node(i,supsz); DCB.freqidx[i] = Graph::numF1; //cout << "LARGE " << i << " " << Graph::numF1 << " " << supsz << endl; Graph::numF1++; DCB.tidbuflen += supsz; } else DCB.freqidx[i] = -1; if (supsz > maxsup) maxsup = supsz; } cout << "Graph::numF1 " << Graph::numF1 << endl; F2Graph->compact(); delete [] itcnt; int fd=-1; //if ((fd = open ("/tmp/tidbuf", (O_WRONLY|O_CREAT|O_TRUNC), 0666)) < 0){ // perror("Can't open tidbuf file"); // exit (errno); //} DCB.tidbuf = (int *) mmap ((char *)NULL, DCB.tidbuflen*ITSZ, (PROT_READ|PROT_WRITE), MAP_ANON|MAP_PRIVATE, fd, 0); if (DCB.tidbuf == (int *)-1){ perror("tidbuf MMAP ERROR"); exit(errno); } DCB.tidlists = new Array<int> *[Graph::numF1]; int offt = 0; for (i=0; i < Graph::numF1; i++){ DCB.tidlists[i] = new Array<int> ((*F2Graph)[i]->sup(), &DCB.tidbuf[offt]); offt += (*F2Graph)[i]->sup(); } return maxsup;}int make_vertical_l1_pass(){ int i; int supsz; int idxval = -1; int maxsup = 0; F2Graph = new Graph(100); Graph::numF1 = 0; for (i=0; i < DBASE_MAXITEM; i++){ idxval = partition_idxval(i); if (idxval == -1) continue; //item has no tidlist supsz = partition_get_idxsup(i); DB_size+=supsz; if (diff_input) supsz = DBASE_NUM_TRANS - supsz; if (supsz >= MINSUPPORT){ F2Graph->add_node(i,supsz); Graph::numF1++; } if (supsz > maxsup) maxsup = supsz; } cout << "Graph::numF1 " << Graph::numF1 << endl; F2Graph->compact(); return maxsup;}int make_l1_pass(Dbase_Ctrl_Blk &DCB){ if (use_horizontal) return make_horizontal_l1_pass(DCB); else return make_vertical_l1_pass();}void process_cust_invert(int curcnt, int *curit){ int i,j; int it1, it2; //for (i=0; i < curcnt; i++) // cout << " " << (*F2Graph)[curit[i]]->item(); //cout << endl; if (use_char_extl2){ for (i=0; i < curcnt; i++){ it1 = curit[i]; if (cset_sup[it1]){ for (j=i+1; j < curcnt; j++){ it2 = curit[j]; if ((++cset_sup[it1][it2-it1-1]) == 0){ if (isetpos+2 > isetbufsz){ write(isetfd, (char *)isetbuf, isetpos*ITSZ); isetpos = 0; } isetbuf[isetpos++] = it1; isetbuf[isetpos++] = it2; } } } } } else{ for (i=0; i < curcnt; i++){ it1 = curit[i]; if (set_sup[it1]){ for (j=i+1; j < curcnt; j++){ it2 = curit[j]; set_sup[it1][it2-it1-1]++; } } } }}void process_diff_cust(int curcnt, int *curit){ int i,j; static Array<int> trans(100); trans.reset(); //for (i=0; i < curcnt; i++) // cout << " " << curit[i]; //cout << endl; i = 0; //iterate over numitems j = 0; //idx into curit for (j=0; j < curcnt; j++){ while (i < curit[j]){ if (partition_idxval((*F2Graph)[i]->item()) != -1) trans.add(i); i++; } i++; //skip over curit[j] } for (; i < Graph::numF1; i++){ if (partition_idxval((*F2Graph)[i]->item()) != -1) trans.add(i); } process_cust_invert(trans.size(), trans.array()); }void process_invert(int pnum){ int i,k; int minv, maxv; if (diff_input){ minv = DBASE_MINTRANS; maxv = DBASE_MAXTRANS; } else{ minv = INT_MAX; maxv = 0; for (i=0; i < Graph::numF1; i++){ partition_get_minmaxtid(pnum, (*F2Graph)[i]->item(), minv, maxv); } } //cout << "MINMAX " << minv << " " << maxv << endl; if (invDB->totsize() < maxv-minv+1) invDB->Realloc(maxv-minv+1); int supsz; int ivalsz=0; int *ival = NULL; for (i=0; i < Graph::numF1; i++){ supsz = partition_get_lidxsup(pnum, (*F2Graph)[i]->item()); if (ivalsz < supsz){ ivalsz = Util<int>::Realloc(supsz, ITSZ, ival); } partition_lclread_item(ival, pnum, (*F2Graph)[i]->item()); int cid; int midx; for (int pos=0; pos < supsz; pos++) { cid = ival[pos]; midx = cid - minv; invDB->add_db(midx,i); } } for (k=0; k < maxv-minv+1; k++){ if (diff_input){ if ((*invDB)[k]){ if (Graph::numF1-(*invDB)[k]->size() > 0) process_diff_cust((*invDB)[k]->size(), (*invDB)[k]->array()); (*invDB)[k]->reset(); } else process_diff_cust(0, NULL); } else{ if ((*invDB)[k] && (*invDB)[k]->size() > 0){ //cout << "MVAL " << minv+k << " " << (*invDB)[k]->size() << endl; //for (int x = 0; x < (*invDB)[k]->size(); x++){ // cout << " " << (*F2Graph)[(*(*invDB)[k])[x]]->item(); //} //cout << endl; process_cust_invert((*invDB)[k]->size(), (*invDB)[k]->array()); (*invDB)[k]->reset(); } } }} void sort_get_l2(int &l2cnt, int fd){ //write 2-itemsets counts to file int i, j, k, fcnt; long sortflen; int *sortary; sortflen = lseek(fd, 0, SEEK_CUR); if (sortflen < 0){ perror("SEEK SEQ"); exit(errno); } //cout << "SORT " << sortflen << endl; if (sortflen > 0){#ifdef DEC sortary = (int *) mmap((char *)NULL, sortflen, (PROT_READ|PROT_WRITE), (MAP_FILE|MAP_VARIABLE|MAP_PRIVATE), fd, 0);#else sortary = (int *) mmap((char *)NULL, sortflen, (PROT_READ|PROT_WRITE), MAP_PRIVATE, fd, 0);#endif if (sortary == (int *)-1){ perror("SEQFd MMAP ERROR"); exit(errno); } qsort(sortary, (sortflen/ITSZ)/2, 2*ITSZ, cmp2it);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -