📄 extl2.cc,v
字号:
} int numel = sortflen/ITSZ; i = 0; fcnt = 0; for (j=0; j < Graph::numF1;j++){ for (k=j+1; k < Graph::numF1;k++){ fcnt = 0; if (sortflen > 0 && i < numel){ while (i < numel && j == sortary[i] && k == sortary[i+1]){ fcnt += 256; i += 2; } } if (cset_sup[j]) fcnt += (int) cset_sup[j][k-j-1]; if (fcnt >= MINSUPPORT){ //F2Graph->add_adj(j, (*F2Graph)[k]->item(), fcnt); F2Graph->add_adj(j, k, fcnt); (*F2Graph)[j]->supsum() += fcnt; (*F2Graph)[k]->supsum() += fcnt; //cout << "LARGE " << (*F2Graph)[j]->item() << " " << // (*F2Graph)[k]->item() << " SUPP " << fcnt << endl; l2cnt++; } } } if (sortflen > 0) munmap((caddr_t)sortary, sortflen);}void get_l2(int &l2cnt){ int j,k; int fcnt; for (j=0; j < Graph::numF1;j++){ if (set_sup[j]){ for (k=j+1; k < Graph::numF1;k++){ fcnt = set_sup[j][k-j-1]; if (fcnt >= MINSUPPORT){ F2Graph->add_adj(j, k, fcnt); (*F2Graph)[j]->supsum() += fcnt; (*F2Graph)[k]->supsum() += fcnt; l2cnt++; } } } }}void get_ext_l2(int &l2cnt){ int i; int mem_used=0; EXTBLKSZ = num_partitions+(DBASE_NUM_TRANS+num_partitions-1)/num_partitions; int tsz = (int) (DBASE_AVG_TRANS_SZ); invDB = new invdb(EXTBLKSZ); mem_used += EXTBLKSZ*ITSZ; mem_used += (int) (EXTBLKSZ*tsz*ITSZ); //cout << "CURITSZ " << tsz << " " << EXTBLKSZ << " " << mem_used << endl; char TEMPF[300]; sprintf(TEMPF,"%siset",tempf); if (use_char_extl2){ if ((isetfd = open(TEMPF, (O_RDWR|O_CREAT|O_TRUNC), 0666)) < 0){ perror("Can't open out file"); exit (errno); } cset_sup = new unsigned char *[Graph::numF1]; // support for 2-itemsets bzero((void *)cset_sup, Graph::numF1*sizeof(unsigned char *)); mem_used += Graph::numF1*sizeof(unsigned char *); } else{ set_sup = new unsigned int *[Graph::numF1]; // support for 2-itemsets bzero((void *)set_sup, Graph::numF1*sizeof(unsigned int *)); mem_used += Graph::numF1*sizeof(unsigned int *); } int low, high; int itsz; if (use_char_extl2) itsz = sizeof(unsigned char); else itsz = sizeof(unsigned int); for (low = 0; low < Graph::numF1; low = high){ if (use_char_extl2){ isetpos = 0; lseek(isetfd, 0, SEEK_SET); } for (high = low; high < Graph::numF1 && (mem_used+(Graph::numF1-high-1)*itsz) < AVAILMEM; high++){ if (Graph::numF1-high-1 > 0){ if (use_char_extl2){ cset_sup[high] = new unsigned char [Graph::numF1-high-1]; bzero((void *)cset_sup[high], (Graph::numF1-high-1)*itsz); } else{ set_sup[high] = new unsigned int [Graph::numF1-high-1]; //cout << "ALLOC " << high << " " << set_sup[high] << " " // << Graph::numF1-high-1 << endl; bzero((void *)set_sup[high], (Graph::numF1-high-1)*itsz); } } mem_used += (Graph::numF1-high-1) * itsz; //cout << "MEMUSEDLOOP " << mem_used << " " << endl; } //cout << "MEMUSEDLOOP " << mem_used << endl; //cout << "LOWHIGH " << high << " " << low << endl; for (int p=0; p < num_partitions; p++){ process_invert(p); } if (use_char_extl2){ if (isetpos > 0){ write(isetfd, (char *)isetbuf, isetpos*ITSZ); isetpos = 0; } sort_get_l2(l2cnt, isetfd); } else get_l2(l2cnt); // reclaim memory for (i = low; i < high; i++) { if (use_char_extl2){ //cout << "i " << i << " " << cset_sup[i] << endl << flush; if (cset_sup[i]) delete [] cset_sup[i]; cset_sup[i] = NULL; } else{ //cout << "DEL " << i << " " << set_sup[i] << endl << flush; if (set_sup[i]) delete [] set_sup[i]; set_sup[i] = NULL; } mem_used -= (Graph::numF1-i-1) * itsz; } } if (use_char_extl2){ close(isetfd); unlink(TEMPF); delete [] cset_sup; } else delete [] set_sup; delete invDB;}void get_file_l2(char *fname, int &l2cnt){ int *cntary; int fd = open(fname, O_RDONLY); if (fd < 1){ perror("can't open l2 file"); exit(errno); } int flen = lseek(fd,0,SEEK_END); if (flen > 0){#ifdef DEC cntary = (int *) mmap((char *)NULL, flen, PROT_READ, (MAP_FILE|MAP_VARIABLE|MAP_PRIVATE), fd, 0);#else cntary = (int *) mmap((char *)NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0);#endif if (cntary == (int *)-1){ perror("MMAP ERROR:cntary"); exit(errno); } // build F2graph -- large 2-itemset relations int lim = flen/ITSZ; //for (int i=0; i < lim; i += 3){ int i,j,k,res; int git[2]; for (j=0, i=0; j < Graph::numF1 && i < lim;j++){ for (k=j+1; k < Graph::numF1 && i < lim; k++){ git[0] = (*F2Graph)[j]->item(); git[1] = (*F2Graph)[k]->item(); //if (git[0] < cntary[i]) res = -1; //else if (git[0] > cntary[i]) res = 1; //else{ // if (git[0] < cntary[i+1]) res = -1; // else if (git[0] > cntary[i+1]) res = 1; // else res = 0; //} res = cmp2it(&git[0], &cntary[i]); if (res == 0){ if (cntary[i+2] >= MINSUPPORT){ //F2Graph->add_adj(j, cntary[i+1], cntary[i+2]); F2Graph->add_adj(j, k, cntary[i+2]); (*F2Graph)[j]->supsum() += cntary[i+2]; (*F2Graph)[k]->supsum() += cntary[i+2]; //cout << "LARGE " << cntary[i] << " " << cntary[i+1] // << " " << cntary[i+2] << endl; l2cnt++; } i += 3; } else if (res > 0){ i += 3; k--; } } } munmap((caddr_t)cntary, flen); } close(fd);}void get_horizontal_ext_l2(int &l2cnt, Dbase_Ctrl_Blk &DCB){ int i, j, k, idx; int it1, it2; int *itcnt2 = new int [(Graph::numF1*(Graph::numF1-1))/2]; bzero((char *)itcnt2, ((Graph::numF1*(Graph::numF1-1))/2)*sizeof(int)); int *offsets = new int [Graph::numF1]; int offt = 0; for (i=Graph::numF1-1; i >= 0; i--){ offsets[Graph::numF1-i-1] = offt; offt += i; } int *buf, custid, tid, numitem; DCB.get_first_blk(); DCB.get_next_trans(buf, numitem, tid, custid); while (!DCB.eof()){ for (i=0; i < numitem; i++){ it1 = DCB.freqidx[buf[i]]; if (it1 == -1) continue; DCB.tidlists[it1]->add(tid); idx = offsets[it1]-it1-1; for (j=i+1; j < numitem; j++){ it2 = DCB.freqidx[buf[j]]; if (it2 == -1) continue; itcnt2[idx+it2]++; } } DCB.get_next_trans(buf, numitem, tid, custid); } for (i=0; i < Graph::numF1-1; i++){ idx = offsets[i]-i-1; for (j=i+1; j < Graph::numF1; j++){ if (itcnt2[idx+j] >= MINSUPPORT){ it1 = (*F2Graph)[i]->item(); it2 = (*F2Graph)[j]->item(); F2Graph->add_adj(i, j, itcnt2[idx+j]); (*F2Graph)[i]->supsum() += itcnt2[idx+j]; (*F2Graph)[j]->supsum() += itcnt2[idx+j]; //cout << "LARGE " << it1 << " " << it2 // << " " << itcnt2[idx+j] << endl; l2cnt++; } } } delete [] itcnt2; for (i=0; i < Graph::numF1; i++){ if ((*F2Graph)[i]->sup() != DCB.tidlists[i]->size()) cout << "error in size " << i << endl; }}void sort_freqidx(Dbase_Ctrl_Blk &DCB){ int i,j; Array<int> **oldadd = new Array<int> *[Graph::numF1]; for (i=0; i < Graph::numF1; i++) oldadd[i] = DCB.tidlists[i]; for (i=0; i < DBASE_MAXITEM; i++){ DCB.freqidx[i] = -1; } for (i=0; i < Graph::numF1; i++){ DCB.freqidx[(*F2Graph)[i]->item()] = i; } for (i=0, j=0; i < DBASE_MAXITEM; i++){ if (DCB.freqidx[i] == -1) continue; DCB.tidlists[DCB.freqidx[i]] = oldadd[j]; j++; } //for (i=0; i < Graph::numF1; i++){ // cout << "F2N " << i << " " << (*F2Graph)[i]->item() << " " // << (*F2Graph)[i]->sup() << " "; // if (use_horizontal){ // cout << " -- " << DCB.tidlists[i]->size(); // } // cout << endl; // } delete [] oldadd;}int make_l2_pass(boolean ext_l2_pass, char *it2f, Dbase_Ctrl_Blk &DCB){ int i; int l2cnt=0; if (use_horizontal) get_horizontal_ext_l2(l2cnt,DCB); else if (ext_l2_pass) get_ext_l2(l2cnt); else get_file_l2(it2f, l2cnt); //cout << "L2 : " << l2cnt << endl; for (i=0; i < F2Graph->size(); i++){ if ((*F2Graph)[i]->array() != NULL) (*F2Graph)[i]->compact(); //cout << "NODE " << i << " " << (*F2Graph)[i]->supsum() << " : "; //if ((*F2Graph)[i]->array() != NULL) cout << " " << *(*F2Graph)[i]; //cout << endl; } if (process_order == IN_SUP || process_order == DE_SUP){ F2Graph->sort(); //cout << "ORDER :"; for (i=0; i < F2Graph->size(); i++){ if ((*F2Graph)[i]->array() != NULL) (*F2Graph)[i]->compact(); //cout << " " << (*F2Graph)[i]->item(); //cout << "NODE " << i << " : "; //if ((*F2Graph)[i]->array() != NULL) cout << " " << *(*F2Graph)[i]; //cout << endl; } //cout << endl; if (use_horizontal) sort_freqidx(DCB); //cout << "GRAPH DENSITY " << // (2.0 *l2cnt)/(F2Graph->size()*(F2Graph->size()-1)) << endl; } return l2cnt;}@1.1log@Initial revision@text@d667 1d670 1a670 1 //cout << (*F2Graph)[i]->item();d675 1@
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -