⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 extl2.cc,v

📁 charm是基于垂直数据集挖掘关联规则的一个著名算法
💻 CC,V
📖 第 1 页 / 共 2 页
字号:
   }      int numel = sortflen/ITSZ;   i = 0;   fcnt = 0;   for (j=0; j < Graph::numF1;j++){      for (k=j+1; k < Graph::numF1;k++){         fcnt = 0;         if (sortflen > 0 && i < numel){            while (i < numel && j == sortary[i] && k == sortary[i+1]){               fcnt += 256;               i += 2;            }         }         if (cset_sup[j]) fcnt += (int) cset_sup[j][k-j-1];                  if (fcnt >= MINSUPPORT){            //F2Graph->add_adj(j, (*F2Graph)[k]->item(), fcnt);            F2Graph->add_adj(j, k, fcnt);            (*F2Graph)[j]->supsum() += fcnt;            (*F2Graph)[k]->supsum() += fcnt;            //cout << "LARGE " << (*F2Graph)[j]->item() << " " <<            //   (*F2Graph)[k]->item() << " SUPP " << fcnt << endl;            l2cnt++;         }      }   }   if (sortflen > 0) munmap((caddr_t)sortary, sortflen);}void get_l2(int &l2cnt){   int j,k;   int fcnt;      for (j=0; j < Graph::numF1;j++){      if (set_sup[j]){         for (k=j+1; k < Graph::numF1;k++){            fcnt = set_sup[j][k-j-1];            if (fcnt >= MINSUPPORT){               F2Graph->add_adj(j, k, fcnt);               (*F2Graph)[j]->supsum() += fcnt;               (*F2Graph)[k]->supsum() += fcnt;               l2cnt++;            }         }      }   }}void get_ext_l2(int &l2cnt){   int i;   int mem_used=0;   EXTBLKSZ = num_partitions+(DBASE_NUM_TRANS+num_partitions-1)/num_partitions;   int tsz = (int) (DBASE_AVG_TRANS_SZ);   invDB = new invdb(EXTBLKSZ);   mem_used += EXTBLKSZ*ITSZ;   mem_used += (int) (EXTBLKSZ*tsz*ITSZ);   //cout << "CURITSZ " << tsz << " " << EXTBLKSZ << " " << mem_used << endl;      char TEMPF[300];   sprintf(TEMPF,"%siset",tempf);   if (use_char_extl2){      if ((isetfd = open(TEMPF, (O_RDWR|O_CREAT|O_TRUNC), 0666)) < 0){         perror("Can't open out file");         exit (errno);            }      cset_sup = new unsigned char *[Graph::numF1];        // support for 2-itemsets      bzero((void *)cset_sup, Graph::numF1*sizeof(unsigned char *));         mem_used += Graph::numF1*sizeof(unsigned char *);   }   else{      set_sup = new unsigned int *[Graph::numF1];        // support for 2-itemsets      bzero((void *)set_sup, Graph::numF1*sizeof(unsigned int *));         mem_used += Graph::numF1*sizeof(unsigned int *);         }   int low, high;         int itsz;   if (use_char_extl2) itsz = sizeof(unsigned char);   else itsz = sizeof(unsigned int);      for (low = 0; low < Graph::numF1; low = high){      if (use_char_extl2){         isetpos = 0;         lseek(isetfd, 0, SEEK_SET);      }            for (high = low; high < Graph::numF1 &&              (mem_used+(Graph::numF1-high-1)*itsz) < AVAILMEM; high++){         if (Graph::numF1-high-1 > 0){            if (use_char_extl2){               cset_sup[high] = new unsigned char [Graph::numF1-high-1];               bzero((void *)cset_sup[high], (Graph::numF1-high-1)*itsz);            }            else{               set_sup[high] = new unsigned int [Graph::numF1-high-1];               //cout << "ALLOC " << high << " " << set_sup[high] << " "               //     << Graph::numF1-high-1 << endl;               bzero((void *)set_sup[high], (Graph::numF1-high-1)*itsz);            }         }         mem_used += (Graph::numF1-high-1) * itsz;         //cout << "MEMUSEDLOOP " << mem_used << " " << endl;      }      //cout << "MEMUSEDLOOP " << mem_used << endl;        //cout << "LOWHIGH " << high << " " << low << endl;      for (int p=0; p < num_partitions; p++){         process_invert(p);      }            if (use_char_extl2){         if (isetpos > 0){            write(isetfd, (char *)isetbuf, isetpos*ITSZ);            isetpos = 0;         }         sort_get_l2(l2cnt, isetfd);      }      else get_l2(l2cnt);            // reclaim memory      for (i = low; i < high; i++)      {         if (use_char_extl2){            //cout << "i " << i << " " << cset_sup[i] << endl << flush;            if (cset_sup[i]) delete [] cset_sup[i];            cset_sup[i] = NULL;         }         else{            //cout << "DEL " << i << " " << set_sup[i] << endl << flush;            if (set_sup[i]) delete [] set_sup[i];            set_sup[i] = NULL;         }         mem_used -= (Graph::numF1-i-1) * itsz;      }   }      if (use_char_extl2){      close(isetfd);      unlink(TEMPF);      delete [] cset_sup;   }   else delete [] set_sup;   delete invDB;}void get_file_l2(char *fname, int &l2cnt){   int *cntary;   int fd = open(fname, O_RDONLY);   if (fd < 1){      perror("can't open l2 file");      exit(errno);   }      int flen = lseek(fd,0,SEEK_END);   if (flen > 0){#ifdef DEC      cntary = (int *) mmap((char *)NULL, flen, PROT_READ,                             (MAP_FILE|MAP_VARIABLE|MAP_PRIVATE), fd, 0);#else      cntary = (int *) mmap((char *)NULL, flen, PROT_READ,                             MAP_PRIVATE, fd, 0);#endif      if (cntary == (int *)-1){         perror("MMAP ERROR:cntary");           exit(errno);      }            // build F2graph -- large 2-itemset relations      int lim = flen/ITSZ;      //for (int i=0; i < lim; i += 3){      int i,j,k,res;      int git[2];      for (j=0, i=0; j < Graph::numF1 && i < lim;j++){         for (k=j+1; k < Graph::numF1 && i < lim; k++){            git[0] = (*F2Graph)[j]->item();            git[1] = (*F2Graph)[k]->item();            //if (git[0] < cntary[i]) res = -1;            //else if (git[0] > cntary[i]) res = 1;            //else{            //   if (git[0] < cntary[i+1]) res = -1;            //   else if (git[0] > cntary[i+1]) res = 1;            //   else res = 0;            //}                        res = cmp2it(&git[0], &cntary[i]);            if (res == 0){               if (cntary[i+2] >= MINSUPPORT){                  //F2Graph->add_adj(j, cntary[i+1], cntary[i+2]);                  F2Graph->add_adj(j, k, cntary[i+2]);                  (*F2Graph)[j]->supsum() += cntary[i+2];                  (*F2Graph)[k]->supsum() += cntary[i+2];                  //cout << "LARGE " << cntary[i] << " " << cntary[i+1]                  //     << " " << cntary[i+2] << endl;                  l2cnt++;               }               i += 3;            }            else if (res > 0){               i += 3;               k--;            }         }      }            munmap((caddr_t)cntary, flen);   }   close(fd);}void get_horizontal_ext_l2(int &l2cnt, Dbase_Ctrl_Blk &DCB){   int i, j, k, idx;   int it1, it2;      int *itcnt2 = new int [(Graph::numF1*(Graph::numF1-1))/2];   bzero((char *)itcnt2, ((Graph::numF1*(Graph::numF1-1))/2)*sizeof(int));   int *offsets = new int [Graph::numF1];   int offt = 0;   for (i=Graph::numF1-1; i >= 0; i--){      offsets[Graph::numF1-i-1] = offt;      offt += i;   }       int *buf, custid, tid, numitem;   DCB.get_first_blk();   DCB.get_next_trans(buf, numitem, tid, custid);   while (!DCB.eof()){      for (i=0; i < numitem; i++){         it1 = DCB.freqidx[buf[i]];         if (it1 == -1) continue;	 DCB.tidlists[it1]->add(tid);         idx = offsets[it1]-it1-1;         for (j=i+1; j < numitem; j++){            it2 = DCB.freqidx[buf[j]];            if (it2 == -1) continue;            itcnt2[idx+it2]++;         }      }      DCB.get_next_trans(buf, numitem, tid, custid);   }   for (i=0; i < Graph::numF1-1; i++){      idx = offsets[i]-i-1;      for (j=i+1; j < Graph::numF1; j++){         if (itcnt2[idx+j] >= MINSUPPORT){            it1 = (*F2Graph)[i]->item();            it2 = (*F2Graph)[j]->item();            F2Graph->add_adj(i, j, itcnt2[idx+j]);            (*F2Graph)[i]->supsum() += itcnt2[idx+j];            (*F2Graph)[j]->supsum() += itcnt2[idx+j];            //cout << "LARGE " << it1 << " " << it2            //     << " " << itcnt2[idx+j] << endl;            l2cnt++;         }        }   }      delete [] itcnt2;     for (i=0; i < Graph::numF1; i++){     if ((*F2Graph)[i]->sup() != DCB.tidlists[i]->size())       cout << "error in size " << i << endl;   }}void sort_freqidx(Dbase_Ctrl_Blk &DCB){  int i,j;  Array<int> **oldadd = new Array<int> *[Graph::numF1];  for (i=0; i < Graph::numF1; i++)    oldadd[i] = DCB.tidlists[i];  for (i=0; i < DBASE_MAXITEM; i++){    DCB.freqidx[i] = -1;  }  for (i=0; i < Graph::numF1; i++){    DCB.freqidx[(*F2Graph)[i]->item()] = i;  }  for (i=0, j=0; i < DBASE_MAXITEM; i++){    if (DCB.freqidx[i] == -1) continue;    DCB.tidlists[DCB.freqidx[i]] = oldadd[j];    j++;  }  //for (i=0; i < Graph::numF1; i++){  //   cout << "F2N " << i << " " << (*F2Graph)[i]->item() << " "   //	  << (*F2Graph)[i]->sup() << " ";       //     if (use_horizontal){  //   cout << " -- " << DCB.tidlists[i]->size();  // }  // cout << endl;  // }  delete [] oldadd;}int make_l2_pass(boolean ext_l2_pass, char *it2f, Dbase_Ctrl_Blk &DCB){   int i;   int l2cnt=0;   if (use_horizontal) get_horizontal_ext_l2(l2cnt,DCB);   else if (ext_l2_pass) get_ext_l2(l2cnt);   else get_file_l2(it2f, l2cnt);   //cout << "L2 : " << l2cnt << endl;      for (i=0; i < F2Graph->size(); i++){      if ((*F2Graph)[i]->array() != NULL) (*F2Graph)[i]->compact();      //cout << "NODE " << i << " " << (*F2Graph)[i]->supsum() << " : ";      //if ((*F2Graph)[i]->array() != NULL) cout << " " << *(*F2Graph)[i];      //cout << endl;   }   if (process_order == IN_SUP || process_order == DE_SUP){           F2Graph->sort();      //cout << "ORDER :";      for (i=0; i < F2Graph->size(); i++){         if ((*F2Graph)[i]->array() != NULL) (*F2Graph)[i]->compact();         //cout << " " << (*F2Graph)[i]->item();         //cout << "NODE " << i << " : ";         //if ((*F2Graph)[i]->array() != NULL) cout << " " << *(*F2Graph)[i];         //cout << endl;      }      //cout << endl;      if (use_horizontal) sort_freqidx(DCB);            //cout << "GRAPH DENSITY " <<      //   (2.0 *l2cnt)/(F2Graph->size()*(F2Graph->size()-1)) << endl;   }      return l2cnt;}@1.1log@Initial revision@text@d667 1d670 1a670 1         //cout << (*F2Graph)[i]->item();d675 1@

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -