⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 extl2.cc,v

📁 charm是基于垂直数据集挖掘关联规则的一个著名算法
💻 CC,V
📖 第 1 页 / 共 2 页
字号:
head	1.2;access;symbols;locks	zaki:1.2; strict;comment	@// @;1.2date	2001.06.24.21.17.20;	author zaki;	state Exp;branches;next	1.1;1.1date	2001.06.12.16.41.58;	author zaki;	state Exp;branches;next	;desc@Charm with Hashing.@1.2log@added use_hash_map@text@#include <errno.h>#include <iostream.h>#include <stdio.h>#include <stdlib.h>#include <sys/types.h>#include <unistd.h>#include <sys/stat.h>#include <fcntl.h>#include <sys/mman.h>#include <strings.h>#include <limits.h>#include "extl2.h"#include "assoc.h"#include "partition.h"#include "Graph.h"#include "Util.h"#define isetbufsz 1024extern unsigned long int DB_size;int isetbuf[isetbufsz];int isetpos=0;int isetfd;unsigned char **cset_sup;unsigned int **set_sup;invdb *invDB;int EXTBLKSZ;int Graph::numF1=0;extern char tempf[];invdb::invdb(int sz): Array<Array<int> *>(sz){   for (int i=0; i < totsize(); i++){      (*this)[i] = NULL;   }   }invdb::~invdb(){   for (int i=0; i < totsize(); i++){      if ((*this)[i]) delete (*this)[i];   }}void invdb::add_db(int tid, int it){   if ((*this)[tid] == NULL){      (*this)[tid] = new Array<int>((int)DBASE_AVG_TRANS_SZ);   }   (*this)[tid]->add(it);}int cmp2it(const void *a, const void *b){   int *ary = (int *)a;   int *bry = (int *)b;   if (ary[0] < bry[0]) return -1;   else if (ary[0] > bry[0]) return 1;   else{      if (ary[1] < bry[1]) return -1;      else if (ary[1] > bry[1]) return 1;      else return 0;   }}int make_horizontal_l1_pass(Dbase_Ctrl_Blk &DCB){   int i,j;   int supsz;   int maxsup = 0;      int *itcnt = new int [DBASE_MAXITEM];   bzero((char *)itcnt, DBASE_MAXITEM*sizeof(int));   F2Graph = new Graph(100);   Graph::numF1 = 0;   int *buf, custid, tid, numitem;   DCB.get_first_blk();   DCB.get_next_trans(buf, numitem, tid, custid);   while (!DCB.eof()){      for (j=0; j < numitem; j++){         itcnt[buf[j]]++;      }      DCB.get_next_trans(buf, numitem, tid, custid);   }    DCB.freqidx = new int[DBASE_MAXITEM];   DCB.tidbuflen=0;   for (i=0; i < DBASE_MAXITEM; i++){      supsz = itcnt[i];      DB_size+=supsz;      if (diff_input) supsz = DBASE_NUM_TRANS - supsz;            if (supsz >= MINSUPPORT){         //if (diff_input) stats[0]->avgtid += DBASE_NUM_TRANS - supsz;         //else stats[0]->avgtid += supsz;                  F2Graph->add_node(i,supsz);         DCB.freqidx[i] = Graph::numF1;	 //cout << "LARGE " << i << " " << Graph::numF1 << " " << supsz << endl;         Graph::numF1++;         DCB.tidbuflen += supsz;      }      else DCB.freqidx[i] = -1;            if (supsz > maxsup) maxsup = supsz;   }   cout << "Graph::numF1 " << Graph::numF1 << endl;      F2Graph->compact();   delete [] itcnt;   int fd=-1;   //if ((fd = open ("/tmp/tidbuf", (O_WRONLY|O_CREAT|O_TRUNC), 0666)) < 0){   //  perror("Can't open tidbuf file");   //  exit (errno);   //}                                                                  DCB.tidbuf = (int *) mmap ((char *)NULL, DCB.tidbuflen*ITSZ, 			      (PROT_READ|PROT_WRITE), 			      MAP_ANON|MAP_PRIVATE, fd, 0);   if (DCB.tidbuf == (int *)-1){     perror("tidbuf MMAP ERROR");     exit(errno);   }   DCB.tidlists = new Array<int> *[Graph::numF1];      int offt = 0;   for (i=0; i < Graph::numF1; i++){     DCB.tidlists[i] = new Array<int> ((*F2Graph)[i]->sup(), &DCB.tidbuf[offt]);     offt += (*F2Graph)[i]->sup();   }   return maxsup;}int make_vertical_l1_pass(){   int i;   int supsz;   int idxval = -1;      int maxsup = 0;      F2Graph = new Graph(100);   Graph::numF1 = 0;   for (i=0; i < DBASE_MAXITEM; i++){      idxval = partition_idxval(i);      if (idxval == -1) continue; //item has no tidlist            supsz = partition_get_idxsup(i);      DB_size+=supsz;      if (diff_input) supsz = DBASE_NUM_TRANS - supsz;            if (supsz >= MINSUPPORT){         F2Graph->add_node(i,supsz);         Graph::numF1++;      }      if (supsz > maxsup) maxsup = supsz;   }   cout << "Graph::numF1 " << Graph::numF1 << endl;      F2Graph->compact();   return maxsup;}int make_l1_pass(Dbase_Ctrl_Blk &DCB){   if (use_horizontal) return make_horizontal_l1_pass(DCB);   else return make_vertical_l1_pass();}void process_cust_invert(int curcnt, int *curit){   int i,j;   int it1, it2;   //for (i=0; i < curcnt; i++)   //   cout << " " << (*F2Graph)[curit[i]]->item();   //cout << endl;   if (use_char_extl2){      for (i=0; i < curcnt; i++){         it1 = curit[i];         if (cset_sup[it1]){            for (j=i+1; j < curcnt; j++){               it2 = curit[j];               if ((++cset_sup[it1][it2-it1-1]) == 0){                  if (isetpos+2 > isetbufsz){                     write(isetfd, (char *)isetbuf, isetpos*ITSZ);                     isetpos = 0;                  }                  isetbuf[isetpos++] = it1;                  isetbuf[isetpos++] = it2;               }            }         }      }   }   else{      for (i=0; i < curcnt; i++){         it1 = curit[i];         if (set_sup[it1]){            for (j=i+1; j < curcnt; j++){               it2 = curit[j];               set_sup[it1][it2-it1-1]++;            }         }      }         }}void process_diff_cust(int curcnt, int *curit){   int i,j;   static Array<int> trans(100);   trans.reset();      //for (i=0; i < curcnt; i++)   //   cout << " " << curit[i];   //cout << endl;   i = 0; //iterate over numitems   j = 0; //idx into curit      for (j=0; j < curcnt; j++){      while (i < curit[j]){         if (partition_idxval((*F2Graph)[i]->item()) != -1) trans.add(i);         i++;      }      i++; //skip over curit[j]   }   for (; i < Graph::numF1; i++){      if (partition_idxval((*F2Graph)[i]->item()) != -1) trans.add(i);   }   process_cust_invert(trans.size(), trans.array());   }void process_invert(int pnum){   int i,k;   int minv, maxv;   if (diff_input){      minv = DBASE_MINTRANS;      maxv = DBASE_MAXTRANS;         }   else{            minv = INT_MAX;      maxv = 0;      for (i=0; i < Graph::numF1; i++){         partition_get_minmaxtid(pnum, (*F2Graph)[i]->item(), minv, maxv);      }   }   //cout << "MINMAX " << minv << " " << maxv << endl;   if (invDB->totsize() < maxv-minv+1)      invDB->Realloc(maxv-minv+1);      int supsz;   int ivalsz=0;   int *ival = NULL;   for (i=0; i < Graph::numF1; i++){      supsz = partition_get_lidxsup(pnum, (*F2Graph)[i]->item());      if (ivalsz < supsz){         ivalsz = Util<int>::Realloc(supsz, ITSZ, ival);      }      partition_lclread_item(ival, pnum, (*F2Graph)[i]->item());      int cid;      int midx;      for (int pos=0; pos < supsz; pos++)      {         cid = ival[pos];         midx = cid - minv;         invDB->add_db(midx,i);      }   }   for (k=0; k < maxv-minv+1; k++){      if (diff_input){         if ((*invDB)[k]){            if (Graph::numF1-(*invDB)[k]->size() > 0)               process_diff_cust((*invDB)[k]->size(), (*invDB)[k]->array());            (*invDB)[k]->reset();         }         else process_diff_cust(0, NULL);      }      else{         if ((*invDB)[k] && (*invDB)[k]->size() > 0){            //cout << "MVAL " << minv+k << " " << (*invDB)[k]->size() << endl;            //for (int x = 0; x < (*invDB)[k]->size(); x++){            //   cout << " " << (*F2Graph)[(*(*invDB)[k])[x]]->item();            //}            //cout << endl;                        process_cust_invert((*invDB)[k]->size(), (*invDB)[k]->array());            (*invDB)[k]->reset();         }      }   }} void sort_get_l2(int &l2cnt, int fd){   //write 2-itemsets counts to file   int i, j, k, fcnt;   long sortflen;   int *sortary;   sortflen = lseek(fd, 0, SEEK_CUR);   if (sortflen < 0){      perror("SEEK SEQ");      exit(errno);   }   //cout << "SORT " << sortflen << endl;   if (sortflen > 0){#ifdef DEC      sortary = (int *) mmap((char *)NULL, sortflen,                             (PROT_READ|PROT_WRITE),                             (MAP_FILE|MAP_VARIABLE|MAP_PRIVATE), fd, 0);#else      sortary = (int *) mmap((char *)NULL, sortflen,                             (PROT_READ|PROT_WRITE),                             MAP_PRIVATE, fd, 0);#endif      if (sortary == (int *)-1){         perror("SEQFd MMAP ERROR");         exit(errno);      }            qsort(sortary, (sortflen/ITSZ)/2, 2*ITSZ, cmp2it);      

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -