⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 itemset.cc,v

📁 charm是基于垂直数据集挖掘关联规则的一个著名算法
💻 CC,V
字号:
head	1.3;access;symbols;locks	zaki:1.3; strict;comment	@// @;1.3date	2001.09.22.19.35.24;	author zaki;	state Exp;branches;next	1.2;1.2date	2001.06.24.21.17.20;	author zaki;	state Exp;branches;next	1.1;1.1date	2001.06.12.16.41.58;	author zaki;	state Exp;branches;next	;desc@Charm with Hashing.@1.3log@*** empty log message ***@text@#include "Itemset.h"#include "Graph.h"Array<int> *Itemset::TmpIset = NULL;int Itemset::NumIntersect = 0;Itemset::Itemset(int itsz, int tsz){   theItemset = NULL;   if (itsz > 0) theItemset = new Array<int>(itsz);   theTidList = NULL;   if (tsz > 0) theTidList = new Array<int>(tsz);   theMemflg = FALSE;   theSupport = 0;   theHashval = 0;   theDiff = 0;   drop=0;   //hashval = 0;}Itemset::Itemset(int itsz, Array<int> *tidary){   theItemset = NULL;   if (itsz > 0) theItemset = new Array<int>(itsz);   theTidList = NULL;   if (tidary->size() > 0) theTidList = new Array<int>(tidary);   theMemflg = FALSE;      theSupport = 0;   theDiff = 0;   drop=0;   theHashval = 0;}Itemset::~Itemset(){   if (theItemset) delete theItemset;   if (theTidList) delete theTidList;}void Itemset::copy(Array<int> *iset, Array<int> *tlist){   if (iset) theItemset->copy(iset);   if (tlist){     if (!theTidList) theTidList = new Array<int>(tlist->size());     theTidList->copy(tlist);   }   drop=0;}void Itemset::alloc_tmpiset(int sz){   TmpIset = new Array<int>(sz);}void Itemset::iset_fill(Itemset *it1, Itemset *it2){  for (int i=0; i < it1->isetsize(); i++)    theItemset->optadd((*it1)[i]);  theItemset->optadd((*it2)[it2->isetsize()-1]);     //int i,j;   //for (i=0, j =0; i < it1->isetsize() && j < it2->isetsize();){   //  if ((*it1)[i] == (*it2)[j]){   //    theItemset->optadd((*it1)[i]);   //    i++;   //    j++;   //  }   //  else if ((*it1)[i] < (*it2)[j]){   //    theItemset->optadd((*it1)[i]);   //    i++;   //  }   //  else{   //    theItemset->optadd((*it2)[j]);   //    j++;   //  }   //}   //while (i < it1->isetsize()){   //  theItemset->optadd((*it1)[i]);   //  i++;   //}   //while (j < it2->isetsize()){   //  theItemset->optadd((*it2)[j]);   //  j++;   //}                                                    //cout << "FILL \t" << *it1 << flush;   //cout << *it2 << flush;   //cout << *theItemset << endl;}Itemset *Itemset::intersect_kway(Itemset *it1, Array<Itemset *> *it,                                 int noalloc, int copyit){   int i, k;   k = it->size();   NumIntersect++;   TmpIset->reset();   //do the k-way intersect   int *dc = new int[k];   int *df = new int[k];   int *pos = new int[k];      for (i=0; i < k; i++){      dc[i] = (*it)[i]->support()-MINSUPPORT;      df[i] = 0;      pos[i] = 0;   }      int done=0;   int cnt, lcnt;   while (!done){      for (i=0; i < k; i++){         if (pos[i] >= (*it)[i]->support()) done=1;         if (!copyit && df[i] > dc[i]) done=1;      }      if (done) break;      cnt=0;      lcnt=0;      for (i=1; i < k; i++){         if ((*(*it)[0]->theTidList)[pos[0]] > (*(*it)[i]->theTidList)[pos[i]]){            pos[i]++;            df[i]++;            lcnt++;         }         else if ((*(*it)[0]->theTidList)[pos[0]] ==                  (*(*it)[i]->theTidList)[pos[i]]){            cnt++;         }      }      if (lcnt == 0){         if (cnt == k-1){            TmpIset->optadd((*(*it)[0]->theTidList)[pos[0]]);            for (i=0; i < k; i++) pos[i]++;         }         else {            df[0]++;            pos[0]++;         }      }   }   delete [] dc;   delete [] df;   delete [] pos;      Itemset *ret = NULL;   if (TmpIset->size() >= MINSUPPORT){      if (noalloc){         ret = it1;         it1->copy(NULL,TmpIset);         //it1->iset()->optadd((*it2)[it2->isetsize()-1]);      }      else{         ret = new Itemset(it1->isetsize()+1, TmpIset);         //ret->iset_fill(it1,it2);      }   }   else if (copyit){      ret = it1;      it1->copy(NULL,TmpIset);         }   if (ret) ret->setdrop(0);   return ret;}Itemset *Itemset::intersect(Itemset *it1, Itemset *it2, int noalloc, int *subset){   NumIntersect++;   TmpIset->reset();   int dc1 = it1->support()-MINSUPPORT;   int dc2 = it2->support()-MINSUPPORT;   int tidsum = 0;   int df1=0;   int df2=0;   int i,j;   for (i=0,j=0; i < it1->support() && j < it2->support();){      if (df1 > dc1 || df2 > dc2) break;      if ((*it1->theTidList)[i] > (*it2->theTidList)[j]){         j++;         df2++;      }      else if ((*it1->theTidList)[i] < (*it2->theTidList)[j]){         df1++;         i++;      }      else{         TmpIset->optadd((*it1->theTidList)[i]);         tidsum += (*it1->theTidList)[i];         j++;         i++;      }   }   if (TmpIset->size() >= MINSUPPORT){           if( i < it1->support() )  df1++;      if( j < it2->support() )  df2++;   }   Itemset *ret = NULL;   if (TmpIset->size() >= MINSUPPORT){      if (noalloc){         ret = it1;         it1->copy(NULL,TmpIset);	 it1->support() = TmpIset->size();         it1->iset()->optadd((*it2)[it2->isetsize()-1]);         it1->diff() = MINSUPPORT-it1->support();                 it1->hashval() = tidsum;      }      else{         ret = new Itemset(it1->isetsize()+1, TmpIset);         ret->iset_fill(it1,it2);	 ret->support() = TmpIset->size();         ret->diff() = MINSUPPORT-ret->support();                 ret->hashval() = tidsum;      }      if (subset != NULL){         if (df1 == 0 && df2 == 0) *subset = AeqB;         else if (df1 == 0 && df2 != 0) *subset = AsubB;         else if (df1 !=0 && df2 == 0) *subset = BsubA;         else *subset = AneqB;      }      ret->setdrop(it1->support()-TmpIset->size());   }   return ret;}Itemset *Itemset::intersect2it(Itemset *it1, Itemset *it2,int *subset){   NumIntersect++;   TmpIset->reset();   int i,j,df1=0,df2=0;   int tidsum = 0;   int dc1 = it1->support()-MINSUPPORT;   int dc2 = it2->support()-MINSUPPORT;   for (i=0,j=0; i < it1->support() && j < it2->support();){      if (df1 > dc1 || df2 > dc2) break;      if ((*it1->theTidList)[i] > (*it2->theTidList)[j]){         j++;         df2++;      }      else if ((*it1->theTidList)[i] < (*it2->theTidList)[j]){         i++;         df1++;      }      else{         TmpIset->optadd((*it1->theTidList)[i]);         tidsum += (*it1->theTidList)[i];         j++;         i++;      }   }   if (TmpIset->size() >= MINSUPPORT){           if( i < it1->support() )  df1++;      if( j < it2->support() )  df2++;   }      Itemset *ret = NULL;   if (TmpIset->size() >= MINSUPPORT){      ret = new Itemset(it1->isetsize()+1, TmpIset);      ret->iset_fill(it1,it2);      ret->support() = TmpIset->size();      ret->diff() = MINSUPPORT-ret->support();              ret->hashval() = tidsum;      if (subset != NULL){         if (df1 == 0 && df2 == 0) *subset = AeqB;         else if (df1 == 0 && df2 != 0) *subset = AsubB;         else if (df1 !=0 && df2 == 0) *subset = BsubA;         else *subset = AneqB;      }      ret->setdrop(it1->support()-TmpIset->size());   }   return ret;}Itemset *Itemset::diff2it(Itemset *it1, Itemset *it2,int *subset){   NumIntersect++;   TmpIset->reset();   int i,j,df1=0,df2=0;   int diffsum=0;   //int hval = 0;   int dc1 = it1->support()-MINSUPPORT;   //int dc2 = it2->support()-MINSUPPORT;   for (i=0,j=0; i < it1->support() && j < it2->support() 	  && df1 <= dc1;){      if (it1->tid(i) > it2->tid(j)){         j++;         df2++;      }      else if (it1->tid(i) < it2->tid(j)){	TmpIset->optadd(it1->tid(i));        diffsum += it1->tid(i);	i++;	df1++;      }      else{	//hval += it1->tid(i);         j++;         i++;      }   }   for (; i < it1->support() && df1 <= dc1; i++){     TmpIset->optadd(it1->tid(i));     diffsum += it1->tid(i);     df1++;   }   if (j < it2->support() && df1 <= dc1) df2++;        Itemset *ret = NULL;   if (df1 <= dc1){      ret = new Itemset(it1->isetsize()+1, TmpIset);      ret->iset_fill(it1,it2);      ret->diff() = df1;      ret->support() = it1->support() - ret->diff();       ret->hashval() = it1->hashval() - diffsum;      //ret->hval() = hval;      if (subset != NULL){         if (df1 == 0 && df2 == 0) *subset = AeqB;         else if (df1 == 0 && df2 > 0) *subset = AsubB;         else if (df1 > 0 && df2 == 0) *subset = BsubA;         else *subset = AneqB;      }      ret->setdrop(TmpIset->size());   }   return ret;}Itemset *Itemset::diff(Itemset *it1, Itemset *it2, int noalloc, int *subset){   int i,j;   NumIntersect++;   TmpIset->reset();    int diff1= 0;   int diff2 = 0;   int diffsum = 0;   //int hval = 0;   int osup1 = it1->support();   int dc1 = it1->support()-MINSUPPORT;   for (i=0,j=0; i < it2->diff() && j < it1->diff() 	  && diff2 <= dc1;){      if (it2->tid(i) > it1->tid(j)){	diff1++;         j++;      }      else if (it2->tid(i) == it1->tid(j)){         j++;         i++;      }      else{	//hval += it2->tid(i);         TmpIset->optadd(it2->tid(i));         diffsum += it2->tid(i);         diff2++;         i++;      }   }   for (; i < it2->diff() && diff2 <= dc1; i++){     //hval += it2->tid(i);     TmpIset->optadd(it2->tid(i));     diffsum += it2->tid(i);     diff2++;   }   if (j < it1->diff() && diff2 <= dc1) diff1++;      //TmpIset->support() = it1->support() - TmpIset->diff();   Itemset *ret = NULL;   if (diff2 <= dc1){      if (noalloc){         ret = it1;         it1->copy(NULL,TmpIset);         it1->iset()->optadd((*it2)[it2->isetsize()-1]);	 it1->diff() = diff2;	 it1->support() = osup1 - it1->diff();	 it1->hashval() = it1->hashval() - diffsum;      }      else{         ret = new Itemset(it1->isetsize()+1, TmpIset);         ret->iset_fill(it1,it2);	 ret->diff() = diff2;	 ret->support() = it1->support() - ret->diff();	 ret->hashval() = it1->hashval() - diffsum;      }      if (subset != NULL){         if (diff1 == 0 && diff2 == 0) *subset = AeqB;         else if (diff1 == 0 && diff2 > 0) *subset = BsubA;         else if (diff1 > 0 && diff2 == 0) *subset = AsubB;         else *subset = AneqB;      }      ret->setdrop(TmpIset->size());   }   return ret;} ostream& operator << (ostream& fout, Itemset& iset){   int i,j=0;      //if (iset.isetsize() != 2) return fout;      if (iset.iset()){      int *temp= new int[iset.isetsize()];      for (i=0; i < iset.isetsize(); i++){         int k=(*F2Graph)[iset[i]]->item();#ifndef SORT_ITEM         fout << k << " ";      }#else         temp[j]=k;         j++;      }      qsort(temp,iset.isetsize(),sizeof(int),cmpint);      fout <<": ";      for(i=0;i<  iset.isetsize(); i++)         fout << temp[i] << " " ;#endif      delete temp;   }   fout << "- ";   fout << iset.support();//fout << " " << iset.hashval();//   if (iset.tidlist()){//      fout << " ( ";//      for (i=0; i < iset.tidlist()->size(); i++)//         fout << (*iset.tidlist())[i] << " "; //   }//   fout << ")" << endl;   fout << endl;   return fout;}int Itemset::cmp_iset(const void *a, const void *b){   Itemset *ia = (Itemset *)a;   Itemset *ib = (Itemset *)b;   for (int i=0; i < ia->isetsize(); i++){      if ((*ia)[i] > (*ib)[i]) return 1;      else if ((*ia)[i] < (*ib)[i]) return -1;   }   return 0;}int Itemset::cmp_sup(const void *a, const void *b){   Itemset *ia = (Itemset *)a;   Itemset *ib = (Itemset *)b;   if (ia->support() > ib->support()) return 1;   else if (ia->support() < ib->support()) return -1;   return 0;}int Itemset::cmp_drop(const void *a, const void *b){   Itemset *ia = (Itemset *)a;   Itemset *ib = (Itemset *)b;   if (ia->idrop() > ib->idrop()) return 1;   else if (ia->idrop() < ib->idrop()) return -1;   return 0;}void Itemset::insert(int k) {   theItemset->add((*theItemset)[theItemset->size()-1]);   (*theItemset)[theItemset->size()-2]=k;}   /*   Array<int> *tmp = new Array<int>(2);   for(i=0;i<prefix;i++)      tmp->add((*theItemset)[i]);   tmp->add(k);   for(j=prefix;j<theItemset->size();j++)      tmp->add((*theItemset)[i]);   delete theItemset;   theItemset = tmp;}*/@1.2log@added use_hash_map@text@d430 1a430 1   fout << " " << iset.hashval();@1.1log@Initial revision@text@d190 1a190 1         if (use_diff && use_hash) tidsum += (*it1->theTidList)[i];d250 1a250 1         if (use_diff && use_hash) tidsum += (*it1->theTidList)[i];d298 1a298 1        if (use_hash) diffsum += it1->tid(i);d311 1a311 1     if (use_hash) diffsum += it1->tid(i);d361 1a361 1         if (use_hash) diffsum += it2->tid(i);d369 1a369 1     if (use_hash) diffsum += it2->tid(i);d406 4a409 1      int i,j=0;d428 12a439 10fout << "- ";fout << iset.support();if (iset.tidlist()){   fout << " ( ";   for (i=0; i < iset.tidlist()->size(); i++)      fout << (*iset.tidlist())[i] << " ";   }fout << ")" << endl;return fout;@

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -