neclat.cc,v

来自「charm是基于垂直数据集挖掘关联规则的一个著名算法」· CC,V 代码 · 共 471 行

CC,V
471
字号
head	1.1;access;symbols;locks	zaki:1.1; strict;comment	@// @;1.1date	2001.06.12.16.41.58;	author zaki;	state Exp;branches;next	;desc@Charm with Hashing.@1.1log@Initial revision@text@#include <iostream.h>#include <stdlib.h>#include "eclat.h"#include "assoc.h"#include "Graph.h"#include "Itemset.h"#include "memman.h"#include "Array.h"//#include "gbk.h"#include "Util.h"//#include "cliques.h"#include "maxset.h"#include "defset.h"#include "Count.h"#include "hashtable.h"#include "stats.h"extern unsigned long sumtidlist;extern int DBASE_MAXITEM;boolean ECLAT_bfs=TRUE;boolean ECLAT_eqc=TRUE;int ECLAT_search=BOTUP;#define ECLAT_maxisetsz 100CountAry FreCount, CloCount;Itemset *Eqclass::iset = NULL;Array<int> NumLargeItemset;int maxiter = 2;maxset *MaxSet;HashTable *HT;void Eqclass::alloc_tmpiset(int sz){   iset = new Itemset(ECLAT_maxisetsz,sz);}#ifdef PRUNEboolean prune_test(Eqclass *EQ) {   ListNodes<Itemset *> *tempnode=EQ->head();   Array<int> *tempAry=new Array<int>(tempnode->item()->iset());      for(tempnode=tempnode->next();tempnode;tempnode=tempnode->next()) {      tempAry->add(tempnode->item()->litem());   }   boolean TF=MaxSet->subset(tempAry);   delete tempAry;   return TF;}#endif//herevoid process_cluster(Eqclass *cluster){   //cout << "cluster:" << *cluster ;   ListNodes<Itemset *> *hdr, *hdr2,*phdr2,*tempnode;   int subset;   tempnode=cluster->head();   for (hdr = cluster->head(); hdr; hdr=hdr->next()){      Eqclass *EQ = new Eqclass;      phdr2 =hdr;       sumtidlist += hdr->item()->tidsize();      for (hdr2=hdr->next(); hdr2 ; ){         sumtidlist += hdr2->item()->tidsize();	Itemset *temp;         //cout << "hdr->item" << *hdr->item() ;         //cout << "hdr->item()->tid():"  << *hdr->item()->tidlist() << endl;         //cout << "hdr2->item()" << *hdr2->item() ;         //cout << "hdr2->item()->tid():"  << *hdr2->item()->tidlist() << endl;	if (use_diff) 	  temp=Itemset::diff(hdr->item(), hdr2->item(),0,&subset);	else temp=Itemset::intersect(hdr->item(), hdr2->item(),0,&subset);        if (hdr->item()->isetsize() > 1)           stats.incrcand(hdr->item()->isetsize());                 if (temp){          FreCount.add(temp->iset()->size());          stats.incravgtid(temp->iset()->size()-1,temp->tidsize());           switch(subset){          case AeqB: //A=B, A=AB, & delete B             //cout << "subset: " << subset << " A=B" << endl;             //cout << "hdr->item" << *hdr->item() ;             //cout << "hdr->item()->tid():"  << *hdr->item()->tidlist() << endl;             //cout << "hdr2->item()" << *hdr2->item() ;             //cout << "hdr2->item()->tid():"  << *hdr2->item()->tidlist() << endl;	    if (use_diff){	      //hdr->item()->insert(hdr2->item()->litem());	      hdr->item()->copy(temp->iset(), NULL);	      //hdr->item()->hval() = temp->hval();	      //temp->copy(NULL, hdr->item()->tidlist());	      //temp->support() = hdr->item()->support();	      //temp->diff() = hdr->item()->diff();	      //temp->idrop() = hdr->item()->idrop();	    }	    else{	      delete hdr->item();	      hdr->item()=new Itemset(*temp);	      hdr->item()->support() = temp->support();	      hdr->item()->diff() = temp->diff();	    }	     for(tempnode= EQ->head(); tempnode; tempnode=tempnode->next()){             //replace A with AB                tempnode->item()->insert(hdr2->item()->litem());                FreCount.add(tempnode->item()->iset()->size());                stats.incravgtid(tempnode->item()->iset()->size()-1,                                 tempnode->item()->tidsize());              }             if(phdr2){                phdr2->lnext()=hdr2->next();                delete hdr2;                hdr2=phdr2;             }             else {                cluster->set_head(hdr2->next());                delete hdr2;                hdr2=NULL;             }             //cout << "after A=B" << *hdr->item() << endl;             break;          case AsubB: // A<B,replace A with AB             //cout << "subset: " << subset << " A<B" << endl;             //cout << "temp" << *temp ;             //hdr->set_bit();             if (use_diff){	       //hdr->item()->insert(hdr2->item()->litem());	       hdr->item()->copy(temp->iset(), NULL);	       //hdr->item()->hval() = temp->hval();	       //temp->copy(NULL, hdr->item()->tidlist());	       //temp->support() = hdr->item()->support();	       //temp->diff() = hdr->item()->diff();	       //temp->idrop() = hdr->item()->idrop();	     }	     else {	       delete hdr->item();	       hdr->item()=new Itemset(*temp);	       hdr->item()->support() = temp->support();	       hdr->item()->diff() = temp->diff();	     }             tempnode=EQ->head();             for(; tempnode; tempnode=tempnode->next()){ //replace A with AB                tempnode->item()->insert(hdr2->item()->litem());                FreCount.add(tempnode->item()->iset()->size());                stats.incravgtid(tempnode->item()->iset()->size()-1,                                 tempnode->item()->tidsize());              }             //cout << "hdr: " << *hdr->item() << endl;             break;          case BsubA: // A>B, delete B & save AB             //cout << "subset: " << subset << " A>B" << endl;             //hdr->set_bit();             if(phdr2) {                phdr2->lnext()=hdr2->next();                delete hdr2;                hdr2=phdr2;             }             else {                cluster->set_head(hdr2->next());                delete hdr2;                hdr2=NULL;             }             break;          case AneqB: //A<>B,nothing             //cout << "subset: " << subset << " A<>B" << endl;             //hdr->set_bit();             //hdr2->set_bit();             break;          }            // edn of switch          //cout << "join: " << *temp ;          if(subset == BsubA || subset == AneqB ){                         if (process_order == DE_SUP)                EQ->addsort(temp,Itemset::cmp_sup,0);             else if (process_order == IN_SUP)                EQ->addsort(temp,Itemset::cmp_sup,1);             else                EQ->append(temp);          }  //end of if (subset>=2)         } //end if temp         if(hdr2) {            phdr2=hdr2;            hdr2=hdr2->next();         }         else {           cout << "ERROR"  << endl;           //hdr2=cluster->head();         }      }// end of hdr2      CloCount.add(hdr->item()->iset()->size());      //CCif(HT->add(hdr->item()))         if (use_output) cout  << *hdr->item();      if (EQ->size() > 1){         process_cluster(EQ);         delete EQ;      }      else   {        if(EQ->size()==1) {           CloCount.add(EQ->head()->item()->iset()->size());           //CC if(HT->add(EQ->head()->item())) 	     if (use_output) cout << *EQ->head()->item();        }        if(EQ) delete EQ;     }  //end of EQ->size()   }  // end of hdr/*   for(tempnode=cluster->head();tempnode ;tempnode=tempnode->next()) {      if(!tempnode->chbit()){        CloCount.add(tempnode->item()->iset()->size());        cout << *tempnode->item() ;      }   }*/}void process_F1(Eqclass *F1EQ) {   ListNodes<Itemset *> *hdr, *hdr2,*phdr2,*tempnode;   int subset;   Array <int> *cls=new Array<int>;   Itemset *it1;      hdr = F1EQ->head();   for (; hdr; hdr=hdr->next()){      if (hdr->item()->memflg()) it1 = hdr->item();      else{                 it1 = Memman::read_from_disk(hdr->item()->litem());              delete hdr->item(); //new         hdr->item()=it1; //new      }            stats.incravgtid(0,it1->tidsize());       Eqclass *EQ = new Eqclass;      phdr2=hdr;            GrNode *grn = (*F2Graph)[hdr->item()->litem()];      cls->reset();      for (int j=0; j < grn->size(); j++){         cls->add((*grn)[j]->adj());      }            for (hdr2 = hdr->next(); hdr2 ; hdr2=hdr2->next()){         int i2=hdr2->item()->litem();         //cout << "hdr2:" << *hdr2->item() ;         if(cls->search(i2)==-1) {           phdr2=hdr2;           continue;         }         //else cout << "found" << endl;         //(*Eqclass::iset->iset())[0] = hdr2->item()->litem();         //Eqclass::iset->iset()->size()=1;         //Eqclass::iset->memflg() = FALSE;         //Memman::read_from_disk(Eqclass::iset,0);         Itemset *it2;         if (hdr2->item()->memflg()) it2 = hdr2->item();                 else{            it2 = Memman::read_from_disk(hdr2->item()->litem());            delete hdr2->item();            hdr2->item() = it2;                    }                 //delete Eqclass::iset;         Eqclass::iset = it2;         	 Itemset * temp;         if (diff_input)            temp = Itemset::diff(hdr->item(), Eqclass::iset, 0, &subset);         else if (use_diff)             temp = Itemset::diff2it(hdr->item(), Eqclass::iset,                                     &subset);	 else            temp = Itemset::intersect2it(hdr->item(), Eqclass::iset,                                          &subset);         	 // cout << "hdr->item" << *hdr->item() ;         //cout << "hdr->item()->tid():"  << *it1->tidlist() << endl;         //cout << "hdr2->item()" << *hdr2->item() ;         //cout << "hdr2->item()->tid():"  << *Eqclass::iset->tidlist() << endl;         if (temp){            FreCount.add(temp->iset()->size());            stats.incravgtid(temp->iset()->size()-1,temp->tidsize());             switch(subset){            case AeqB: //A=B, A=AB, & delete B	    //cout << "subset: " << subset << " A=B" << endl;	    if (use_diff){	      //hdr->item()->insert(hdr2->item()->litem());	      hdr->item()->copy(temp->iset(), NULL);	      //hdr->item()->hval() = temp->hval();	      //temp->copy(NULL, hdr->item()->tidlist());	      //temp->support() = hdr->item()->support();	      //temp->diff() = hdr->item()->diff();	      //temp->idrop() = hdr->item()->idrop();              //hdr->item()->support() = temp->support();              //hdr->item()->diff() = temp->diff();	    }	    else{	      delete hdr->item();	      hdr->item()=new Itemset(*temp);	      hdr->item()->support() = temp->support();	      hdr->item()->diff() = temp->diff();	    }	     for(tempnode= EQ->head();tempnode; tempnode=tempnode->next()){              //replace A with AB                tempnode->item()->insert(hdr2->item()->litem());                FreCount.add(tempnode->item()->iset()->size());                 stats.incravgtid(tempnode->item()->iset()->size()-1,                                 tempnode->item()->tidsize());              }             phdr2->lnext()=hdr2->next();             delete hdr2;             hdr2=phdr2;                          break;          case AsubB: // A<B,replace A with AB	    //cout << "subset: " << subset << " A<B" << endl;             //hdr->set_bit();	    if (use_diff){	      //hdr->item()->insert(hdr2->item()->litem());	      hdr->item()->copy(temp->iset(), NULL);	      //hdr->item()->hval() = temp->hval();	      //temp->copy(NULL, hdr->item()->tidlist());	      //temp->support() = hdr->item()->support();	      //temp->diff() = hdr->item()->diff();	      //temp->idrop() = hdr->item()->idrop();              //hdr->item()->support() = temp->support();              //hdr->item()->diff() = temp->diff();	    }	    else{	       delete hdr->item();	       hdr->item()=new Itemset(*temp);	       hdr->item()->support() = temp->support();	       hdr->item()->diff() = temp->diff();	    }             tempnode=EQ->head();             for(; tempnode; tempnode=tempnode->next()){ //replace A with AB                tempnode->item()->insert(hdr2->item()->litem());                FreCount.add(tempnode->item()->iset()->size());                 stats.incravgtid(tempnode->item()->iset()->size()-1,                                 tempnode->item()->tidsize());             }             break;          case BsubA: // A>B, delete B & save AB	    //cout << "subset: " << subset << " A>B" << endl;             //hdr->set_bit();             phdr2->lnext()=hdr2->next();             delete hdr2;             hdr2=phdr2;             break;          case AneqB: //A<>B,nothing	    //cout << "subset: " << subset << " A<>B" << endl;             //hdr->set_bit();             //hdr2->set_bit();             break;          }            // edn of switch          //cout << "  d:" << temp->idrop() << "join: " << *temp << endl;          if(subset == BsubA || subset == AneqB ){             if (process_order == DE_SUP)                EQ->addsort(temp,Itemset::cmp_sup,0);             else if (process_order == IN_SUP)                EQ->addsort(temp,Itemset::cmp_sup,1);             else                EQ->append(temp);                  //cout << "EQ:" << *EQ << endl;          }  //end of if (subset>=2)         } //end if temp       phdr2=hdr2;       //cout<< "F1EQ" << *F1EQ << endl;      }// end of hdr2                 //if (EQ) cout << "EQ:" << *EQ << endl;      CloCount.add(hdr->item()->iset()->size());      //CC if(HT->add(hdr->item()))          if (use_output) cout << *hdr->item();      if (EQ->size() > 1){                process_cluster(EQ);         delete EQ;              }      else      {        if(EQ->size()==1) {           CloCount.add(EQ->head()->item()->iset()->size());           //CC if(HT->add(EQ->head()->item()))	     if (use_output) cout << *EQ->head()->item();        }        else if(!hdr->chbit()){           //cout << *hdr->item();#ifdef PRUNE           if(hdr->item()->isetsize()>3)			   MaxSet->add(hdr->item());           //cout << "MaxSet" << *MaxSet << endl;#endif        }        if(EQ) delete EQ;      } // End of EQ->size      delete hdr->item();        }  // end of hdr/*   for(tempnode=F1EQ->head();tempnode ;tempnode=tempnode->next()) {     CloCount.add(tempnode->item()->iset()->size());     cout << *tempnode->item() ;   }*/}void process_eqc(){ // creat F1EQClass   //cout << Graph::numF1 << endl;   Eqclass *F1EQ= new Eqclass;   float rate=1-((float)(Graph::numF1)/DBASE_MAXITEM);//cout << "rate: " << rate << endl;   //unsigned int HTsize=(Graph::numF1)*(Graph::numF1)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate);   unsigned int HTsize=50000;   //cout << "HashTable Size: " << HTsize << endl;   //CC HT= new HashTable(HTsize);    FreCount.kadd(1,F2Graph->size());   for(int i=0; i < F2Graph->size(); i++){      Itemset *F1= new Itemset(1,0);      F1->iset()->add(i);          F1EQ->append(F1);   }      process_F1(F1EQ);}void ECLAT_Find_Freq(){   NumLargeItemset.add(Graph::numF1);   NumLargeItemset.add(0);      process_eqc();      //FreCount.print();   //CloCount.print();   //cout << *HT << endl;   //HT->print();//   RCloCount.print();}@

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?