neclat.cc,v
来自「charm是基于垂直数据集挖掘关联规则的一个著名算法」· CC,V 代码 · 共 471 行
CC,V
471 行
head 1.1;access;symbols;locks zaki:1.1; strict;comment @// @;1.1date 2001.06.12.16.41.58; author zaki; state Exp;branches;next ;desc@Charm with Hashing.@1.1log@Initial revision@text@#include <iostream.h>#include <stdlib.h>#include "eclat.h"#include "assoc.h"#include "Graph.h"#include "Itemset.h"#include "memman.h"#include "Array.h"//#include "gbk.h"#include "Util.h"//#include "cliques.h"#include "maxset.h"#include "defset.h"#include "Count.h"#include "hashtable.h"#include "stats.h"extern unsigned long sumtidlist;extern int DBASE_MAXITEM;boolean ECLAT_bfs=TRUE;boolean ECLAT_eqc=TRUE;int ECLAT_search=BOTUP;#define ECLAT_maxisetsz 100CountAry FreCount, CloCount;Itemset *Eqclass::iset = NULL;Array<int> NumLargeItemset;int maxiter = 2;maxset *MaxSet;HashTable *HT;void Eqclass::alloc_tmpiset(int sz){ iset = new Itemset(ECLAT_maxisetsz,sz);}#ifdef PRUNEboolean prune_test(Eqclass *EQ) { ListNodes<Itemset *> *tempnode=EQ->head(); Array<int> *tempAry=new Array<int>(tempnode->item()->iset()); for(tempnode=tempnode->next();tempnode;tempnode=tempnode->next()) { tempAry->add(tempnode->item()->litem()); } boolean TF=MaxSet->subset(tempAry); delete tempAry; return TF;}#endif//herevoid process_cluster(Eqclass *cluster){ //cout << "cluster:" << *cluster ; ListNodes<Itemset *> *hdr, *hdr2,*phdr2,*tempnode; int subset; tempnode=cluster->head(); for (hdr = cluster->head(); hdr; hdr=hdr->next()){ Eqclass *EQ = new Eqclass; phdr2 =hdr; sumtidlist += hdr->item()->tidsize(); for (hdr2=hdr->next(); hdr2 ; ){ sumtidlist += hdr2->item()->tidsize(); Itemset *temp; //cout << "hdr->item" << *hdr->item() ; //cout << "hdr->item()->tid():" << *hdr->item()->tidlist() << endl; //cout << "hdr2->item()" << *hdr2->item() ; //cout << "hdr2->item()->tid():" << *hdr2->item()->tidlist() << endl; if (use_diff) temp=Itemset::diff(hdr->item(), hdr2->item(),0,&subset); else temp=Itemset::intersect(hdr->item(), hdr2->item(),0,&subset); if (hdr->item()->isetsize() > 1) stats.incrcand(hdr->item()->isetsize()); if (temp){ FreCount.add(temp->iset()->size()); stats.incravgtid(temp->iset()->size()-1,temp->tidsize()); switch(subset){ case AeqB: //A=B, A=AB, & delete B //cout << "subset: " << subset << " A=B" << endl; //cout << "hdr->item" << *hdr->item() ; //cout << "hdr->item()->tid():" << *hdr->item()->tidlist() << endl; //cout << "hdr2->item()" << *hdr2->item() ; //cout << "hdr2->item()->tid():" << *hdr2->item()->tidlist() << endl; if (use_diff){ //hdr->item()->insert(hdr2->item()->litem()); hdr->item()->copy(temp->iset(), NULL); //hdr->item()->hval() = temp->hval(); //temp->copy(NULL, hdr->item()->tidlist()); //temp->support() = hdr->item()->support(); //temp->diff() = hdr->item()->diff(); //temp->idrop() = hdr->item()->idrop(); } else{ delete hdr->item(); hdr->item()=new Itemset(*temp); hdr->item()->support() = temp->support(); hdr->item()->diff() = temp->diff(); } for(tempnode= EQ->head(); tempnode; tempnode=tempnode->next()){ //replace A with AB tempnode->item()->insert(hdr2->item()->litem()); FreCount.add(tempnode->item()->iset()->size()); stats.incravgtid(tempnode->item()->iset()->size()-1, tempnode->item()->tidsize()); } if(phdr2){ phdr2->lnext()=hdr2->next(); delete hdr2; hdr2=phdr2; } else { cluster->set_head(hdr2->next()); delete hdr2; hdr2=NULL; } //cout << "after A=B" << *hdr->item() << endl; break; case AsubB: // A<B,replace A with AB //cout << "subset: " << subset << " A<B" << endl; //cout << "temp" << *temp ; //hdr->set_bit(); if (use_diff){ //hdr->item()->insert(hdr2->item()->litem()); hdr->item()->copy(temp->iset(), NULL); //hdr->item()->hval() = temp->hval(); //temp->copy(NULL, hdr->item()->tidlist()); //temp->support() = hdr->item()->support(); //temp->diff() = hdr->item()->diff(); //temp->idrop() = hdr->item()->idrop(); } else { delete hdr->item(); hdr->item()=new Itemset(*temp); hdr->item()->support() = temp->support(); hdr->item()->diff() = temp->diff(); } tempnode=EQ->head(); for(; tempnode; tempnode=tempnode->next()){ //replace A with AB tempnode->item()->insert(hdr2->item()->litem()); FreCount.add(tempnode->item()->iset()->size()); stats.incravgtid(tempnode->item()->iset()->size()-1, tempnode->item()->tidsize()); } //cout << "hdr: " << *hdr->item() << endl; break; case BsubA: // A>B, delete B & save AB //cout << "subset: " << subset << " A>B" << endl; //hdr->set_bit(); if(phdr2) { phdr2->lnext()=hdr2->next(); delete hdr2; hdr2=phdr2; } else { cluster->set_head(hdr2->next()); delete hdr2; hdr2=NULL; } break; case AneqB: //A<>B,nothing //cout << "subset: " << subset << " A<>B" << endl; //hdr->set_bit(); //hdr2->set_bit(); break; } // edn of switch //cout << "join: " << *temp ; if(subset == BsubA || subset == AneqB ){ if (process_order == DE_SUP) EQ->addsort(temp,Itemset::cmp_sup,0); else if (process_order == IN_SUP) EQ->addsort(temp,Itemset::cmp_sup,1); else EQ->append(temp); } //end of if (subset>=2) } //end if temp if(hdr2) { phdr2=hdr2; hdr2=hdr2->next(); } else { cout << "ERROR" << endl; //hdr2=cluster->head(); } }// end of hdr2 CloCount.add(hdr->item()->iset()->size()); //CCif(HT->add(hdr->item())) if (use_output) cout << *hdr->item(); if (EQ->size() > 1){ process_cluster(EQ); delete EQ; } else { if(EQ->size()==1) { CloCount.add(EQ->head()->item()->iset()->size()); //CC if(HT->add(EQ->head()->item())) if (use_output) cout << *EQ->head()->item(); } if(EQ) delete EQ; } //end of EQ->size() } // end of hdr/* for(tempnode=cluster->head();tempnode ;tempnode=tempnode->next()) { if(!tempnode->chbit()){ CloCount.add(tempnode->item()->iset()->size()); cout << *tempnode->item() ; } }*/}void process_F1(Eqclass *F1EQ) { ListNodes<Itemset *> *hdr, *hdr2,*phdr2,*tempnode; int subset; Array <int> *cls=new Array<int>; Itemset *it1; hdr = F1EQ->head(); for (; hdr; hdr=hdr->next()){ if (hdr->item()->memflg()) it1 = hdr->item(); else{ it1 = Memman::read_from_disk(hdr->item()->litem()); delete hdr->item(); //new hdr->item()=it1; //new } stats.incravgtid(0,it1->tidsize()); Eqclass *EQ = new Eqclass; phdr2=hdr; GrNode *grn = (*F2Graph)[hdr->item()->litem()]; cls->reset(); for (int j=0; j < grn->size(); j++){ cls->add((*grn)[j]->adj()); } for (hdr2 = hdr->next(); hdr2 ; hdr2=hdr2->next()){ int i2=hdr2->item()->litem(); //cout << "hdr2:" << *hdr2->item() ; if(cls->search(i2)==-1) { phdr2=hdr2; continue; } //else cout << "found" << endl; //(*Eqclass::iset->iset())[0] = hdr2->item()->litem(); //Eqclass::iset->iset()->size()=1; //Eqclass::iset->memflg() = FALSE; //Memman::read_from_disk(Eqclass::iset,0); Itemset *it2; if (hdr2->item()->memflg()) it2 = hdr2->item(); else{ it2 = Memman::read_from_disk(hdr2->item()->litem()); delete hdr2->item(); hdr2->item() = it2; } //delete Eqclass::iset; Eqclass::iset = it2; Itemset * temp; if (diff_input) temp = Itemset::diff(hdr->item(), Eqclass::iset, 0, &subset); else if (use_diff) temp = Itemset::diff2it(hdr->item(), Eqclass::iset, &subset); else temp = Itemset::intersect2it(hdr->item(), Eqclass::iset, &subset); // cout << "hdr->item" << *hdr->item() ; //cout << "hdr->item()->tid():" << *it1->tidlist() << endl; //cout << "hdr2->item()" << *hdr2->item() ; //cout << "hdr2->item()->tid():" << *Eqclass::iset->tidlist() << endl; if (temp){ FreCount.add(temp->iset()->size()); stats.incravgtid(temp->iset()->size()-1,temp->tidsize()); switch(subset){ case AeqB: //A=B, A=AB, & delete B //cout << "subset: " << subset << " A=B" << endl; if (use_diff){ //hdr->item()->insert(hdr2->item()->litem()); hdr->item()->copy(temp->iset(), NULL); //hdr->item()->hval() = temp->hval(); //temp->copy(NULL, hdr->item()->tidlist()); //temp->support() = hdr->item()->support(); //temp->diff() = hdr->item()->diff(); //temp->idrop() = hdr->item()->idrop(); //hdr->item()->support() = temp->support(); //hdr->item()->diff() = temp->diff(); } else{ delete hdr->item(); hdr->item()=new Itemset(*temp); hdr->item()->support() = temp->support(); hdr->item()->diff() = temp->diff(); } for(tempnode= EQ->head();tempnode; tempnode=tempnode->next()){ //replace A with AB tempnode->item()->insert(hdr2->item()->litem()); FreCount.add(tempnode->item()->iset()->size()); stats.incravgtid(tempnode->item()->iset()->size()-1, tempnode->item()->tidsize()); } phdr2->lnext()=hdr2->next(); delete hdr2; hdr2=phdr2; break; case AsubB: // A<B,replace A with AB //cout << "subset: " << subset << " A<B" << endl; //hdr->set_bit(); if (use_diff){ //hdr->item()->insert(hdr2->item()->litem()); hdr->item()->copy(temp->iset(), NULL); //hdr->item()->hval() = temp->hval(); //temp->copy(NULL, hdr->item()->tidlist()); //temp->support() = hdr->item()->support(); //temp->diff() = hdr->item()->diff(); //temp->idrop() = hdr->item()->idrop(); //hdr->item()->support() = temp->support(); //hdr->item()->diff() = temp->diff(); } else{ delete hdr->item(); hdr->item()=new Itemset(*temp); hdr->item()->support() = temp->support(); hdr->item()->diff() = temp->diff(); } tempnode=EQ->head(); for(; tempnode; tempnode=tempnode->next()){ //replace A with AB tempnode->item()->insert(hdr2->item()->litem()); FreCount.add(tempnode->item()->iset()->size()); stats.incravgtid(tempnode->item()->iset()->size()-1, tempnode->item()->tidsize()); } break; case BsubA: // A>B, delete B & save AB //cout << "subset: " << subset << " A>B" << endl; //hdr->set_bit(); phdr2->lnext()=hdr2->next(); delete hdr2; hdr2=phdr2; break; case AneqB: //A<>B,nothing //cout << "subset: " << subset << " A<>B" << endl; //hdr->set_bit(); //hdr2->set_bit(); break; } // edn of switch //cout << " d:" << temp->idrop() << "join: " << *temp << endl; if(subset == BsubA || subset == AneqB ){ if (process_order == DE_SUP) EQ->addsort(temp,Itemset::cmp_sup,0); else if (process_order == IN_SUP) EQ->addsort(temp,Itemset::cmp_sup,1); else EQ->append(temp); //cout << "EQ:" << *EQ << endl; } //end of if (subset>=2) } //end if temp phdr2=hdr2; //cout<< "F1EQ" << *F1EQ << endl; }// end of hdr2 //if (EQ) cout << "EQ:" << *EQ << endl; CloCount.add(hdr->item()->iset()->size()); //CC if(HT->add(hdr->item())) if (use_output) cout << *hdr->item(); if (EQ->size() > 1){ process_cluster(EQ); delete EQ; } else { if(EQ->size()==1) { CloCount.add(EQ->head()->item()->iset()->size()); //CC if(HT->add(EQ->head()->item())) if (use_output) cout << *EQ->head()->item(); } else if(!hdr->chbit()){ //cout << *hdr->item();#ifdef PRUNE if(hdr->item()->isetsize()>3) MaxSet->add(hdr->item()); //cout << "MaxSet" << *MaxSet << endl;#endif } if(EQ) delete EQ; } // End of EQ->size delete hdr->item(); } // end of hdr/* for(tempnode=F1EQ->head();tempnode ;tempnode=tempnode->next()) { CloCount.add(tempnode->item()->iset()->size()); cout << *tempnode->item() ; }*/}void process_eqc(){ // creat F1EQClass //cout << Graph::numF1 << endl; Eqclass *F1EQ= new Eqclass; float rate=1-((float)(Graph::numF1)/DBASE_MAXITEM);//cout << "rate: " << rate << endl; //unsigned int HTsize=(Graph::numF1)*(Graph::numF1)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate)*(float)(1/rate); unsigned int HTsize=50000; //cout << "HashTable Size: " << HTsize << endl; //CC HT= new HashTable(HTsize); FreCount.kadd(1,F2Graph->size()); for(int i=0; i < F2Graph->size(); i++){ Itemset *F1= new Itemset(1,0); F1->iset()->add(i); F1EQ->append(F1); } process_F1(F1EQ);}void ECLAT_Find_Freq(){ NumLargeItemset.add(Graph::numF1); NumLargeItemset.add(0); process_eqc(); //FreCount.print(); //CloCount.print(); //cout << *HT << endl; //HT->print();// RCloCount.print();}@
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?