📄 fp_tree.cpp
字号:
{ item_order[i] = order[i]; order[i]=-1; } ITEM_NO = itemno; delete []counts; delete Tran;} void FI_tree::insert(int* compact, int counts, int current){ Fnode* child = Root; Fnode* temp, *temp1=NULL; int i=0; while(i<current) { for(temp=child->leftchild; temp!=NULL; temp = temp->rightsibling) { if(temp->itemname==table[compact[i]])break; if(temp->rightsibling==NULL)temp1 = temp; } if(!temp)break; temp->count+=counts; child=temp; i++; } while(i<current) { child = child->append(this, temp1, table[compact[i]], counts); bran[i]++; i++; }}void FI_tree::cal_level_25(){ int i, total_25=0, total_50=0, total_bran=0, maxlen=0; for(i=0; i<this->itemno && bran[i]!=0; i++); maxlen =i; for(i=0; i<int(maxlen*0.25); i++) total_25 +=bran[i]; total_50 = total_25; for(i=int(maxlen*0.25); i<this->itemno*0.5; i++) total_50 +=bran[i]; for(i=0; i<this->itemno && bran[i]!=0; i++) {// cout<<i<<" " <<bran[i]<<endl; total_bran+=bran[i]; bran[i]=0; } level_25 = (double)total_25/total_bran*100;// cout<<"First 25% levels: "<<(double)total_25/total_bran*100<<"% "<<"50% levels: "<<(double)total_50/total_bran*100<<"%"<<endl;}void FI_tree::fill_count(int* origin, int support){ int i, j=0; for(i=0; i<itemno; i++) { if(origin[i]!=-1) { compact[j++]=i; origin[i] = -1; } } if(array) { int comp_len = j; for(i=comp_len-1; i>0 && compact[i]>SUDDEN; i--) for(j=i-1; j>=0; j--) array[itemno-1-compact[i]][compact[i]-compact[j]-1]+=support; }}void FI_tree::scan2_DB(Data *fdat){ int i, j, has; int* origin, *buffer=new int; Transaction *Tran = new Transaction; origin = new int[ITEM_NO]; assert(Tran!=NULL&&origin!=NULL && buffer!=NULL); for(j=0; j<ITEM_NO; j++) origin[j]=-1; for(i=0; i<TRANSACTION_NO; i++) { Tran = fdat->getNextTransaction(Tran); has=0; for(int j=0; j<Tran->length; j++) { if(item_order[Tran->t[j]]!=-1) { has++;// if(origin[item_order[Tran->t[j]]]!=-1)// has--; // in transaction, items are not supposed to appear more than twice origin[item_order[Tran->t[j]]] = item_order[Tran->t[j]]; } } if(has){ fill_count(origin, 1); insert(compact, 1, has); } } cal_level_25(); delete []origin; delete buffer; delete Tran;}void FI_tree::scan1_DB(FI_tree* old_tree){ int i, j; int* old_order = old_tree->order; for(i=0; i< itemno; i++) { count[i]=supp[old_order[list->FS[i+list->top-itemno]]]; table[i]=list->FS[i+list->top-itemno]; supp[old_order[list->FS[i+list->top-itemno]]]=0; } sort(count, table, 0, itemno-1); for(i=0; i<itemno; i++) { order[table[i]]=i; head[i] = (Fnode*)fp_buf->newbuf(1, sizeof(Fnode)); head[i]->init(NULL, table[i], count[i]); } if(itemno > SUDDEN+5 && old_tree->level_25 > SWITCH) { array = (int**)fp_buf->newbuf(itemno-1-SUDDEN, sizeof(int*)); for(i=0; i<itemno-1-SUDDEN; i++) { array[i] = (int*)fp_buf->newbuf(itemno-1-i, sizeof(int)); for(j=0; j<itemno-1-i; j++) array[i][j] = 0; } }else array = NULL;}void FI_tree::scan2_DB(FI_tree* old_tree, Fnode* node){ int i, has, current; int* origin; Fnode* link, *parent; int* old_order; old_order = old_tree->order; current = old_order[node->itemname]; origin = new int[current]; assert(origin!=NULL); for(i=0; i<current; i++) origin[i]=-1; for(link=node->next; link!=NULL; link=link->next) { has=0; for(parent=link->par; parent->itemname!=-1; parent=parent->par) if(order[parent->itemname]!=-1) { origin[order[parent->itemname]]=parent->itemname; has++; } if(has) { fill_count(origin, link->count); insert(compact, link->count, has); } } cal_level_25(); delete []origin;}void FI_tree::powerset(int*prefix, int prefixlen, int* items, int current, int itlen, FSout* fout )const{ if(current==itlen) { if(prefixlen!=0) { ITlen[list->top+prefixlen-1]++; if(fout) { fout->printset(list->top, list->FS); fout->printSet(prefixlen, prefix, this->head[order[prefix[prefixlen-1]]]->count); } } }else{ current++; powerset(prefix, prefixlen, items, current, itlen, fout); current--; prefix[prefixlen++]=items[current++]; powerset(prefix, prefixlen, items, current, itlen, fout); }} void FI_tree::generate_all(int new_item_no, FSout* fout)const{ powerset(prefix, 0, list->FS, list->top, list->top+new_item_no, fout);}bool FI_tree::Single_path(bool close)const{ Fnode* node; for(node=Root->leftchild; node!=NULL; node=node->leftchild) if(node->rightsibling!=NULL)return false; if(close) for(node=Root->leftchild; node!=NULL; node=node->leftchild) { list->FS[list->top] = node->itemname; list->counts[list->top++] = node->count; } return true;}memory* FI_tree::allocate_buf(int sequence, int iteration, int i) // power2[i] is the smallest block size for memory{ memory* buf; int blocks=60; int j=0; if(itemno<=100)j=sequence/10; else if(itemno>100 && itemno<=500)j=sequence/50; if(itemno>500)j = sequence/100; switch(iteration){ case -1: if(j<=4) buf=new memory(blocks, power2[i+j], power2[i+j+1], 2); else buf=new memory(blocks, power2[i+5], power2[i+6], 2); break; case 0: if(j<=3) buf=new memory(blocks, power2[i+j], power2[i+j+1], 2); else buf=new memory(blocks, power2[i+4], power2[i+5], 2); break; case 1: if(j<=2) buf=new memory(blocks, power2[i+j], power2[i+j+1], 2); else buf=new memory(blocks, power2[i+3], power2[i+4], 2); break; case 2: if(j<=1) buf=new memory(blocks, power2[i+j], power2[i+j+1], 2); else buf=new memory(blocks, power2[i+2], power2[i+3], 2); break; case 3: if(j<=0) buf=new memory(blocks, power2[i+j], power2[i+j+1], 2); else buf=new memory(blocks, power2[i+1], power2[i+2], 2); break; default: buf=new memory(blocks, power2[i], power2[i+1], 2); } return buf;}int FI_tree::conditional_pattern_base(Fnode* node, bool close)const{ Fnode* temp, *parent; int i; for(temp=node->next; temp!=NULL; temp=temp->next) { parent=temp->par; for(; parent->itemname!=-1;parent=parent->par) supp[order[parent->itemname]]+=temp->count; } int k=0; for(i=0; i<order[node->itemname]; i++) { if(supp[i]>=THRESHOLD) { k++; list->FS[list->top++]=table[i]; if(close)list->counts[list->top-1] = supp[i]; }else supp[i] = 0; } return k;}int FI_tree::conditional_pattern_base(int itemname, bool close)const{ int i, k=0, item = itemno-1-order[itemname]; for(i=itemno-2-item; i>=0;i--) if(array[item][i]>=THRESHOLD) { k++; list->FS[list->top++]=table[itemno-2-item-i]; supp[itemno-2-item-i]=array[item][i]; if(close)list->counts[list->top-1] = array[item][i]; } return k;}int FI_tree::FP_growth(FSout* fout){ int i, sequence, current, new_item_no, listlen; int MC=0; //markcount for memory unsigned int MR=0; //markrest for memory char* MB; //markbuf for memory Fnode* Current; for(sequence=itemno-1; sequence>=0; sequence--) { Current=head[sequence]; current=head[sequence]->itemname; list->FS[list->top++]=head[sequence]->itemname; listlen = list->top; ITlen[list->top-1]++; if(fout) fout->printSet(list->top, list->FS, this->head[sequence]->count); if(array && sequence>SUDDEN+1) new_item_no=conditional_pattern_base(Current->itemname); //new_item_no is the number of elements in new header table. else if(sequence !=0) new_item_no=conditional_pattern_base(Current); //new_item_no is the number of elements in new header table. else new_item_no = 0; if(new_item_no==0 || new_item_no == 1) { if(new_item_no==1) { ITlen[list->top-1]++; if(fout) fout->printSet(list->top, list->FS, supp[order[list->FS[list->top-1]]]); } if(new_item_no==1)supp[order[list->FS[list->top-1]]] = 0; list->top=listlen-1; continue; } FI_tree *fptree; MB=fp_buf->bufmark(&MR, &MC); fptree = (FI_tree*)fp_buf->newbuf(1, sizeof(FI_tree)); fptree->init(this->itemno, new_item_no); fptree->scan1_DB(this); fptree->scan2_DB(this, Current); list->top=listlen; if(fptree->Single_path()) { /* patch Oct. 9, 2003*/ Fnode* node; for(node=fptree->Root->leftchild; node!=NULL; node=node->leftchild) list->FS[list->top++] = node->itemname; list->top = listlen; /*************************/ fptree->generate_all(new_item_no, fout); list->top--; }else{ //not single path i = fptree->FP_growth(fout); list->top = listlen-1; } fp_buf->freebuf(MR, MC, MB); } return 0;}int FI_tree::FPmax(FSout* fout){ static int ms=9; //power2[i] is the smallest block size for memory 2**9 = 512 int i, sequence, current, new_item_no, listlen; int MC=0; //markcount for memory unsigned int MR=0; //markrest for memory char* MB; //markbuf for memory Fnode* Current; for(sequence=itemno-1; sequence>=0; sequence--) { Current=head[sequence]; current=head[sequence]->itemname; list->FS[list->top++]=head[sequence]->itemname; listlen = list->top; if(array && sequence>SUDDEN+1) new_item_no=conditional_pattern_base(Current->itemname); //new_item_no is the number of elements in new header table. else if(sequence !=0) new_item_no=conditional_pattern_base(Current); //new_item_no is the number of elements in new header table. else new_item_no = 0; if(LMaxsets->is_subset()) { for(int j=listlen; j < list->top; j++)supp[order[list->FS[j]]] = 0; list->top=listlen-1; if(new_item_no == sequence)return new_item_no; continue; } if(new_item_no==0 || new_item_no == 1) { ITlen[list->top-1]++; if(fout){ if(new_item_no==1) fout->printSet(list->top, list->FS, supp[order[list->FS[list->top-1]]]); else fout->printSet(list->top, list->FS, this->head[sequence]->count); } LMaxsets->insert(list->FS, LMaxsets->posi+1, list->top+new_item_no-1); update_mfi_trees(LMaxsets->posi+1); list->top = listlen+1; if(new_item_no==1)supp[order[list->FS[list->top-1]]] = 0; list->top=listlen-1; if(new_item_no != sequence)continue; return new_item_no; } FI_tree *fptree; MB=fp_buf->bufmark(&MR, &MC); fptree = (FI_tree*)fp_buf->newbuf(1, sizeof(FI_tree)); fptree->init(this->itemno, new_item_no); fptree->scan1_DB(this); fptree->scan2_DB(this, Current); list->top=listlen; if(fptree->Single_path()) { if(fout) { fout->printSet(list->top+new_item_no, list->FS, fptree->head[fptree->itemno-1]->count); } ITlen[list->top+new_item_no-1]++; LMaxsets->insert(list->FS, LMaxsets->posi+1, list->top+new_item_no); list->top = list->top+new_item_no; update_mfi_trees(LMaxsets->posi+1); list->top = listlen; list->top--; if(new_item_no == sequence) { fp_buf->freebuf(MR, MC, MB); return new_item_no; } }else{ //not single path memory* Max_buf; Max_buf=allocate_buf(sequence, LMaxsets->posi, ms); MFI_tree* new_LMFI = (MFI_tree*)Max_buf->newbuf(1, sizeof(MFI_tree)); new_LMFI->init(Max_buf, fptree, LMaxsets, LMaxsets->head[sequence], list->top-1); fptree->set_max_tree(new_LMFI); mfitrees[LMaxsets->posi+2] = new_LMFI; i=fptree->FPmax(fout); list->top = new_LMFI->posi; if(Max_buf->half())ms++; delete Max_buf; if(i+1 == sequence) { fp_buf->freebuf(MR, MC, MB); return i+1; } } fp_buf->freebuf(MR, MC, MB); } return 0; }int FI_tree::FPclose(FSout* fout){ static int ms=9; //power2[i] is the smallest block size for memory 2**9 = 512 int i, sequence, current, new_item_no, listlen; int MC=0; //markcount for memory unsigned int MR=0; //markrest for memory char* MB; //markbuf for memory Fnode* Current; for(sequence=itemno-1; sequence>=0; sequence--) { Current=head[sequence]; current=head[sequence]->itemname; list->FS[list->top++]=head[sequence]->itemname; list->counts[list->top-1] = this->head[sequence]->count; listlen = list->top; if(LClose->is_subset(this->head[sequence]->count)) { list->top=listlen-1; continue; } if(array && sequence>SUDDEN+1) new_item_no=conditional_pattern_base(Current->itemname, 1); //new_item_no is the number of elements in new header table. else if(sequence !=0) new_item_no=conditional_pattern_base(Current, 1); //new_item_no is the number of elements in new header table. else new_item_no = 0; if(new_item_no==0 || new_item_no == 1) { if(new_item_no==0) { ITlen[list->top-1]++; if(fout) fout->printSet(list->top, list->FS, this->head[sequence]->count); LClose->insert(list->FS, LClose->posi+1, list->top+new_item_no-1, this->head[sequence]->count); update_cfi_trees(LClose->posi+1, this->head[sequence]->count); }else{ list->top=listlen;//Bug! Dec. 5/* if(LClose->generate_close(1, this->head[sequence]->count, fout)) { supp[order[list->FS[listlen]]] = 0; list->top=listlen-1; return new_item_no; }else supp[order[list->FS[listlen]]] = 0;*/ LClose->generate_close(1, this->head[sequence]->count, fout); supp[order[list->FS[listlen]]] = 0; } list->top=listlen-1; continue; }/* int j; for(j=listlen; j<new_item_no+listlen && list->counts[listlen-1]!=list->counts[j]; j++); if(j==new_item_no+listlen) { ITlen[listlen-1]++; if(fout) fout->printSet(listlen, list->FS, this->head[sequence]->count); LClose->insert(list->FS, LClose->posi+1, listlen, this->head[sequence]->count); list->top = listlen; update_cfi_trees(LClose->posi+1, this->head[sequence]->count); list->top += new_item_no; }*/ FI_tree *fptree; MB=fp_buf->bufmark(&MR, &MC); fptree = (FI_tree*)fp_buf->newbuf(1, sizeof(FI_tree)); fptree->init(this->itemno, new_item_no); fptree->scan1_DB(this); fptree->scan2_DB(this, Current); list->top=listlen; if(fptree->Single_path(1)) { list->top=listlen; if(LClose->generate_close(new_item_no, this->head[sequence]->count, fout)) { if(new_item_no == sequence) { fp_buf->freebuf(MR, MC, MB); list->top = listlen-1; return new_item_no; } } list->top = listlen-1; }else{ //not single path /* patch Oct. 9, 2003 */ int j; for(j=listlen; j<new_item_no+listlen && list->counts[listlen-1]!=list->counts[j]; j++); if(j==new_item_no+listlen) { ITlen[listlen-1]++; if(fout) fout->printSet(listlen, list->FS, this->head[sequence]->count); LClose->insert(list->FS, LClose->posi+1, listlen, this->head[sequence]->count); list->top = listlen; update_cfi_trees(LClose->posi+1, this->head[sequence]->count);// list->top += new_item_no; } /***************************/ memory* Close_buf; Close_buf=allocate_buf(sequence, LClose->posi, ms); CFI_tree* new_LClose = (CFI_tree*)Close_buf->newbuf(1, sizeof(CFI_tree)); new_LClose->init(Close_buf, fptree, LClose, LClose->head[sequence], list->top-1); fptree->set_close_tree(new_LClose); cfitrees[LClose->posi+2] = new_LClose; i=fptree->FPclose(fout); list->top = new_LClose->posi; if(Close_buf->half())ms++; delete Close_buf; } fp_buf->freebuf(MR, MC, MB); } return 0; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -