⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fp_tree.cpp

📁 关联规则挖掘算法FPtree的源代码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	{		item_order[i] = order[i];		order[i]=-1;	}	ITEM_NO = itemno;	delete []counts;	delete Tran;}	void FI_tree::insert(int* compact, int counts, int current){	Fnode* child = Root;	Fnode* temp, *temp1=NULL;	int i=0;	while(i<current)	{		for(temp=child->leftchild; temp!=NULL; temp = temp->rightsibling)		{			if(temp->itemname==table[compact[i]])break;			if(temp->rightsibling==NULL)temp1 = temp;		}		if(!temp)break;		temp->count+=counts;		child=temp;		i++;	}	while(i<current)	{   		child = child->append(this, temp1, table[compact[i]], counts);		bran[i]++;		i++;	}}void FI_tree::cal_level_25(){	int i, total_25=0, total_50=0, total_bran=0, maxlen=0;	for(i=0; i<this->itemno && bran[i]!=0; i++);	maxlen =i;	for(i=0; i<int(maxlen*0.25); i++)		total_25 +=bran[i];	total_50 = total_25;	for(i=int(maxlen*0.25); i<this->itemno*0.5; i++)		total_50 +=bran[i];	for(i=0; i<this->itemno && bran[i]!=0; i++)	{//		cout<<i<<" " <<bran[i]<<endl;		total_bran+=bran[i];		bran[i]=0;	}	level_25 = (double)total_25/total_bran*100;//	cout<<"First 25% levels: "<<(double)total_25/total_bran*100<<"%  "<<"50% levels: "<<(double)total_50/total_bran*100<<"%"<<endl;}void FI_tree::fill_count(int* origin, int support){	int i, j=0;	for(i=0; i<itemno; i++)	{		if(origin[i]!=-1)		{			compact[j++]=i;			origin[i] = -1;		}	}	if(array)	{		int comp_len = j;		for(i=comp_len-1; i>0 && compact[i]>SUDDEN; i--)			for(j=i-1; j>=0; j--)				array[itemno-1-compact[i]][compact[i]-compact[j]-1]+=support;	}}void FI_tree::scan2_DB(Data *fdat){	int i, j, has;	int* origin, *buffer=new int;	Transaction *Tran = new Transaction;	origin = new int[ITEM_NO];	assert(Tran!=NULL&&origin!=NULL && buffer!=NULL);	for(j=0; j<ITEM_NO; j++) origin[j]=-1;	for(i=0; i<TRANSACTION_NO; i++)	{		Tran = fdat->getNextTransaction(Tran);		has=0;		for(int j=0; j<Tran->length; j++) 		{			if(item_order[Tran->t[j]]!=-1)			{				has++;//				if(origin[item_order[Tran->t[j]]]!=-1)//					has--;         // in transaction, items are not supposed to appear more than twice				origin[item_order[Tran->t[j]]] = item_order[Tran->t[j]];			}		}		if(has){			fill_count(origin, 1);			insert(compact, 1, has);		}	}	cal_level_25();	delete []origin;	delete buffer;	delete Tran;}void FI_tree::scan1_DB(FI_tree* old_tree){	int i, j;	int* old_order = old_tree->order;	for(i=0; i< itemno; i++)	{		count[i]=supp[old_order[list->FS[i+list->top-itemno]]];		table[i]=list->FS[i+list->top-itemno];		supp[old_order[list->FS[i+list->top-itemno]]]=0;	}	sort(count, table, 0, itemno-1);	for(i=0; i<itemno; i++)	{		order[table[i]]=i;		head[i] = (Fnode*)fp_buf->newbuf(1, sizeof(Fnode));		head[i]->init(NULL, table[i], count[i]);			}	if(itemno > SUDDEN+5 && old_tree->level_25 > SWITCH)	{		array = (int**)fp_buf->newbuf(itemno-1-SUDDEN, sizeof(int*));		for(i=0; i<itemno-1-SUDDEN; i++)		{			array[i] = (int*)fp_buf->newbuf(itemno-1-i, sizeof(int));			for(j=0; j<itemno-1-i; j++)				array[i][j] = 0;		}	}else array = NULL;}void FI_tree::scan2_DB(FI_tree* old_tree, Fnode* node){	int i, has, current;	int* origin;	Fnode* link, *parent;	int* old_order;		old_order = old_tree->order;	current = old_order[node->itemname];	origin = new int[current];	assert(origin!=NULL);	for(i=0; i<current; i++) origin[i]=-1;		for(link=node->next; link!=NULL; link=link->next)	{			has=0;		for(parent=link->par; parent->itemname!=-1; parent=parent->par)			if(order[parent->itemname]!=-1)			{				origin[order[parent->itemname]]=parent->itemname;				has++;			}		if(has)		{			fill_count(origin, link->count);			insert(compact, link->count, has);		}	}	cal_level_25();	delete []origin;}void FI_tree::powerset(int*prefix, int prefixlen, int* items, int current, int itlen, FSout* fout )const{	if(current==itlen)	{		if(prefixlen!=0)		{				ITlen[list->top+prefixlen-1]++;			if(fout)			{				fout->printset(list->top, list->FS);				fout->printSet(prefixlen, prefix, this->head[order[prefix[prefixlen-1]]]->count);			}		}	}else{		current++;		powerset(prefix, prefixlen, items, current, itlen, fout);		current--;		prefix[prefixlen++]=items[current++];		powerset(prefix, prefixlen, items, current, itlen, fout);	}}	void FI_tree::generate_all(int new_item_no, FSout* fout)const{	powerset(prefix, 0, list->FS, list->top, list->top+new_item_no, fout);}bool FI_tree::Single_path(bool close)const{	Fnode* node;	for(node=Root->leftchild; node!=NULL; node=node->leftchild)		if(node->rightsibling!=NULL)return false;	if(close)		for(node=Root->leftchild; node!=NULL; node=node->leftchild)		{			list->FS[list->top] = node->itemname;			list->counts[list->top++] = node->count;		}	return true;}memory* FI_tree::allocate_buf(int sequence, int iteration, int i)				//	power2[i] is the smallest block size for memory{	memory* buf;					int blocks=60;	int j=0;	if(itemno<=100)j=sequence/10;	else if(itemno>100 && itemno<=500)j=sequence/50;	if(itemno>500)j = sequence/100;	switch(iteration){	case -1:			if(j<=4)			buf=new memory(blocks, power2[i+j], power2[i+j+1], 2);		else			buf=new memory(blocks, power2[i+5], power2[i+6], 2);		break;	case 0:			if(j<=3)			buf=new memory(blocks, power2[i+j], power2[i+j+1], 2);		else			buf=new memory(blocks, power2[i+4], power2[i+5], 2);		break;	case 1:			if(j<=2)			buf=new memory(blocks, power2[i+j], power2[i+j+1], 2);		else			buf=new memory(blocks, power2[i+3], power2[i+4], 2);		break;	case 2:			if(j<=1)			buf=new memory(blocks, power2[i+j], power2[i+j+1], 2);		else			buf=new memory(blocks, power2[i+2], power2[i+3], 2);		break;	case 3:			if(j<=0)			buf=new memory(blocks, power2[i+j], power2[i+j+1], 2);		else			buf=new memory(blocks, power2[i+1], power2[i+2], 2);		break;	default:		buf=new memory(blocks, power2[i], power2[i+1], 2);	}	return buf;}int FI_tree::conditional_pattern_base(Fnode* node, bool close)const{	Fnode* temp, *parent;	int i;	for(temp=node->next; temp!=NULL; temp=temp->next)	{		parent=temp->par;		for(; parent->itemname!=-1;parent=parent->par)			supp[order[parent->itemname]]+=temp->count;	}	int k=0;	for(i=0; i<order[node->itemname]; i++)	{		if(supp[i]>=THRESHOLD)		{			k++;			list->FS[list->top++]=table[i];			if(close)list->counts[list->top-1] = supp[i];		}else			supp[i] = 0;	}	return k;}int FI_tree::conditional_pattern_base(int itemname, bool close)const{	int i, k=0, item = itemno-1-order[itemname];	for(i=itemno-2-item; i>=0;i--)		if(array[item][i]>=THRESHOLD)		{			k++;			list->FS[list->top++]=table[itemno-2-item-i];			supp[itemno-2-item-i]=array[item][i];			if(close)list->counts[list->top-1] = array[item][i];		}	return k;}int FI_tree::FP_growth(FSout* fout){	int i, sequence, current, new_item_no, listlen;	int MC=0;			//markcount for memory	unsigned int MR=0;	//markrest for memory	char* MB;			//markbuf for memory	Fnode* Current;	for(sequence=itemno-1; sequence>=0; sequence--)	{		Current=head[sequence];		current=head[sequence]->itemname;		list->FS[list->top++]=head[sequence]->itemname;		listlen = list->top;		ITlen[list->top-1]++;		if(fout)			fout->printSet(list->top, list->FS, this->head[sequence]->count);				if(array && sequence>SUDDEN+1)			new_item_no=conditional_pattern_base(Current->itemname);  //new_item_no is the number of elements in new header table.		else if(sequence !=0)			new_item_no=conditional_pattern_base(Current);  //new_item_no is the number of elements in new header table.		else			new_item_no = 0;				if(new_item_no==0 || new_item_no == 1)		{			if(new_item_no==1)			{				ITlen[list->top-1]++;				if(fout)					fout->printSet(list->top, list->FS, supp[order[list->FS[list->top-1]]]);			}			if(new_item_no==1)supp[order[list->FS[list->top-1]]] = 0;			list->top=listlen-1;			continue;		}		FI_tree *fptree;		MB=fp_buf->bufmark(&MR, &MC);		fptree = (FI_tree*)fp_buf->newbuf(1, sizeof(FI_tree));		fptree->init(this->itemno, new_item_no);		fptree->scan1_DB(this);		fptree->scan2_DB(this, Current);		list->top=listlen;									if(fptree->Single_path())		{                               /* patch   Oct. 9, 2003*/                        Fnode* node;                        for(node=fptree->Root->leftchild; node!=NULL; node=node->leftchild)                                list->FS[list->top++] = node->itemname;                        list->top = listlen;                              /*************************/			fptree->generate_all(new_item_no, fout);			list->top--;		}else{             //not single path			i = fptree->FP_growth(fout);			list->top = listlen-1;		}		fp_buf->freebuf(MR, MC, MB);	}	return 0;}int FI_tree::FPmax(FSout* fout){	static int ms=9;		//power2[i] is the smallest block size for memory  2**9 = 512	int i, sequence, current, new_item_no, listlen;	int MC=0;			//markcount for memory	unsigned int MR=0;	//markrest for memory	char* MB;			//markbuf for memory	Fnode* Current;	for(sequence=itemno-1; sequence>=0; sequence--)	{		Current=head[sequence];		current=head[sequence]->itemname;		list->FS[list->top++]=head[sequence]->itemname;		listlen = list->top;		if(array && sequence>SUDDEN+1)			new_item_no=conditional_pattern_base(Current->itemname);  //new_item_no is the number of elements in new header table.		else if(sequence !=0)			new_item_no=conditional_pattern_base(Current);  //new_item_no is the number of elements in new header table.		else			new_item_no = 0;				if(LMaxsets->is_subset())            		{			for(int j=listlen; j < list->top; j++)supp[order[list->FS[j]]] = 0;			list->top=listlen-1;						if(new_item_no == sequence)return new_item_no;  			continue;		}		if(new_item_no==0 || new_item_no == 1)		{			ITlen[list->top-1]++;			if(fout){				if(new_item_no==1)					fout->printSet(list->top, list->FS, supp[order[list->FS[list->top-1]]]);				else					fout->printSet(list->top, list->FS, this->head[sequence]->count);			}			LMaxsets->insert(list->FS, LMaxsets->posi+1, list->top+new_item_no-1);			update_mfi_trees(LMaxsets->posi+1);			list->top = listlen+1;			if(new_item_no==1)supp[order[list->FS[list->top-1]]] = 0;			list->top=listlen-1;			if(new_item_no != sequence)continue;			return new_item_no;		}		FI_tree *fptree;		MB=fp_buf->bufmark(&MR, &MC);		fptree = (FI_tree*)fp_buf->newbuf(1, sizeof(FI_tree));		fptree->init(this->itemno, new_item_no);		fptree->scan1_DB(this);		fptree->scan2_DB(this, Current);		list->top=listlen;									if(fptree->Single_path())		{			if(fout)			{				fout->printSet(list->top+new_item_no, list->FS, fptree->head[fptree->itemno-1]->count);			}			ITlen[list->top+new_item_no-1]++;			LMaxsets->insert(list->FS, LMaxsets->posi+1, list->top+new_item_no);			list->top = list->top+new_item_no;			update_mfi_trees(LMaxsets->posi+1);			list->top = listlen;			list->top--;			if(new_item_no == sequence)			{				fp_buf->freebuf(MR, MC, MB);				return new_item_no;			}		}else{             //not single path			memory* Max_buf;			Max_buf=allocate_buf(sequence, LMaxsets->posi, ms);			MFI_tree* new_LMFI = (MFI_tree*)Max_buf->newbuf(1, sizeof(MFI_tree));			new_LMFI->init(Max_buf, fptree, LMaxsets, LMaxsets->head[sequence], list->top-1);			fptree->set_max_tree(new_LMFI);			mfitrees[LMaxsets->posi+2] = new_LMFI;			i=fptree->FPmax(fout);							list->top = new_LMFI->posi;			if(Max_buf->half())ms++;			delete Max_buf;			if(i+1 == sequence)			{				fp_buf->freebuf(MR, MC, MB);				return i+1;			}		}		fp_buf->freebuf(MR, MC, MB);	}	return 0;   }int FI_tree::FPclose(FSout* fout){	static int ms=9;		//power2[i] is the smallest block size for memory  2**9 = 512	int i, sequence, current, new_item_no, listlen;	int MC=0;			//markcount for memory	unsigned int MR=0;	//markrest for memory	char* MB;			//markbuf for memory	Fnode* Current;	for(sequence=itemno-1; sequence>=0; sequence--)	{		Current=head[sequence];		current=head[sequence]->itemname;		list->FS[list->top++]=head[sequence]->itemname;		list->counts[list->top-1] = this->head[sequence]->count;		listlen = list->top;		if(LClose->is_subset(this->head[sequence]->count))		{				list->top=listlen-1;			continue;		}		if(array && sequence>SUDDEN+1)			new_item_no=conditional_pattern_base(Current->itemname, 1);  //new_item_no is the number of elements in new header table.		else if(sequence !=0)			new_item_no=conditional_pattern_base(Current, 1);  //new_item_no is the number of elements in new header table.		else			new_item_no = 0;				if(new_item_no==0 || new_item_no == 1)		{			if(new_item_no==0)			{				ITlen[list->top-1]++;				if(fout)					fout->printSet(list->top, list->FS, this->head[sequence]->count);				LClose->insert(list->FS, LClose->posi+1, list->top+new_item_no-1, this->head[sequence]->count);				update_cfi_trees(LClose->posi+1, this->head[sequence]->count);			}else{				list->top=listlen;//Bug! Dec. 5/*				if(LClose->generate_close(1, this->head[sequence]->count, fout))				{						supp[order[list->FS[listlen]]] = 0;					list->top=listlen-1;					return new_item_no;					}else					supp[order[list->FS[listlen]]] = 0;*/				LClose->generate_close(1, this->head[sequence]->count, fout);				supp[order[list->FS[listlen]]] = 0;			}			list->top=listlen-1;			continue;		}/*		int j;		for(j=listlen; j<new_item_no+listlen && list->counts[listlen-1]!=list->counts[j]; j++);		if(j==new_item_no+listlen)		{			ITlen[listlen-1]++;			if(fout)				fout->printSet(listlen, list->FS, this->head[sequence]->count);						LClose->insert(list->FS, LClose->posi+1, listlen, this->head[sequence]->count);			list->top = listlen;			update_cfi_trees(LClose->posi+1, this->head[sequence]->count);			list->top += new_item_no;		}*/		FI_tree *fptree;		MB=fp_buf->bufmark(&MR, &MC);		fptree = (FI_tree*)fp_buf->newbuf(1, sizeof(FI_tree));		fptree->init(this->itemno, new_item_no);		fptree->scan1_DB(this);		fptree->scan2_DB(this, Current);		list->top=listlen;									if(fptree->Single_path(1))		{			list->top=listlen;										if(LClose->generate_close(new_item_no, this->head[sequence]->count, fout))			{				if(new_item_no == sequence)				{					fp_buf->freebuf(MR, MC, MB);					list->top = listlen-1;					return new_item_no;				}			}								list->top = listlen-1;		}else{             //not single path                               /* patch   Oct. 9, 2003 */                        int j;                        for(j=listlen; j<new_item_no+listlen && list->counts[listlen-1]!=list->counts[j]; j++);                        if(j==new_item_no+listlen)                        {                                ITlen[listlen-1]++;                                if(fout)                                        fout->printSet(listlen, list->FS, this->head[sequence]->count);                                LClose->insert(list->FS, LClose->posi+1, listlen, this->head[sequence]->count);                                list->top = listlen;                                update_cfi_trees(LClose->posi+1, this->head[sequence]->count);//                              list->top += new_item_no;                        }                              /***************************/			memory* Close_buf;			Close_buf=allocate_buf(sequence, LClose->posi, ms);			CFI_tree* new_LClose = (CFI_tree*)Close_buf->newbuf(1, sizeof(CFI_tree));			new_LClose->init(Close_buf, fptree, LClose, LClose->head[sequence], list->top-1);			fptree->set_close_tree(new_LClose);			cfitrees[LClose->posi+2] = new_LClose;			i=fptree->FPclose(fout);						list->top = new_LClose->posi;			if(Close_buf->half())ms++;			delete Close_buf;		}		fp_buf->freebuf(MR, MC, MB);	}	return 0;   }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -