fptree.cpp

来自「关联规则中转换树算法在VC下的实现」· C++ 代码 · 共 453 行

CPP
453
字号
// FPtree.cpp: implementation of the CFPtree class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "AssocRule.h"
#include "FPtree.h"
#include "FSout.h"
#include "buffer.h"
#include "common.h"

#include <assert.h>
#include <sys/timeb.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

int TransLen;			// max length of transactions or 80
int ItemNum;			// number of all items
int fisLen;				// length of frequent 1-itemset
ItemType  *f1item;		// frequent 1-itemset orderd by frequency descending
CountType *f1count;		// frequency of frequent 1-itemset
double totalFIs;
int shown;
memory* fp_buf;			// 指向树存储区的指针

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

CFPtree::CFPtree()
{

}

CFPtree::~CFPtree()
{

}

void CFPtree::main()
{
	CString strTmp;
	int  i;
	
	double time1, time2, time3; 
	struct timeb tm;
	char *timeline;
	
	head=NULL;
	TransLen=TRANSLEN;
	totalFIs=0;
	
	ftime (&tm);
	timeline = ctime( & ( tm.time ) );
	time1 = (double)tm.time+tm.millitm/1000.;

	data* fdat=new data(infile);
	
	if(!fdat->isOpen()) {
		strTmp.Format("Input file %s could not be opened!", infile);
		AfxMessageBox(strTmp);
		exit(2);
	}
	
	char str1[255];
	FSout* fsout;
	fsout = new FSout(outfile);
	if (!fsout->isOpen())
	{
		strTmp.Format("Output file %s could not be opened!", outfile);
		AfxMessageBox(strTmp);
		exit(3);
	}

	FSout* flog;
	flog = new FSout(logfile);
	if (!flog->isOpen())
	{
		strTmp.Format("Output file %s could not be opened!", logfile);
		AfxMessageBox(strTmp);
		exit(3);
	}
	
	sprintf(str1, "Input data file: %s  ", infile);
	flog->printstring(str1);
	if (FItype==MFI)
		sprintf(str1, "\nMining algorithm: TransTree for mining MFI.");
	else if (FItype==FI)
		sprintf(str1, "\nMining algorithm: TransTree for mining FI.");
	
	flog->printstring(str1);
	sprintf(str1, "\nMinimal support threshold: %7.2f %%  ", fMinSupp);
	flog->printstring(str1);
	sprintf(str1, "\nMinimal confidence threshold: %7.2f %%  ", fMinConf);
	flog->printstring(str1);
	
	sprintf(str1, "\n\nStart time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
	flog->printstring(str1);

	fp_buf=new memory(30, 2097152, 4194304L, 2);
//	fp_buf=new memory(60, 4194304L, 8388608L, 2);
	
	app->m_strEdit.AppendString("第一次读交易数据, 产生频繁1-项集......");
	
	scan1_data(fdat);

	ftime (&tm);
	timeline = ctime( & ( tm.time ) );
	time2 = (double)tm.time+tm.millitm/1000.;

	sprintf(str1, "\nFirst scan end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
	flog->printstring(str1);
	sprintf(str1, "\nFirst scan cost: %12.3f seconds\n",time2-time1);
	flog->printstring(str1);
	sprintf(str1, "Total transactions: %d \n",TransNum);
	flog->printstring(str1);
	sprintf(str1, "Absolute support threshold: %d \n",iSupport);
	flog->printstring(str1);
	sprintf(str1, "\nSize of frequent 1-itemset: %ld . ", fisLen);
	flog->printstring(str1);

	app->m_strEdit.AppendString("第二次读交易数据, 生成 FP-树......");
	
	fptree=new CTreeNode;

	scan2_data(fdat);
	fdat->close();

	ftime (&tm);
	timeline = ctime( & ( tm.time ) );
	time3 = (double)tm.time+tm.millitm/1000.;
	
	sprintf(str1, "\nSecond scan (construct CFP-tree) end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
	flog->printstring(str1);
	sprintf(str1, "\nSecond scan cost: %12.3f seconds\n",time3-time2);
	flog->printstring(str1);
	sprintf(str1, "\nMaximal FI number in a transaction is: %d . ", max_num_items);
	flog->printstring(str1);
	
	// 连接结点链
	rear = new NodePtr[fisLen];  // 创建项尾指针表
	nodecount = new int[fisLen];
	assert (rear != NULL && nodecount != NULL);
	
	for (i=0; i<fisLen; i++)
	{
		rear[i]=NULL;
		nodecount[i]=0;
	}
	
	Node *pRt=fptree->getRoot();
	DrawNodeLink(pRt, pRt->psLink);
	pRt->psLink=NULL;
	
	ftime (&tm);
	timeline = ctime( & ( tm.time ) );
	time2 = (double)tm.time+tm.millitm/1000.;
	
	sprintf(str1, "\nBuilding node-link end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
	flog->printstring(str1);
	sprintf(str1, "\nBuilding node-link cost: %12.3f seconds\n",time2-time3);
	flog->printstring(str1);
	
	int totalNodes=1;
	for (i=0; i<fisLen; i++)
		totalNodes += nodecount[i];
	
	sprintf(str1, "\nThere are %d nodes in the tree. \n\n", totalNodes);
	flog->printstring(str1);

	app->m_strEdit.AppendString("开始挖掘 FP-树,生成频繁项集......");

	suffix=new ItemType[max_num_items];
	suffixlen=0;
	FPmine(fsout, 0, fisLen-1);

	ftime (&tm);
	timeline = ctime( & ( tm.time ) );
	time3 = (double)tm.time+tm.millitm/1000.;
	
	sprintf(str1, "\n\ntotal number of frequent itemsets: %10.0f\n", totalFIs );
	flog->printstring(str1);
	
	sprintf(str1, "\nMining end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
	flog->printstring(str1);
	sprintf(str1, "\nMining cost: %12.3f seconds\n",time3-time2);
	flog->printstring(str1);
	sprintf(str1, "\nTotal cost: %12.3f seconds\n",time3-time1);
	flog->printstring(str1);
	
	fsout->close();
	flog->close();

	delete fptree;
	delete fp_buf;

	delete []rear;
	delete []f1item;
	delete []f1count;
	delete []head;
	delete []nodecount;
	delete []suffix;

	app->m_strEdit.AppendString("TransTree algorithm is successfully done.");
	
}

void CFPtree::scan1_data(data *fdat)
{
	long nTrans=0;        //calculate the total of transactions
	int  i, j;
	int  maxitemno;
	int *counts;
	
	maxitemno=0;
	ItemNum=ITEM_NUM;
	counts=(int*)fp_buf->newbuf(ItemNum, sizeof(int));
	
	for(i=0; i<ItemNum; i++)
		counts[i] = 0;
	
	Transaction *Tran = new Transaction;
	assert(Tran!=NULL);
	
	while(Tran = fdat->getNextTrans(Tran))
	{	
		if (Tran->length > TransLen)
			TransLen=Tran->length;

		for(i=0; i<Tran->length; i++) 
		{
			if(Tran->t[i]>=ItemNum)    // if itemno exceeds the original ItemNum
			{
				ItemNum = 2*Tran->t[i];
				int* temp = new int[2*Tran->t[i]];
				for(j=0; j<=maxitemno; j++)
					temp[j] = counts[j];
				for(; j<ItemNum; j++)
					temp[j] = 0;
				delete []counts;
				counts=temp;
				maxitemno=Tran->t[i];
			}else
				if(maxitemno<Tran->t[i])
					maxitemno = Tran->t[i];

			counts[Tran->t[i]]++;
		}
		nTrans++;
	}
	
	ItemNum = maxitemno+1;
	
	TransNum = nTrans;
	double ftemp=fMinSupp*TransNum/100.;
	iSupport=(int)ftemp;
	if (ftemp-iSupport>0.0001)
		iSupport++;

	fisLen=0;
	for (j=0; j<ItemNum; j++)
	{
		if (counts[j] >= iSupport)
			fisLen++;
	}

	if (fisLen == 0)
	{
		AfxMessageBox("No frequent 1-itemset, because the support threshold is too large.", MB_OK);
		return;
	}

	f1item = new ItemType[fisLen];
	f1count= new CountType[fisLen];
	f1order= new int[ItemNum];
	assert(f1item!=NULL && f1count!=NULL && f1order!=NULL);
	
	head=new NodePtr[fisLen];
	assert(head != NULL);
	
	// 得到频繁1-项集和支持度(按itemid从小到大)
	i=0;
	for (j=0; j<ItemNum; j++)
	{
		f1order[j] = -1;
		if (counts[j] >= iSupport)
		{
			f1item[i] = j;
			f1count[i] = counts[j];
			i++;
		}
	}

	common *cmn=new common;

	cmn->QuickSort(f1count, f1item, 0, fisLen-1);
	for (i=0; i<fisLen; i++)
	{
		f1order[f1item[i]]=i;
		head[i]=NULL;
	}
	
	delete cmn;
	
	fp_buf->buffree();
	delete Tran;
}

void CFPtree::scan2_data(data *fdat)
{
	long nTrans=0;        //calculate the total of transactions

	int  itemidx;
	int  num_fitems;
	ItemType *fitem=new ItemType[TransLen];
	
	int i, j;
	Transaction *Tran = new Transaction;
	assert(Tran!=NULL && fitem!=NULL);
	common *cmn=new common;
	
	max_num_items=0;
	for(i=0; i<TransNum; i++)
	{
		Tran = fdat->getNextTrans(Tran);
		num_fitems=0;
		for (j=0; j<Tran->length; j++)
		{
			itemidx=f1order[Tran->t[j]];
			if ( itemidx != -1)
				fitem[num_fitems++] = itemidx;
		}  // end of for j

		if (num_fitems)
		{
			cmn->QuickSort(fitem, 0, num_fitems-1);
			if (!fptree->insert_tree(num_fitems, fitem))
			{
				CString strTmp;
				strTmp.Format("在向树中加入第 %d 个事务时,内存空间已满。", nTrans);
				AfxMessageBox(strTmp,MB_OK);
				exit(0);
			}
			
			if (num_fitems > max_num_items)
				max_num_items = num_fitems;
		}

	}  // end of for

	delete cmn;
	delete []f1order;
	delete []fitem;
	delete Tran;

}

// 连接相同项的结点链
void CFPtree::DrawNodeLink(Node *pParen, Node *pCur)
{
	Node *p2;
	int ipos = pCur->itemNo;
	
	// 改右兄弟链为相同项的结点链
	if (head[ipos]==NULL)
		head[ipos]=pCur;
	else
		rear[ipos]->pSibNext=pCur;
	
	rear[ipos]=pCur;
	nodecount[ipos]++;

	p2=pCur->pSibNext;   // 保留原来的右兄弟
	if (pCur->psLink != NULL)  // 有子结点(左子树)
		DrawNodeLink(pCur, pCur->psLink);
	
	if (p2 != NULL)  // (右子树)
		DrawNodeLink(pParen, p2);

	pCur->psLink = pParen;
	pCur->pSibNext=NULL;
	return;
}

void CFPtree::FPmine(FSout *fsout, int first, int last)
{
	int k;

	while ((first<=last) && (f1count[first]<iSupport))
		first++;

	if (first > last)
		return;

	suffix[suffixlen] = first;
	fsout->printSet(suffixlen+1, suffix, f1count[first]);

	for (k=first+1; k<=last; k++)
	{
		if (f1count[k]>=iSupport)
		{
			suffix[suffixlen] = k;
			suffixlen++;
			fsout->printSet(suffixlen, suffix, f1count[k]);
			Build_ST(k, first);
			FPmine(fsout, first, k-1);
			suffixlen--;
		}
	}

}

void CFPtree::Build_ST(int k, int first)
{
	int i;
	Node *pt;
	
	for (i=first; i<k; i++)
	{
		head[i]=NULL;
		rear[i]=NULL;
		f1count[i]=0;
	}

	for (pt=head[k]; pt!=NULL; pt=pt->pSibNext)
	{
		int basecount=pt->count;
		Node *p=pt->psLink;
		for (; p->itemNo>=first; p=p->psLink)
		{
			if ((rear[p->itemNo]==NULL) || (rear[p->itemNo] != p))
			{
				if (head[p->itemNo]==NULL)
					head[p->itemNo]=p;
				else
					rear[p->itemNo]->pSibNext=p;
				
				rear[p->itemNo]=p;
				p->pSibNext=NULL;
				p->count=basecount;
			}
			else
				p->count += basecount;
			
			f1count[p->itemNo] += basecount;
		}
	} // end of for
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?