fptree.cpp
来自「关联规则中转换树算法在VC下的实现」· C++ 代码 · 共 453 行
CPP
453 行
// FPtree.cpp: implementation of the CFPtree class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "AssocRule.h"
#include "FPtree.h"
#include "FSout.h"
#include "buffer.h"
#include "common.h"
#include <assert.h>
#include <sys/timeb.h>
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
int TransLen; // max length of transactions or 80
int ItemNum; // number of all items
int fisLen; // length of frequent 1-itemset
ItemType *f1item; // frequent 1-itemset orderd by frequency descending
CountType *f1count; // frequency of frequent 1-itemset
double totalFIs;
int shown;
memory* fp_buf; // 指向树存储区的指针
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CFPtree::CFPtree()
{
}
CFPtree::~CFPtree()
{
}
void CFPtree::main()
{
CString strTmp;
int i;
double time1, time2, time3;
struct timeb tm;
char *timeline;
head=NULL;
TransLen=TRANSLEN;
totalFIs=0;
ftime (&tm);
timeline = ctime( & ( tm.time ) );
time1 = (double)tm.time+tm.millitm/1000.;
data* fdat=new data(infile);
if(!fdat->isOpen()) {
strTmp.Format("Input file %s could not be opened!", infile);
AfxMessageBox(strTmp);
exit(2);
}
char str1[255];
FSout* fsout;
fsout = new FSout(outfile);
if (!fsout->isOpen())
{
strTmp.Format("Output file %s could not be opened!", outfile);
AfxMessageBox(strTmp);
exit(3);
}
FSout* flog;
flog = new FSout(logfile);
if (!flog->isOpen())
{
strTmp.Format("Output file %s could not be opened!", logfile);
AfxMessageBox(strTmp);
exit(3);
}
sprintf(str1, "Input data file: %s ", infile);
flog->printstring(str1);
if (FItype==MFI)
sprintf(str1, "\nMining algorithm: TransTree for mining MFI.");
else if (FItype==FI)
sprintf(str1, "\nMining algorithm: TransTree for mining FI.");
flog->printstring(str1);
sprintf(str1, "\nMinimal support threshold: %7.2f %% ", fMinSupp);
flog->printstring(str1);
sprintf(str1, "\nMinimal confidence threshold: %7.2f %% ", fMinConf);
flog->printstring(str1);
sprintf(str1, "\n\nStart time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
flog->printstring(str1);
fp_buf=new memory(30, 2097152, 4194304L, 2);
// fp_buf=new memory(60, 4194304L, 8388608L, 2);
app->m_strEdit.AppendString("第一次读交易数据, 产生频繁1-项集......");
scan1_data(fdat);
ftime (&tm);
timeline = ctime( & ( tm.time ) );
time2 = (double)tm.time+tm.millitm/1000.;
sprintf(str1, "\nFirst scan end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
flog->printstring(str1);
sprintf(str1, "\nFirst scan cost: %12.3f seconds\n",time2-time1);
flog->printstring(str1);
sprintf(str1, "Total transactions: %d \n",TransNum);
flog->printstring(str1);
sprintf(str1, "Absolute support threshold: %d \n",iSupport);
flog->printstring(str1);
sprintf(str1, "\nSize of frequent 1-itemset: %ld . ", fisLen);
flog->printstring(str1);
app->m_strEdit.AppendString("第二次读交易数据, 生成 FP-树......");
fptree=new CTreeNode;
scan2_data(fdat);
fdat->close();
ftime (&tm);
timeline = ctime( & ( tm.time ) );
time3 = (double)tm.time+tm.millitm/1000.;
sprintf(str1, "\nSecond scan (construct CFP-tree) end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
flog->printstring(str1);
sprintf(str1, "\nSecond scan cost: %12.3f seconds\n",time3-time2);
flog->printstring(str1);
sprintf(str1, "\nMaximal FI number in a transaction is: %d . ", max_num_items);
flog->printstring(str1);
// 连接结点链
rear = new NodePtr[fisLen]; // 创建项尾指针表
nodecount = new int[fisLen];
assert (rear != NULL && nodecount != NULL);
for (i=0; i<fisLen; i++)
{
rear[i]=NULL;
nodecount[i]=0;
}
Node *pRt=fptree->getRoot();
DrawNodeLink(pRt, pRt->psLink);
pRt->psLink=NULL;
ftime (&tm);
timeline = ctime( & ( tm.time ) );
time2 = (double)tm.time+tm.millitm/1000.;
sprintf(str1, "\nBuilding node-link end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
flog->printstring(str1);
sprintf(str1, "\nBuilding node-link cost: %12.3f seconds\n",time2-time3);
flog->printstring(str1);
int totalNodes=1;
for (i=0; i<fisLen; i++)
totalNodes += nodecount[i];
sprintf(str1, "\nThere are %d nodes in the tree. \n\n", totalNodes);
flog->printstring(str1);
app->m_strEdit.AppendString("开始挖掘 FP-树,生成频繁项集......");
suffix=new ItemType[max_num_items];
suffixlen=0;
FPmine(fsout, 0, fisLen-1);
ftime (&tm);
timeline = ctime( & ( tm.time ) );
time3 = (double)tm.time+tm.millitm/1000.;
sprintf(str1, "\n\ntotal number of frequent itemsets: %10.0f\n", totalFIs );
flog->printstring(str1);
sprintf(str1, "\nMining end time: %.19s.%hu %s", timeline, tm.millitm, &timeline[20] );
flog->printstring(str1);
sprintf(str1, "\nMining cost: %12.3f seconds\n",time3-time2);
flog->printstring(str1);
sprintf(str1, "\nTotal cost: %12.3f seconds\n",time3-time1);
flog->printstring(str1);
fsout->close();
flog->close();
delete fptree;
delete fp_buf;
delete []rear;
delete []f1item;
delete []f1count;
delete []head;
delete []nodecount;
delete []suffix;
app->m_strEdit.AppendString("TransTree algorithm is successfully done.");
}
void CFPtree::scan1_data(data *fdat)
{
long nTrans=0; //calculate the total of transactions
int i, j;
int maxitemno;
int *counts;
maxitemno=0;
ItemNum=ITEM_NUM;
counts=(int*)fp_buf->newbuf(ItemNum, sizeof(int));
for(i=0; i<ItemNum; i++)
counts[i] = 0;
Transaction *Tran = new Transaction;
assert(Tran!=NULL);
while(Tran = fdat->getNextTrans(Tran))
{
if (Tran->length > TransLen)
TransLen=Tran->length;
for(i=0; i<Tran->length; i++)
{
if(Tran->t[i]>=ItemNum) // if itemno exceeds the original ItemNum
{
ItemNum = 2*Tran->t[i];
int* temp = new int[2*Tran->t[i]];
for(j=0; j<=maxitemno; j++)
temp[j] = counts[j];
for(; j<ItemNum; j++)
temp[j] = 0;
delete []counts;
counts=temp;
maxitemno=Tran->t[i];
}else
if(maxitemno<Tran->t[i])
maxitemno = Tran->t[i];
counts[Tran->t[i]]++;
}
nTrans++;
}
ItemNum = maxitemno+1;
TransNum = nTrans;
double ftemp=fMinSupp*TransNum/100.;
iSupport=(int)ftemp;
if (ftemp-iSupport>0.0001)
iSupport++;
fisLen=0;
for (j=0; j<ItemNum; j++)
{
if (counts[j] >= iSupport)
fisLen++;
}
if (fisLen == 0)
{
AfxMessageBox("No frequent 1-itemset, because the support threshold is too large.", MB_OK);
return;
}
f1item = new ItemType[fisLen];
f1count= new CountType[fisLen];
f1order= new int[ItemNum];
assert(f1item!=NULL && f1count!=NULL && f1order!=NULL);
head=new NodePtr[fisLen];
assert(head != NULL);
// 得到频繁1-项集和支持度(按itemid从小到大)
i=0;
for (j=0; j<ItemNum; j++)
{
f1order[j] = -1;
if (counts[j] >= iSupport)
{
f1item[i] = j;
f1count[i] = counts[j];
i++;
}
}
common *cmn=new common;
cmn->QuickSort(f1count, f1item, 0, fisLen-1);
for (i=0; i<fisLen; i++)
{
f1order[f1item[i]]=i;
head[i]=NULL;
}
delete cmn;
fp_buf->buffree();
delete Tran;
}
void CFPtree::scan2_data(data *fdat)
{
long nTrans=0; //calculate the total of transactions
int itemidx;
int num_fitems;
ItemType *fitem=new ItemType[TransLen];
int i, j;
Transaction *Tran = new Transaction;
assert(Tran!=NULL && fitem!=NULL);
common *cmn=new common;
max_num_items=0;
for(i=0; i<TransNum; i++)
{
Tran = fdat->getNextTrans(Tran);
num_fitems=0;
for (j=0; j<Tran->length; j++)
{
itemidx=f1order[Tran->t[j]];
if ( itemidx != -1)
fitem[num_fitems++] = itemidx;
} // end of for j
if (num_fitems)
{
cmn->QuickSort(fitem, 0, num_fitems-1);
if (!fptree->insert_tree(num_fitems, fitem))
{
CString strTmp;
strTmp.Format("在向树中加入第 %d 个事务时,内存空间已满。", nTrans);
AfxMessageBox(strTmp,MB_OK);
exit(0);
}
if (num_fitems > max_num_items)
max_num_items = num_fitems;
}
} // end of for
delete cmn;
delete []f1order;
delete []fitem;
delete Tran;
}
// 连接相同项的结点链
void CFPtree::DrawNodeLink(Node *pParen, Node *pCur)
{
Node *p2;
int ipos = pCur->itemNo;
// 改右兄弟链为相同项的结点链
if (head[ipos]==NULL)
head[ipos]=pCur;
else
rear[ipos]->pSibNext=pCur;
rear[ipos]=pCur;
nodecount[ipos]++;
p2=pCur->pSibNext; // 保留原来的右兄弟
if (pCur->psLink != NULL) // 有子结点(左子树)
DrawNodeLink(pCur, pCur->psLink);
if (p2 != NULL) // (右子树)
DrawNodeLink(pParen, p2);
pCur->psLink = pParen;
pCur->pSibNext=NULL;
return;
}
void CFPtree::FPmine(FSout *fsout, int first, int last)
{
int k;
while ((first<=last) && (f1count[first]<iSupport))
first++;
if (first > last)
return;
suffix[suffixlen] = first;
fsout->printSet(suffixlen+1, suffix, f1count[first]);
for (k=first+1; k<=last; k++)
{
if (f1count[k]>=iSupport)
{
suffix[suffixlen] = k;
suffixlen++;
fsout->printSet(suffixlen, suffix, f1count[k]);
Build_ST(k, first);
FPmine(fsout, first, k-1);
suffixlen--;
}
}
}
void CFPtree::Build_ST(int k, int first)
{
int i;
Node *pt;
for (i=first; i<k; i++)
{
head[i]=NULL;
rear[i]=NULL;
f1count[i]=0;
}
for (pt=head[k]; pt!=NULL; pt=pt->pSibNext)
{
int basecount=pt->count;
Node *p=pt->psLink;
for (; p->itemNo>=first; p=p->psLink)
{
if ((rear[p->itemNo]==NULL) || (rear[p->itemNo] != p))
{
if (head[p->itemNo]==NULL)
head[p->itemNo]=p;
else
rear[p->itemNo]->pSibNext=p;
rear[p->itemNo]=p;
p->pSibNext=NULL;
p->count=basecount;
}
else
p->count += basecount;
f1count[p->itemNo] += basecount;
}
} // end of for
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?