⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 maxfpminer.cpp

📁 最大频繁项集挖掘算法。运行前需将release中的data和result数据拷贝到上一级目录下。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// MiningMaxfp.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define TEMP_INFO_FILE "F:\\MFP-Miner\tempinfo.txt"
#define PATHLENGTH 100
#define MINNODECOUNT 4000
#define ITEMCOUNT 1000

// Define used types

// Frequency type
typedef unsigned FREQ_TYPE;

// Item type
typedef unsigned ITEM_TYPE;

// Fptree node type
typedef struct FptreeNode{
   	unsigned nNumber;
   	FREQ_TYPE fFrequency;
  	struct FptreeNode *pVertical;
  	struct FptreeNode *pHorizon;
} FPTREENODE;

//  Item information type
typedef struct ItemInfo{
	FREQ_TYPE fFrequency;
    unsigned nNumber;
} ITEMINFO;

// Memory Block type
typedef struct MemoryBlock{
	FPTREENODE *pMBlock;
	struct MemoryBlock *pNext;
} MEMORYBLOCK;

// Declare functions
void Initialize();
void Sift(unsigned nFirst,unsigned nEnd);
void Sort();
void BuildFptree();
void ReverseLink();
void MiningMaxfp();
bool EliminateBranches(FPTREENODE *pV_Root,FPTREENODE *pH_Root);
void PrintMaxpatterns();
void FreeMemory();

// Declare global variables
FREQ_TYPE fSupport;
ITEM_TYPE itMaxItem;
unsigned nTotalLine;
ITEMINFO *arrfItemInfo;
ITEM_TYPE *arritNum;
FPTREENODE *pRoot;
FPTREENODE *pM_Root;
FPTREENODE **arrpFpHorizon;
FPTREENODE *pBlock;
MEMORYBLOCK *pMemory;
FPTREENODE *pNodeRecycle,*pHeaders;
ITEM_TYPE *arrNumberToItem;
FREQ_TYPE *arrfFreq;
unsigned nItemCount,nFPMaxlength;
unsigned nNodeCount,nFptreeNodeCount,nMptreeNodeCount;
unsigned long nMaxfpCount;
unsigned _int64 nTotalMined,nTotal;
FILE *fileData;
FILE *fileResult;
float fDegree;
char *strFileName;
char *strResultFile;
clock_t clockStart;
clock_t clockFirstScan;
clock_t clockBuildFpree;
clock_t clockEnd;
clock_t clockPrintMaxfp;
// Main program
int main(int argc,char *argv[])
{
	// Declaration
	FILE *fileInfo;
	float fTemp;
	char ch,p2[PATHLENGTH+1],p3[PATHLENGTH+1];
//第一个参数支持度,第二个参数结果文件,
//第三个参数数据文件:每行一个事件,空格分隔数据项,每行结束以-1结束
	if(argc>4)
	{
		// Open tempory mining information file
		if((fileInfo=fopen(TEMP_INFO_FILE,"r"))==NULL)
		{
			puts("Three parameters are needed:");
			puts("Support degree_Result file_Data file.");
			printf("Press ENTER to exit");
			ch=getchar();
			return 0;
		}

		// Read information from temporary information file
		fscanf(fileInfo,"%f%s%s",&fTemp,p2,p3);
		p2[PATHLENGTH]='\0';
		p3[PATHLENGTH]='\0';
		fclose(fileInfo);
		if(argc<4) strFileName=p3;
		if(argc<3) strResultFile=p2;
		if(argc<2) fDegree=fTemp;
	}
	else{
		//fDegree=(float)atof(argv[1]);
		//strResultFile=argv[2];
		//strFileName=argv[3];
		fDegree=10;
		strResultFile=".\\result.txt";
		strFileName=".\\data.txt";
	}

	// Write information into temporary file
	if((fileInfo=fopen(TEMP_INFO_FILE,"w"))!=NULL)
	{
		fprintf(fileInfo,"%f\n",fDegree);
		fprintf(fileInfo,"%s\n",strResultFile);
		fprintf(fileInfo,"%s\n",strFileName);

		fclose(fileInfo);
	}

	// Confirm information
	puts("Mining information : ...");
	printf("Data-file path: %s\n",strFileName);
	printf("Supporting degree(%%): %f\n",fDegree);
	printf("The result will be written to file: %s\n\n",strResultFile);
	printf("Press ENTER to confirm and continue");
	ch=getchar();
	if(ch!='\n') return 0;

    // Initialization
	clockStart=clock();
    Initialize();
	clockFirstScan=clock();

	// Print used time in first scaning
	printf("done in seconds %.3f\n",(clockFirstScan-clockStart)/(double)CLOCKS_PER_SEC);

	// Write information to result file
	puts("Data information ...");
	fprintf(fileResult,"Data-file path: %s\n",strFileName);
	fprintf(fileResult,"Maximum item: %u\n",itMaxItem);
	printf("Maximum item: %u\n",itMaxItem);
	fprintf(fileResult,"Total lines: %u\n",nTotalLine);
	printf("Total lines: %u\n",nTotalLine);
	fprintf(fileResult,"Support frequency: %u\n",fSupport);
	printf("Support frequency: %u\n\n",fSupport);
	fprintf(fileResult,"Supporting degree(%%): %f\n",fDegree);

    // Build fptree
    printf("Build fptree ... ");
    BuildFptree();
	clockBuildFpree=clock();
	printf("done in seconds %.3f\n",(clockBuildFpree-clockFirstScan)/(double)CLOCKS_PER_SEC);

    // MiningMaxfp algorithm
    printf("Mining start ... ");
	fprintf(fileResult,"Max-patterns are : ...\n\n");
    MiningMaxfp();
	clockEnd=clock();
	printf("done in seconds %.3f\n",(clockEnd-clockBuildFpree)/(double)CLOCKS_PER_SEC);
	printf("Print maxfp in seconds %.3f\n",(clockEnd-clockPrintMaxfp)/(double)CLOCKS_PER_SEC);

    // Print pattern number and used time
	printf("All completed in seconds: %.3f\n\n",(clockEnd-clockStart)/(double)CLOCKS_PER_SEC);
	printf("Total pattern number is: %I64u\n",nTotal-1);
	printf("Total mined pattern number is: %I64u\n",nTotalMined);
	printf("Total max-pattern number is: %lu\n\n",nMaxfpCount);
	fprintf(fileResult,"\nTotal pattern number is: %I64u\n",nTotal-1);
	fprintf(fileResult,"Total mined pattern number is: %I64u\n",nTotalMined);
	fprintf(fileResult,"Total max-pattern number is: %lu\n",nMaxfpCount);
	fprintf(fileResult,"Count of fptree nodes: %u\n",nFptreeNodeCount);
	fprintf(fileResult,"Count of max-pattern tree nodes: %u\n",nMptreeNodeCount);
	fprintf(fileResult,"Fptree growth completed in seconds: %.3f\n",(clockEnd-clockBuildFpree)/(double)CLOCKS_PER_SEC);
	fprintf(fileResult,"All completed in seconds: %.3f\n",(clockEnd-clockStart)/(double)CLOCKS_PER_SEC);

	// Close result file
	fclose(fileResult);

    // Clear the fptree
	printf("Delete fptree and max-pattern tree ... ");
    FreeMemory();
	puts("done\n");

	printf("Press ENTER to exit");
	ch=getchar();

	return 0;
}

// Initialize the variables
void Initialize()
{
    unsigned i,nLength,nTemp;
	float fTemp;
	ITEM_TYPE item;
	char ch;

	// Open data file
	if((fileData=fopen(strFileName,"r"))==NULL)
	{
		printf("\nCan't open fptree data file. \n");
		printf("Press ENTER to exit");
		ch=getchar();
		exit(1);
	}

	// Allocation分配
    arrfItemInfo=new ITEMINFO[ITEMCOUNT];

	// Initialize item information
	for(i=0;i<ITEMCOUNT;i++)
	{
		// Initialize information of each item
		arrfItemInfo[i].fFrequency=0;
		arrfItemInfo[i].nNumber=0;
	}

	// Initialization
	nFPMaxlength=0;
	nTotalLine=0;
	itMaxItem=0;
	nLength=ITEMCOUNT;

    // Caculate the frequency of each item while scan the data file
	printf("\nGet data information and sort ... ");
	while(!feof(fileData))
	{
		do
		{
			fscanf(fileData,"%u",&item);
			if(item==-1)
			{
				nTotalLine++;
				if(!feof(fileData)) continue;
				break;
			}
			if(item>itMaxItem)
			{
				itMaxItem=item;
				if(itMaxItem>=nLength)
				{
					nTemp=nLength;
					while(nLength<=itMaxItem) nLength+=ITEMCOUNT;
					arrfItemInfo=(ITEMINFO *)realloc(arrfItemInfo,nLength*sizeof(ITEMINFO));
					for(i=nTemp;i<nLength;i++)
					{
						// Initialize information of each item
						arrfItemInfo[i].fFrequency=0;
						arrfItemInfo[i].nNumber=0;
					}
				}
			}
printf("%d\n",item);
			arrfItemInfo[item].fFrequency++;
		}while(1);

	}

	// Extract the array
	realloc(arrfItemInfo,(itMaxItem+1)*sizeof(ITEMINFO));

	// Caculate the support frequency
	fTemp=nTotalLine*fDegree/100;
	fSupport=(FREQ_TYPE)fTemp;
	if(fTemp-fSupport>0.0001) fSupport++;

	// Create result file
	if((fileResult=fopen(strResultFile,"w"))==NULL)
	{
		delete[]arrfItemInfo;
		printf("\nCan't create result file: %s\n",strResultFile);
		printf("Press ENTER to exit");
		ch=getchar();
		exit(2);
	}

	// close data file
	fclose(fileData);

    // Sort the items in term of frequency
    Sort();
}

// Heap-sort algorithm
void Sort()
{
	ITEM_TYPE itTemp,i;
    unsigned j;

    // Create temporary array
    arritNum=new ITEM_TYPE[itMaxItem+2];

	// Initialize the itemfirst-array
    nItemCount=0; 
	for(i=0;i<=itMaxItem;i++)
	{
		if(arrfItemInfo[i].fFrequency<fSupport) continue;
        nItemCount++;
		arritNum[nItemCount]=i;  
	}

	// If no frequent items, exit program
	if(!nItemCount)
	{
		delete[]arritNum;
		delete[]arrfItemInfo;
		printf("no frequent items ... done\n\n");
		printf("Press ENTER to exit");
		getchar();
		exit(0);
	}

	// Create horizontal links
	arrpFpHorizon=new FPTREENODE *[nItemCount+1];

	// Create number to item array
	arrNumberToItem=new ITEM_TYPE[nItemCount+1];

	// Create frequency array
	arrfFreq=new FREQ_TYPE[nItemCount+2];

    // Build heap
    for(j=nItemCount/2;j>0;j--) Sift(j,nItemCount);

	// Sort the elements and store their informations
    for(j=nItemCount;j>0;j--)
    {
        itTemp=arritNum[1];
        arritNum[1]=arritNum[j];
        arrfItemInfo[itTemp].nNumber=j;
        arrNumberToItem[j]=itTemp;
		arrpFpHorizon[j]=0;
		arrfFreq[j]=0;

		Sift(1,j-1);
    }

	// Free allocated memory释放内存
    delete[]arritNum;
} 

// Heap adjustment
void Sift(unsigned nFirst,unsigned nEnd)
{
	// Declare variables
    FREQ_TYPE fTemp,fMin,fT;
    ITEM_TYPE itTemp;
    unsigned nCurrent,nNext;

	// Initialization
    itTemp=arritNum[nFirst];
    fTemp=arrfItemInfo[itTemp].fFrequency;
    nCurrent=nFirst;
    nNext=nCurrent+nCurrent;

	// Adjust the heap to a sorted-tree
    while(nNext<=nEnd)
    {
		// Get the smaller son
        fMin=arrfItemInfo[arritNum[nNext]].fFrequency;
        if(nNext<nEnd && fMin>(fT=arrfItemInfo[arritNum[nNext+1]].fFrequency))
        {
            fMin=fT;
            nNext++;
        }

		// Deal with next level
        if(fTemp<=fMin) break;
        else{
            arritNum[nCurrent]=arritNum[nNext];
            nCurrent=nNext;
            nNext=nCurrent+nCurrent;
		}
	}

	// End the adjustment
	arritNum[nCurrent]=itTemp;
}

// Build Fptree
void BuildFptree()
{
	// Declare variables
	unsigned i,j,nCount,nTemp,nN,*arrnTick;
	FPTREENODE *pAnces,*pPre,*pQuery,*pInsert;
	MEMORYBLOCK *pTempMemory;
	ITEM_TYPE item;
	char ch;

	// Initialize fptree node count
	nFptreeNodeCount=1;

	// Ensure an opened data-file
	if((fileData=fopen(strFileName,"r"))==NULL)
	{
		printf("\nCan't open fptree data file. \n");
		printf("Press ENTER to exit");
		ch=getchar();
		exit(3);
	}

	// Create arrnTick array and initialize
	arrnTick=new unsigned[nItemCount+1];
	for(i=0;i<=nItemCount;i++) arrnTick[i]=0;

	// Create first memory block
	pMemory=new MEMORYBLOCK;
	pMemory->pNext=0;
	pBlock=new FPTREENODE[MINNODECOUNT];
	pMemory->pMBlock=pBlock;

	// Initialize fptree root
	pRoot=pBlock;
	pRoot->nNumber=0;
	pRoot->pVertical=0;
	pM_Root=pBlock+1;
	pM_Root->nNumber=nItemCount+1;
	pM_Root->pVertical=pRoot;
	nNodeCount=2;

	// Build the fptree
	while(!feof(fileData))
	{
		// Initialize nCount
		nCount=0;

		// Read another transaction and sort
		do
		{
			// Read another item
			fscanf(fileData,"%u",&item);
			if(item==-1) break;
		    nTemp=arrfItemInfo[item].nNumber;

			// Ensure an valid item
			if(nTemp!=0)
			{
				// Find the biggest-smaller integer
				i=nCount;
				while(arrnTick[i]>=nTemp) i--;

				// Verify existence
				i++;
				if(arrnTick[i]!=nTemp)
				{
					arrfFreq[nTemp]++;
					nCount++;
					for(j=nCount;j>i;j--) arrnTick[j]=arrnTick[j-1];
					arrnTick[i]=nTemp;
				}
			}

		// If not transaction end , continue
		}while(1);

		// Ensure an unempty transaction
		if(nCount==0) continue;

		// Get maximum-length
		if(nCount>nFPMaxlength) nFPMaxlength=nCount;

		// Initialization
		i=1;
		pAnces=pRoot;

		// Scan the turns of valid items in a transaction
		do
		{
			// Get the head-node
			pPre=pAnces->pVertical;

			// Check descendant NULL
			if(pPre==0)
			{
				do
				{
					// Get the head-node
					nN=arrnTick[i];

					// Create a new fpnode and initialize it
					if(nNodeCount==MINNODECOUNT)
					{
						pTempMemory=pMemory;
						pMemory=new MEMORYBLOCK;
						pMemory->pNext=pTempMemory;
						pBlock=new FPTREENODE[MINNODECOUNT];
						pMemory->pMBlock=pBlock;
						nNodeCount=0;
					}
					pQuery=pBlock+nNodeCount;
					nNodeCount++;
					nFptreeNodeCount++;
  					pQuery->nNumber=nN+nItemCount;
					pQuery->fFrequency=1;
					pQuery->pHorizon=pQuery;

					// Set ancestor-node
					pAnces->pVertical=pQuery;
					pAnces=pQuery;

					// Inspect next item
					arrnTick[i]=0;
					i++;
				}while(i<=nCount);

				// End vertical link
				pAnces->pVertical=0;

				break;
			}

			// Get the current number
			nN=arrnTick[i];

			// Have finded it
			if(pPre->nNumber==nN+nItemCount)
			{
				pPre->fFrequency++;
				pAnces=pPre;

				// Inspect next item
				arrnTick[i]=0;
				i++;

				if(i<=nCount) continue;

				break;
			}

			// Browse in link
			pQuery=pPre->pHorizon;
			while(pQuery->nNumber<nN)
			{
				pPre=pQuery;
				pQuery=pPre->pHorizon;
			}

			// Deal with the result
			if(pQuery->nNumber>nN)
			{
				// Create a new node and initialize it
				if(nNodeCount==MINNODECOUNT)
				{
					pTempMemory=pMemory;
					pMemory=new MEMORYBLOCK;
					pMemory->pNext=pTempMemory;
					pBlock=new FPTREENODE[MINNODECOUNT];
					pMemory->pMBlock=pBlock;
					nNodeCount=0;
				}
				pInsert=pBlock+nNodeCount;
				nNodeCount++;
				nFptreeNodeCount++;
				pInsert->fFrequency=1;
				pInsert->nNumber=nN;

				// Link it to brother link
				pInsert->pHorizon=pQuery;
				pPre->pHorizon=pInsert;

				// Set ancestor node
				pAnces=pInsert;

				// Go to next item
				arrnTick[i]=0;
				i++;

				while(i<=nCount)
				{
					// Get the head-node
					nN=arrnTick[i];

					// Create a new fpnode and initialize it
					if(nNodeCount==MINNODECOUNT)
					{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -