⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fptree.cpp

📁 FP-growth算法的改进C++程序,具有较好的扩展性和应用性,本程序改成用行读取
💻 CPP
📖 第 1 页 / 共 3 页
字号:
// Fptree1.cpp: implementation of the Fptree class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "Fptree.h"
#include <stdlib.h>
#include <time.h>
#include <fstream>
#include <string>
using namespace std;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

Fptree::Fptree()
{
	m_sumflag = false;
	m_userTime = m_sysTime = 0.0;
}

Fptree::~Fptree()
{
}

int Fptree::Execute(const char *param)
{
	/* read input parameters --------------------------*/
	printf("input\n");
	input(param);
#if !PLAT_WIN
	getrusage(RUSAGE_SELF,&myTime1);
#else 
	m_tm1 = m_line.GetTime(true);
#endif
	
	/* pass 1 : Mine the large 1-itemsets -------------*/
	printf("\npass1\n");
	ParseItemSet();
	
	/* Mine the large k-itemsets (k = 2 to realK) -----*/
	if (numLarge[0] > 0) {
		
		/* create FP-tree --------------------------*/
		printf("\nbuildTree\n");
		BuildTree();
#if !PLAT_WIN
		getrusage(RUSAGE_SELF,&myTime2);
#else
		m_tm2 = m_line.GetTime(true);
#endif
		/* mining frequent patterns ----------------*/
		printf("\npassK\n");
		m_headerTableSize= numLarge[0];
		numLarge[0] = 0;
		FPgrowth(root, headerTableLink, m_headerTableSize, NULL, 0);
#if !PLAT_WIN
		getrusage(RUSAGE_SELF,&myTime3);
#else
		m_tm3 = m_line.GetTime(true);
#endif
		/* output result of large itemsets ---------*/
		printf("\nresult\n");
		displayResult();
	}
	DisplayTime();
	/* free memory ------------------------------------*/
	printf("\ndestroy\n");
	this->Destroy();
	return 0;
}

void Fptree::ParseItemSet()
{
 int transSize;
 int item;
 int maxSize=0;
 FILE *fp;
 int i, j;

 /* Initialize the 1-itemsets list and support list */
 support1 = (int *) malloc (sizeof(int) * numItem);
 largeItem1 = (int *) malloc (sizeof(int) * numItem);
 if ((support1 == NULL) || (largeItem1 == NULL)) {
	printf("out of memory\n");
	exit(1);
 }

 for (i=0; i < numItem; i++) { 
	support1[i] = 0;
	largeItem1[i] = i;
 }

 /* scan DB to count the frequency of each item */

 if ((fp = fopen(dataFile, "r")) == NULL) {
        printf("Can't open data file, %s.\n", dataFile);
        exit(1);
 }

 /* Scan each transaction of the DB */
 for (i=0; i < numTrans; i++) {

	/* Read the transaction size */
//	fscanf(fp, "%d", &transSize);
	 m_line.ReadLine(fp);
	 transSize = m_line.GetFieldSize();
	 

	/* Mark down the largest transaction size found so far */
	if (transSize > maxSize)
		maxSize = transSize;

	/* Read the items in the transaction */
	for (j=0; j < transSize; j++) {
		//fscanf(fp, "%d", &item);
		item = m_line.GetBufInt(j);
		support1[item]++;
	}
 } 
 fclose(fp);
 
 /* Determine the upper limit of itemset size to be mined according to DB and user input. 
  * If the user specified maximum itemset size (expectedK) is greater than 
  * the largest transaction size (maxSize) in the database, or  exptectedK <= 0,
  * then set  realK = maxSize;
  * otherwise  realK = expectedK
  */
 realK = expectedK;
 if ((maxSize < expectedK) || (expectedK <= 0))
	realK = maxSize;
 printf("max transaction sizes = %d\n", maxSize);
 printf("max itemset size (K_max) to be mined  = %d\n", realK);

 /* Initialize large k-itemset resulting list and corresponding support list */
 largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK); 
 numLarge = (int *) malloc (sizeof(int) * realK);

 if ((largeItemset == NULL) || (numLarge == NULL)) {
	printf("out of memory\n");
	exit(1);
 }

 for (i=0; i < realK; i++)  {
	largeItemset[i] = NULL;
	numLarge[i] = 0;
 }

 /* Sort the supports of 1-itemsets in descending order */
 q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem);

 /*
 for (i=0; i < numItem; i++) 
 	printf("%d[%d] ", largeItem1[i], support1[i]);
 printf("\n");
 */

 numLarge[0] = 0;
 while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold))
	(numLarge[0])++;

 printf("\nNo. of large 1-itemsets (numLarge[0]) = %d\n", numLarge[0]);

 return;
}

void Fptree::BuildTree()
{
 int *freqItemP;	/* Store frequent items of a transaction */
 int *indexList;	/* indexList[i] = the index position in the large 1-item list storing freqItemP[i] */
 int count;		/* Number of frequent items in a transaction */
 FILE *fp;		/* Pointer to the database file */
 int transSize;		/* Transaction size */
 int item;		/* An item in the transaction */
 int i, j, m;
 int path;		/* Number of new tree paths (i.e. new leaf nodes) created so far */


 /* Create header table */
 headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]);
 if (headerTableLink == NULL) {
	printf("out of memory\n");
	exit(1);
 }
 for (i=0; i < numLarge[0]; i++)
	headerTableLink[i] = NULL;
	
 /* Create root of the FP-tree */
 root = (FPTreeNode) malloc (sizeof(FPNode));
 if (root == NULL) {
	printf("out of memory\n");
	exit(1);
 }

 /* Initialize the root node */
 root->numPath = 1;
 root->parent = NULL;
 root->children = NULL;
 root->hlink = NULL;

 /* Create freqItemP to store frequent items of a transaction */
 freqItemP = (int *) malloc (sizeof(int) * numItem);
 if (freqItemP == NULL) {
	printf("out of memory\n");
	exit(1);
 }	

 indexList = (int *) malloc (sizeof(int) * numItem);
 if (indexList == NULL) {
	printf("out of memory\n");
	exit(1);
 }	


 /* scan DB and insert frequent items into the FP-tree */
 if ((fp = fopen(dataFile, "r")) == NULL) {
        printf("Can't open data file, %s.\n", dataFile);
        exit(1);
 }


 for (i=0; i < numTrans; i++) {

	/* Read the transaction size */
//	fscanf(fp, "%d", &transSize);
	 m_line.ReadLine(fp);
	 transSize = m_line.GetFieldSize();
	 

	count = 0;
 	path = 0;

	for (j=0; j < transSize; j++) {

		/* Read a transaction item */
		//fscanf(fp, "%d", &item);
		item = m_line.GetBufInt(j);

		/* Store the item to the frequent list, freqItemP, 
		 * if it is a large 1-item.
		 */
		for (m=0; m < numLarge[0]; m++) {
			if (item == largeItem1[m]) {
				/* Store the item */
				freqItemP[count] = item;
				/* Store the position in the large 1-itemset list storing this item */
				indexList[count] = m;
				count++;
				break;
			} 
		}
	}

	/* Sort the items in the frequent item list in ascending order of indexList,
	 * i.e. sort according to the order of the large 1-itemset list
	 */
	q_sortA(indexList, freqItemP, 0, count-1, count);

	/* Insert the frequent patterns of this transaction to the FP-tree. */
	insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path);
 } 
 fclose(fp);

 free(freqItemP);
 free(indexList);
 free(largeItem1);
 free(support1);

 return;

}
void Fptree::FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize)
{
 int count;
 int i, j;
 int *pattern;
 int patternSupport;
 FPTreeNode aNode = NULL;
 int *conLargeItem;
 int *conLargeItemSupport;


 if (baseSize >= realK) return;
 if (T == NULL) return;

 /* Create array, conLargeItem, to store the items in the header table; 
  * and also an array, conLargeItemSupport, to store the corresponding count value
  */
 conLargeItem = (int *) malloc (sizeof(int) * headerSize);
 conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize);
 if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) {
	printf("out of memory\n");
	exit(1);
 }

 if (T->numPath == 1) {

	/* Case 1: Single Path */

	count = 0;
	if (T->children != NULL) aNode = T->children->node;

	/* Visit the path in top-down manner, and store the items and count values 
	 */
	while (aNode != NULL) {
		conLargeItem[count] = aNode->item;
		conLargeItemSupport[count] = aNode->count;
		count++;
		if (aNode->children != NULL)
			aNode = aNode->children->node;
		else	aNode = NULL;
	}

	/* Form any combination of items in the path to 
	 * generate large itemsets containing the base items stored in 'baseItems'
	 */
	combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize);

	free(conLargeItem);
	free(conLargeItemSupport);

 } else {
	/* Multiple Path */

	/* Create an array to store the base items for a conditional FP-tree. 
	 * Size of the base should be (baseSize + 1). 
	 */
	pattern = (int *) malloc (sizeof(int) * (baseSize + 1));
	if (pattern == NULL) {
		printf("out of memory\n");
		exit(1);
	}

 	/* Visit the header table in a top-down manner. 
	 * -- Find the conditional pattern base for each base item in the header table.
	 * -- Find the frequent items of the conditional pattern base of the base item.
	 * -- Build conditional FP-tree for the base item.
	 * -- Recursively mine the conditional FP-tree.
	 */
	for (i=0; i < headerSize; i++) {

		/* Add the item to the base of its conditional FP-tree
		 */
		pattern[0] = headerTableLink[i]->item;

		/* Add the base of T to the base of the conditional FP-tree
		 */
		for (j=0; j < baseSize; j++) {
			pattern[j+1] = baseItems[j];
		}

		aNode = headerTableLink[i];
		patternSupport = 0;

		/* Count the support of the base of the conditional FP-tree
		 */
		while (aNode != NULL) {
			patternSupport += aNode->count;
			aNode = aNode->hlink;
		}

		/* Add the itemset formed by the base items 
		 * to the resulting list because it must be large. 
		 */
		addToLargeList(pattern, patternSupport, baseSize);

		/* Find frquent items, build conditional FP-tree and perform mining.
		 */
		genConditionalPatternTree(pattern, baseSize, patternSupport,
				conLargeItem, conLargeItemSupport, T,
				i, headerSize, headerTableLink);
	}

	free(pattern);
	free(conLargeItem);
	free(conLargeItemSupport);
 }

 return;
	
}
void Fptree::displayResult()
{
	LargeItemPtr aLargeItemset;
	FILE *fp;
	int i, j;
	
	if ((fp = fopen(outFile, "w")) == NULL) {
        printf("Can't open data file, %s.\n", outFile);
        exit(1);
	}
	fprintf(fp, "K_{max} = %d\n\n", realK);
	
	for (i=0; i < realK; i++) {
		
		fprintf(fp, "%d Large %d-itemsets:\n", numLarge[i], i+1);
		
		if (numLarge[i] == 0) break;
		
		printf("\n%d Large %d-itemsets[support]:\n", numLarge[i], i+1);
		
		/* Visit the large (i+1)-itemsets' resulting list */
		aLargeItemset = largeItemset[i];
		
		/* print out the large (i+1)-itemsets */
		while (aLargeItemset != NULL) {
			
			/* print an (i+1)-itemset */
			for (j=0; j <= i; j++) {
				printf("%d ", aLargeItemset->itemset[j]);
				fprintf(fp, "%d ", aLargeItemset->itemset[j]);
			}
			
			/* print the support of the itemset */
			printf("[%d]\n", aLargeItemset->support);
			fprintf(fp, "[%d]\n", aLargeItemset->support);
			
			aLargeItemset = aLargeItemset->next;
		}
		printf("\n");
		fprintf(fp, "\n");
	}
	
	fclose(fp);
	
	return;
}

void Fptree::DisplayTime()
{
	
	/* output execution time ---------------------*/
	printf("Build tree time:\n");
#if !PLAT_WIN
	m_userTime =
		((float) (myTime2.ru_utime.tv_sec  - myTime1.ru_utime.tv_sec)) +
		((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6;
	m_sysTime =
		((float) (myTime2.ru_stime.tv_sec  - myTime1.ru_stime.tv_sec)) +
		((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6;
	printf("User time : %f seconds\n", m_userTime);
	printf("System time : %f seconds\n\n", m_sysTime);
#else
#if !SIM_TIME
	m_sysTime = m_tm2.m_ktime - m_tm1.m_ktime;
	printf("System time : %f seconds\n", m_sysTime);
#endif
	m_userTime = m_tm2.m_utime - m_tm1.m_utime;
	printf("Use time: %f seconds\n\n", m_userTime);
#endif
	printf("FP-tree growth time:\n");
#if !PLAT_WIN
	m_userTime =
		((float) (myTime3.ru_utime.tv_sec  - myTime2.ru_utime.tv_sec)) +
		((float) (myTime3.ru_utime.tv_usec - myTime2.ru_utime.tv_usec)) * 1e-6;
	m_sysTime =
		((float) (myTime3.ru_stime.tv_sec  - myTime2.ru_stime.tv_sec)) +
		((float) (myTime3.ru_stime.tv_usec - myTime2.ru_stime.tv_usec)) * 1e-6;
	printf("User time : %f seconds\n", m_userTime);
	printf("System time : %f seconds\n", m_sysTime);
#else
#if !SIM_TIME
	m_sysTime = m_tm3.m_ktime - m_tm2.m_ktime;
	printf("System time : %f seconds\n", m_sysTime);
#endif
	m_userTime = m_tm3.m_utime - m_tm2.m_utime;
	printf("Use time: %f seconds\n\n", m_userTime);
#endif
	printf("Total execution time:\n");
#if !PLAT_WIN
	m_userTime =
		((float) (myTime3.ru_utime.tv_sec  - myTime1.ru_utime.tv_sec)) +
		((float) (myTime3.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -