📄 fptree.cpp
字号:
// Fptree1.cpp: implementation of the Fptree class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "Fptree.h"
#include <stdlib.h>
#include <time.h>
#include <fstream>
#include <string>
using namespace std;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
Fptree::Fptree()
{
m_sumflag = false;
m_userTime = m_sysTime = 0.0;
}
Fptree::~Fptree()
{
}
int Fptree::Execute(const char *param)
{
/* read input parameters --------------------------*/
printf("input\n");
input(param);
#if !PLAT_WIN
getrusage(RUSAGE_SELF,&myTime1);
#else
m_tm1 = m_line.GetTime(true);
#endif
/* pass 1 : Mine the large 1-itemsets -------------*/
printf("\npass1\n");
ParseItemSet();
/* Mine the large k-itemsets (k = 2 to realK) -----*/
if (numLarge[0] > 0) {
/* create FP-tree --------------------------*/
printf("\nbuildTree\n");
BuildTree();
#if !PLAT_WIN
getrusage(RUSAGE_SELF,&myTime2);
#else
m_tm2 = m_line.GetTime(true);
#endif
/* mining frequent patterns ----------------*/
printf("\npassK\n");
m_headerTableSize= numLarge[0];
numLarge[0] = 0;
FPgrowth(root, headerTableLink, m_headerTableSize, NULL, 0);
#if !PLAT_WIN
getrusage(RUSAGE_SELF,&myTime3);
#else
m_tm3 = m_line.GetTime(true);
#endif
/* output result of large itemsets ---------*/
printf("\nresult\n");
displayResult();
}
DisplayTime();
/* free memory ------------------------------------*/
printf("\ndestroy\n");
this->Destroy();
return 0;
}
void Fptree::ParseItemSet()
{
int transSize;
int item;
int maxSize=0;
FILE *fp;
int i, j;
/* Initialize the 1-itemsets list and support list */
support1 = (int *) malloc (sizeof(int) * numItem);
largeItem1 = (int *) malloc (sizeof(int) * numItem);
if ((support1 == NULL) || (largeItem1 == NULL)) {
printf("out of memory\n");
exit(1);
}
for (i=0; i < numItem; i++) {
support1[i] = 0;
largeItem1[i] = i;
}
/* scan DB to count the frequency of each item */
if ((fp = fopen(dataFile, "r")) == NULL) {
printf("Can't open data file, %s.\n", dataFile);
exit(1);
}
/* Scan each transaction of the DB */
for (i=0; i < numTrans; i++) {
/* Read the transaction size */
// fscanf(fp, "%d", &transSize);
m_line.ReadLine(fp);
transSize = m_line.GetFieldSize();
/* Mark down the largest transaction size found so far */
if (transSize > maxSize)
maxSize = transSize;
/* Read the items in the transaction */
for (j=0; j < transSize; j++) {
//fscanf(fp, "%d", &item);
item = m_line.GetBufInt(j);
support1[item]++;
}
}
fclose(fp);
/* Determine the upper limit of itemset size to be mined according to DB and user input.
* If the user specified maximum itemset size (expectedK) is greater than
* the largest transaction size (maxSize) in the database, or exptectedK <= 0,
* then set realK = maxSize;
* otherwise realK = expectedK
*/
realK = expectedK;
if ((maxSize < expectedK) || (expectedK <= 0))
realK = maxSize;
printf("max transaction sizes = %d\n", maxSize);
printf("max itemset size (K_max) to be mined = %d\n", realK);
/* Initialize large k-itemset resulting list and corresponding support list */
largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK);
numLarge = (int *) malloc (sizeof(int) * realK);
if ((largeItemset == NULL) || (numLarge == NULL)) {
printf("out of memory\n");
exit(1);
}
for (i=0; i < realK; i++) {
largeItemset[i] = NULL;
numLarge[i] = 0;
}
/* Sort the supports of 1-itemsets in descending order */
q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem);
/*
for (i=0; i < numItem; i++)
printf("%d[%d] ", largeItem1[i], support1[i]);
printf("\n");
*/
numLarge[0] = 0;
while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold))
(numLarge[0])++;
printf("\nNo. of large 1-itemsets (numLarge[0]) = %d\n", numLarge[0]);
return;
}
void Fptree::BuildTree()
{
int *freqItemP; /* Store frequent items of a transaction */
int *indexList; /* indexList[i] = the index position in the large 1-item list storing freqItemP[i] */
int count; /* Number of frequent items in a transaction */
FILE *fp; /* Pointer to the database file */
int transSize; /* Transaction size */
int item; /* An item in the transaction */
int i, j, m;
int path; /* Number of new tree paths (i.e. new leaf nodes) created so far */
/* Create header table */
headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]);
if (headerTableLink == NULL) {
printf("out of memory\n");
exit(1);
}
for (i=0; i < numLarge[0]; i++)
headerTableLink[i] = NULL;
/* Create root of the FP-tree */
root = (FPTreeNode) malloc (sizeof(FPNode));
if (root == NULL) {
printf("out of memory\n");
exit(1);
}
/* Initialize the root node */
root->numPath = 1;
root->parent = NULL;
root->children = NULL;
root->hlink = NULL;
/* Create freqItemP to store frequent items of a transaction */
freqItemP = (int *) malloc (sizeof(int) * numItem);
if (freqItemP == NULL) {
printf("out of memory\n");
exit(1);
}
indexList = (int *) malloc (sizeof(int) * numItem);
if (indexList == NULL) {
printf("out of memory\n");
exit(1);
}
/* scan DB and insert frequent items into the FP-tree */
if ((fp = fopen(dataFile, "r")) == NULL) {
printf("Can't open data file, %s.\n", dataFile);
exit(1);
}
for (i=0; i < numTrans; i++) {
/* Read the transaction size */
// fscanf(fp, "%d", &transSize);
m_line.ReadLine(fp);
transSize = m_line.GetFieldSize();
count = 0;
path = 0;
for (j=0; j < transSize; j++) {
/* Read a transaction item */
//fscanf(fp, "%d", &item);
item = m_line.GetBufInt(j);
/* Store the item to the frequent list, freqItemP,
* if it is a large 1-item.
*/
for (m=0; m < numLarge[0]; m++) {
if (item == largeItem1[m]) {
/* Store the item */
freqItemP[count] = item;
/* Store the position in the large 1-itemset list storing this item */
indexList[count] = m;
count++;
break;
}
}
}
/* Sort the items in the frequent item list in ascending order of indexList,
* i.e. sort according to the order of the large 1-itemset list
*/
q_sortA(indexList, freqItemP, 0, count-1, count);
/* Insert the frequent patterns of this transaction to the FP-tree. */
insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path);
}
fclose(fp);
free(freqItemP);
free(indexList);
free(largeItem1);
free(support1);
return;
}
void Fptree::FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize)
{
int count;
int i, j;
int *pattern;
int patternSupport;
FPTreeNode aNode = NULL;
int *conLargeItem;
int *conLargeItemSupport;
if (baseSize >= realK) return;
if (T == NULL) return;
/* Create array, conLargeItem, to store the items in the header table;
* and also an array, conLargeItemSupport, to store the corresponding count value
*/
conLargeItem = (int *) malloc (sizeof(int) * headerSize);
conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize);
if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) {
printf("out of memory\n");
exit(1);
}
if (T->numPath == 1) {
/* Case 1: Single Path */
count = 0;
if (T->children != NULL) aNode = T->children->node;
/* Visit the path in top-down manner, and store the items and count values
*/
while (aNode != NULL) {
conLargeItem[count] = aNode->item;
conLargeItemSupport[count] = aNode->count;
count++;
if (aNode->children != NULL)
aNode = aNode->children->node;
else aNode = NULL;
}
/* Form any combination of items in the path to
* generate large itemsets containing the base items stored in 'baseItems'
*/
combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize);
free(conLargeItem);
free(conLargeItemSupport);
} else {
/* Multiple Path */
/* Create an array to store the base items for a conditional FP-tree.
* Size of the base should be (baseSize + 1).
*/
pattern = (int *) malloc (sizeof(int) * (baseSize + 1));
if (pattern == NULL) {
printf("out of memory\n");
exit(1);
}
/* Visit the header table in a top-down manner.
* -- Find the conditional pattern base for each base item in the header table.
* -- Find the frequent items of the conditional pattern base of the base item.
* -- Build conditional FP-tree for the base item.
* -- Recursively mine the conditional FP-tree.
*/
for (i=0; i < headerSize; i++) {
/* Add the item to the base of its conditional FP-tree
*/
pattern[0] = headerTableLink[i]->item;
/* Add the base of T to the base of the conditional FP-tree
*/
for (j=0; j < baseSize; j++) {
pattern[j+1] = baseItems[j];
}
aNode = headerTableLink[i];
patternSupport = 0;
/* Count the support of the base of the conditional FP-tree
*/
while (aNode != NULL) {
patternSupport += aNode->count;
aNode = aNode->hlink;
}
/* Add the itemset formed by the base items
* to the resulting list because it must be large.
*/
addToLargeList(pattern, patternSupport, baseSize);
/* Find frquent items, build conditional FP-tree and perform mining.
*/
genConditionalPatternTree(pattern, baseSize, patternSupport,
conLargeItem, conLargeItemSupport, T,
i, headerSize, headerTableLink);
}
free(pattern);
free(conLargeItem);
free(conLargeItemSupport);
}
return;
}
void Fptree::displayResult()
{
LargeItemPtr aLargeItemset;
FILE *fp;
int i, j;
if ((fp = fopen(outFile, "w")) == NULL) {
printf("Can't open data file, %s.\n", outFile);
exit(1);
}
fprintf(fp, "K_{max} = %d\n\n", realK);
for (i=0; i < realK; i++) {
fprintf(fp, "%d Large %d-itemsets:\n", numLarge[i], i+1);
if (numLarge[i] == 0) break;
printf("\n%d Large %d-itemsets[support]:\n", numLarge[i], i+1);
/* Visit the large (i+1)-itemsets' resulting list */
aLargeItemset = largeItemset[i];
/* print out the large (i+1)-itemsets */
while (aLargeItemset != NULL) {
/* print an (i+1)-itemset */
for (j=0; j <= i; j++) {
printf("%d ", aLargeItemset->itemset[j]);
fprintf(fp, "%d ", aLargeItemset->itemset[j]);
}
/* print the support of the itemset */
printf("[%d]\n", aLargeItemset->support);
fprintf(fp, "[%d]\n", aLargeItemset->support);
aLargeItemset = aLargeItemset->next;
}
printf("\n");
fprintf(fp, "\n");
}
fclose(fp);
return;
}
void Fptree::DisplayTime()
{
/* output execution time ---------------------*/
printf("Build tree time:\n");
#if !PLAT_WIN
m_userTime =
((float) (myTime2.ru_utime.tv_sec - myTime1.ru_utime.tv_sec)) +
((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6;
m_sysTime =
((float) (myTime2.ru_stime.tv_sec - myTime1.ru_stime.tv_sec)) +
((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6;
printf("User time : %f seconds\n", m_userTime);
printf("System time : %f seconds\n\n", m_sysTime);
#else
#if !SIM_TIME
m_sysTime = m_tm2.m_ktime - m_tm1.m_ktime;
printf("System time : %f seconds\n", m_sysTime);
#endif
m_userTime = m_tm2.m_utime - m_tm1.m_utime;
printf("Use time: %f seconds\n\n", m_userTime);
#endif
printf("FP-tree growth time:\n");
#if !PLAT_WIN
m_userTime =
((float) (myTime3.ru_utime.tv_sec - myTime2.ru_utime.tv_sec)) +
((float) (myTime3.ru_utime.tv_usec - myTime2.ru_utime.tv_usec)) * 1e-6;
m_sysTime =
((float) (myTime3.ru_stime.tv_sec - myTime2.ru_stime.tv_sec)) +
((float) (myTime3.ru_stime.tv_usec - myTime2.ru_stime.tv_usec)) * 1e-6;
printf("User time : %f seconds\n", m_userTime);
printf("System time : %f seconds\n", m_sysTime);
#else
#if !SIM_TIME
m_sysTime = m_tm3.m_ktime - m_tm2.m_ktime;
printf("System time : %f seconds\n", m_sysTime);
#endif
m_userTime = m_tm3.m_utime - m_tm2.m_utime;
printf("Use time: %f seconds\n\n", m_userTime);
#endif
printf("Total execution time:\n");
#if !PLAT_WIN
m_userTime =
((float) (myTime3.ru_utime.tv_sec - myTime1.ru_utime.tv_sec)) +
((float) (myTime3.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -