📄 fpgrowth.cpp
字号:
#include<stdio.h>#include<stdlib.h>#include<string.h>#include<fstream.h>#include<time.h>typedef struct FPnode *FPTreeNode; /* Pointer to a FP-tree node */typedef struct Childnode *childLink; /* Pointer to children of a FP-tree node */typedef struct Childnode { FPTreeNode node; /* A child node of an item */ childLink next; /* Next child */} ChildNode;typedef struct FPnode { int item; int count; int numPath; FPTreeNode parent; /* Pointer to parent node */ childLink children; /* Pointer to children */ FPTreeNode hlink; /* Horizontal link to next node with same item */} FPNode;typedef struct Itemsetnode *LargeItemPtr;typedef struct Itemsetnode { int support;
float corr; int *itemset; LargeItemPtr next;} ItemsetNode; void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize);LargeItemPtr *largeItemset; int *numLarge; int *support1; int *largeItem1; FPTreeNode root=NULL; /* Initial FP-tree */FPTreeNode *headerTableLink; /* Corresponding header table */int expectedK; /* User input upper limit of itemset size to be mined */int realK; /* Actual upper limit of itemset size can be mined */int threshold; int numItem; /* Number of items in the database */int numTrans; /* Number of transactions in the database */float correlate;char *dataFile; char *outFile; time_t start,finish;float used_time;int findsup(int inputnum){ int i; int j; i=0; j=0; for (i=0; i < numItem; i++) if (largeItem1[i]==inputnum) { j=support1[i]; break; } /* support1[i] = 0; largeItem1[i] = i;*/ return j;}void destroyTree(FPTreeNode node){ childLink temp1, temp2; if (node == NULL) return; temp1 = node->children; while(temp1 != NULL) { temp2 = temp1->next; destroyTree(temp1->node); free(temp1); temp1 = temp2; } free(node); return;}void destroy(){ LargeItemPtr aLargeItemset; int i; for (i=0; i < realK; i++) { aLargeItemset = largeItemset[i]; while (aLargeItemset != NULL) { largeItemset[i] = largeItemset[i]->next; free(aLargeItemset->itemset); free(aLargeItemset); aLargeItemset = largeItemset[i]; } } free(largeItemset); free(numLarge); free(headerTableLink); destroyTree(root); return;}void swap(int *support, int *itemset, int x, int i){ int temp; temp = support[x]; support[x] = support[i]; support[i] = temp; temp = itemset[x]; itemset[x] = itemset[i]; itemset[i] = temp; return;}void q_sortD(int *support, int *itemset, int low,int high, int size){ int pass; int highptr=high++; /* highptr records the last element */ /* the first element in list is always served as the pivot */ int pivot=low; if(low>=highptr) return; do { pass=1; while(pass==1) { if(++low<size) { if(support[low] > support[pivot]) pass=1; else pass=0; } else pass=0; } /* Find out, from the tail of support[], * the 1st element value not smaller than the pivot's */ pass=1; while(pass==1) { if(high-->0) { if(support[high] < support[pivot]) pass=1; else pass=0; } else pass=0; } /* swap elements pointed by low pointer & high pointer */ if(low<high) swap(support, itemset, low, high); } while(low<=high); swap(support, itemset, pivot, high); /* divide list into two for further sorting */ q_sortD(support, itemset, pivot, high-1, size); q_sortD(support, itemset, high+1, highptr, size); return;}void q_sortA(int *indexList, int *freqItemP, int low, int high, int size){ int pass; int highptr=high++; /* highptr records the last element */ /* the first element in list is always served as the pivot */ int pivot=low; if(low>=highptr) return; do { pass=1; while(pass==1) { if(++low<size) { if(indexList[low] < indexList[pivot]) pass=1; else pass=0; } else pass=0; } /* Find out, from the tail of indexList[], * 1st element value not larger than the pivot's */ pass=1; while(pass==1) { if(high-->0) { if(indexList[high] > indexList[pivot]) pass=1; else pass=0; } else pass=0; } /* swap elements pointed by low pointer & high pointer */ if(low<high) swap(indexList, freqItemP, low, high); } while(low<=high); swap(indexList, freqItemP, pivot, high); /* divide list into two for further sorting */ q_sortA(indexList, freqItemP, pivot, high-1, size); q_sortA(indexList, freqItemP, high+1, highptr, size); return;}void addToLargeList(int *pattern, int patternSupport, int index){ LargeItemPtr aLargeItemset; LargeItemPtr aNode, previous=NULL; int i,c1,c2; int temp=0;
float cor; /* Judge whether to add the itemset */ //printf("%d\n",pattern[0]);// printf("%d\n",pattern[index]); // printf("%d\n",temp);// temp=temp*correlate;
//printf("%d\n",temp); c1=findsup(pattern[0]);
for (i=0;i<=index;i++)
{
c2=findsup(pattern[i]);
if (c2>c1) c1=c2;
}
temp=c1*correlate;
if (patternSupport<=temp ) return;
cor=float(patternSupport)/float(c1);
/* Create a node to store the itemset */ aLargeItemset = (LargeItemPtr) malloc (sizeof(ItemsetNode)); if (aLargeItemset == NULL) { printf("out of memory\n"); exit(1); } aLargeItemset->itemset = (int *) malloc (sizeof(int) * (index+1)); if (aLargeItemset->itemset == NULL) { printf("out of memory\n"); exit(1); } /* Store the support of the itemset */ aLargeItemset->support = patternSupport;
aLargeItemset->corr=cor; /* Store the items of the itemset */ for (i=0; i <= index; i++) { aLargeItemset->itemset[i] = pattern[i]; } aLargeItemset->next = NULL; /* Assign aNode to point to the head of the resulting list */ aNode = largeItemset[index]; if (aNode == NULL) { /* Case 1: The resulting list is empty */ largeItemset[index] = aLargeItemset; } else { while ((aNode != NULL) && (aNode->support > patternSupport)) { previous = aNode; aNode = aNode->next; } /* Case 2: Insert between head and tail of the list */ if (previous != NULL) { previous->next = aLargeItemset; aLargeItemset->next = aNode; } else { /* Case 3: Append to the end of the list */ aLargeItemset->next = largeItemset[index]; largeItemset[index] = aLargeItemset; } } (numLarge[index])++; return;}void combine(int *itemList, int *support, int start, int itemListSize, int *base, int baseSize){ int *pattern; int i, j; if (baseSize >= realK) return; if (start == itemListSize) return; /* Create an array of size (baseSize+1) to store any itemset formed from * the union of *base and * any item of *itemsetListSize from the position of start to the end */ pattern = (int *) malloc (sizeof(int) * (baseSize+1)); if (pattern == NULL) { printf("out of memory\n"); exit(1); } /* Insert all the items in base[] to pattern[] */ for (j=0; j < baseSize; j++) pattern[j] = base[j]; for (i=start; i < itemListSize; i++) { /* Append an item, itemList[i], to pattern[] */ pattern[baseSize] = itemList[i]; /* Insert pattern[] to the result list of large (baseSizes+1)-itemsets. * Support of this itemset = support[i] */ addToLargeList(pattern , support[i], baseSize); /* Form pattern[] with * any item in *itemListSize from position (i+1) to the end of itemListSize */ combine(itemList, support, i+1, itemListSize, pattern, baseSize+1); } free(pattern); return;}void insert_tree(int *freqItemP, int *indexList, int count, int ptr, int length, FPTreeNode T, FPTreeNode *headerTableLink, int *path) { childLink newNode; FPTreeNode hNode; FPTreeNode hPrevious; childLink previous; childLink aNode; /* If all the items have been inserted */ if (ptr == length) return; /* Case 1 : If the current subtree has no children */ if (T->children == NULL) { /* T has no children */ /* Create child nodes for this node */ newNode = (childLink) malloc (sizeof(ChildNode)); if (newNode == NULL) { printf("out of memory\n"); exit(1); } /* Create a first child to store the item */ newNode->node = (FPTreeNode) malloc (sizeof(FPNode)); if (newNode->node == NULL) { printf("out of memory\n"); exit(1); } /* Store information of the item */ newNode->node->item = freqItemP[ptr]; newNode->node->count = count; newNode->node->numPath = 1; newNode->node->parent = T; newNode->node->children = NULL; newNode->node->hlink = NULL; newNode->next = NULL; T->children = newNode; /* Link the node to the header table */ hNode = headerTableLink[indexList[ptr]]; if (hNode == NULL) { /* Place the node at the front of the horizontal link for the item */ headerTableLink[indexList[ptr]] = newNode->node; } else { /* Place the node at the end using the horizontal link */ while (hNode != NULL) { hPrevious = hNode; hNode = hNode->hlink; } hPrevious->hlink = newNode->node; } /* Insert next item, freqItemP[ptr+1], to the tree */ insert_tree(freqItemP, indexList, count, ptr+1, length, T->children->node, headerTableLink, path); T->numPath += *path; } else { aNode = T->children; while ((aNode != NULL) && (aNode->node->item != freqItemP[ptr])) { previous = aNode; aNode = aNode->next; } if (aNode == NULL) { /* Case 2: Create a new child for T */ newNode = (childLink) malloc (sizeof(ChildNode)); if (newNode == NULL) { printf("out of memory\n"); exit(1); } newNode->node = (FPTreeNode) malloc (sizeof(FPNode)); if (newNode->node == NULL) { printf("out of memory\n"); exit(1); } /* Store information of the item */ newNode->node->item = freqItemP[ptr]; newNode->node->count = count; newNode->node->numPath = 1; newNode->node->parent = T; newNode->node->children = NULL; newNode->node->hlink = NULL; newNode->next = NULL; previous->next = newNode; /* Link the node to the header table */ hNode = headerTableLink[indexList[ptr]]; if (hNode == NULL) { /* Place the node at the front of the horizontal link for the item */ headerTableLink[indexList[ptr]] = newNode->node; } else { /* Place the node at the end using the horizontal link */ while (hNode != NULL) { hPrevious = hNode; hNode = hNode->hlink; } hPrevious->hlink = newNode->node; } /* Insert next item, freqItemP[ptr+1], to the tree */ insert_tree(freqItemP, indexList, count, ptr+1, length, newNode->node, headerTableLink, path); (*path)++; T->numPath += *path; } else { aNode->node->count += count; insert_tree(freqItemP, indexList, count, ptr+1, length, aNode->node, headerTableLink, path); T->numPath += *path; } } return;}void buildtemptree(FPTreeNode *conRoot, FPTreeNode **conHeader, int conHeaderSize, int *conLargeItem, int *conLargeItemSupport, FPTreeNode T, FPTreeNode *headerTable, int baseIndex, int headerSize){ FPTreeNode aNode; FPTreeNode ancestorNode; int *freqItemP; int *indexList; int path; int count; int i; /* create temp header table */ *conHeader = (FPTreeNode *) malloc (sizeof(FPTreeNode) * conHeaderSize); if (*conHeader == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < conHeaderSize; i++) (*conHeader)[i] = NULL; /* create root of the temp FP-tree */ (*conRoot) = (FPTreeNode) malloc (sizeof(FPNode)); if (*conRoot == NULL) { printf("out of memory\n"); exit(1); } /* Initialize the root of the temp FP-tree */ (*conRoot)->numPath = 1; (*conRoot)->parent = NULL; (*conRoot)->children = NULL; (*conRoot)->hlink = NULL; freqItemP = (int *) malloc (sizeof(int) * conHeaderSize); if (freqItemP == NULL) { printf("out of memory\n"); exit(1); } indexList = (int *) malloc (sizeof(int) * conHeaderSize); if (indexList == NULL) { printf("out of memory\n"); exit(1); } aNode = headerTable[baseIndex]; while (aNode != NULL) { ancestorNode = aNode->parent; count = 0; while (ancestorNode != T) { for (i=0; i < conHeaderSize; i++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -