📄 fpt.c
字号:
/* fpt.c (release mode) * * Use threshold for finding large itemsets with supports >= the threshold. * This is the implementation using the FP-tree structure according to the paper: * Jiawei Han, Yiwen Yin: Mining Frequent Patterns without Candidate Generation, * ACM SIGMOD 2000, pages 1-12. * * Program Input: * A configuration file consisting of 6 parameters * 1. User specified maximum size of itemset to be mined * If this value is not larger than zero or * is greater than the greatest transaction size in the DB, * then it will be set to the greatest transaction size. * 2. Normalized support threshold, range: (0, 1] * 3. Total number of different items in the DB * 4. Total number of transactions in the DB * 5. Data file name * 6. Result file name for storing the large itemsets * * Program Output: * The frequent itemsets are displayed from small to large sizes. * For each size, the itemsets are sorted in descending order of their supports. * The support of each large itemset will also be displayed (enclosed by a bracket). * It will output to both screen (standard output) and the result file. * */ #include<stdio.h>#include<stdlib.h>#include<string.h>#include <sys/resource.h>#include <sys/times.h>#include <unistd.h>/***** Data Structure *****//* Description: * Each node of an FP-tree is represented by a 'FPnode' structure. * Each node contains an item ID, count value of the item, and * node-link as stated in the paper. * */typedef struct FPnode *FPTreeNode; /* Pointer to a FP-tree node */typedef struct Childnode *childLink; /* Pointer to children of a FP-tree node *//* * Children of a FP-tree node */typedef struct Childnode { FPTreeNode node; /* A child node of an item */ childLink next; /* Next child */} ChildNode;/* * A FP-tree node */typedef struct FPnode { int item; /* ID of the item. Value of ID is within the range [0, m-1] where m is the total number of different items in the database. */ int count; /* Value of count of the item. This is the number of transactions containing items in the portion of the path reaching this node. */ int numPath; /* Number of leaf nodes in the subtree rooted at this node. It is used to check whether there is only a single path in the FPgrowth function. */ FPTreeNode parent; /* Pointer to parent node */ childLink children; /* Pointer to children */ FPTreeNode hlink; /* Horizontal link to next node with same item */} FPNode;/* * A list to store large itemsets in descending order of their supports. * It stores all the itemsets of supports >= threshold. */typedef struct Itemsetnode *LargeItemPtr;typedef struct Itemsetnode { int support; int *itemset; LargeItemPtr next;} ItemsetNode; void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize);/***** Global Variables *****/LargeItemPtr *largeItemset; /* largeItemset[k-1] = array of large k-itemsets */int *numLarge; /* numLarge[k-1] = no. of large k-itemsets found. */int *support1; /* Support of 1-itemsets */int *largeItem1; /* 1-itemsets */FPTreeNode root=NULL; /* Initial FP-tree */FPTreeNode *headerTableLink; /* Corresponding header table */int expectedK; /* User input upper limit of itemset size to be mined */int realK; /* Actual upper limit of itemset size can be mined */int threshold; /* User input support threshold */int numItem; /* Number of items in the database */int numTrans; /* Number of transactions in the database */char dataFile[100]; /* File name of the database */char outFile[100]; /* File name to store the result of mining *//****************************************************************************************** * Function: destroyTree * * Description: * Destroy the FPtree rooted by a node and free the allocated memory. * For each tree node being visited, all the child nodes * are destroyed in a recursive manner before the destroy of the node. * * Invoked from: * destroy() * * Input Parameter: * node -> Root of the tree/subtree to be destroyed. */void destroyTree(FPTreeNode node){ childLink temp1, temp2; if (node == NULL) return; temp1 = node->children; while(temp1 != NULL) { temp2 = temp1->next; destroyTree(temp1->node); free(temp1); temp1 = temp2; } free(node); return;}/****************************************************************************************** * Function: destroy * * Description: * Free memory of following variables. * - largeItemset * - numLarge * - headerTableLink * - root * * Invoked from: * main() * * Functions to be invoked: * destroyTree() -> Free memory from the FP-tree, root. * * Global variables (read only): * - realK */void destroy(){ LargeItemPtr aLargeItemset; int i; for (i=0; i < realK; i++) { aLargeItemset = largeItemset[i]; while (aLargeItemset != NULL) { largeItemset[i] = largeItemset[i]->next; free(aLargeItemset->itemset); free(aLargeItemset); aLargeItemset = largeItemset[i]; } } free(largeItemset); free(numLarge); free(headerTableLink); destroyTree(root); return;}/****************************************************************************************** * Function: swap * * Description: * Swap x-th element and i-th element of each of the * two arrays, support[] and itemset[]. * * Invoked from: * q_sortD() * q_sortA() * * Functions to be invoked: None * * Input Parameters: * support -> Corresponding supports of the items in itemset. * itemset -> Array of items. * x, i -> The two indexes for swapping. * * Global variable: None */void swap(int *support, int *itemset, int x, int i){ int temp; temp = support[x]; support[x] = support[i]; support[i] = temp; temp = itemset[x]; itemset[x] = itemset[i]; itemset[i] = temp; return;}/****************************************************************************************** * Function: q_sortD * * Description: * Quick sort two arrays, support[] and the corresponding itemset[], * in descending order of support[]. * * Invoked from: * pass1() * genConditionalPatternTree() * q_sortD() * * Functions to be invoked: * swap() * q_sortD() * * Input Parameters: * low -> lower bound index of the array to be sorted * high -> upper bound index of the array to be sorted * size -> size of the array * length -> length of an itemset * * In/Out Parameters: * support[] -> array to be sorted * itemset[] -> array to be sorted */void q_sortD(int *support, int *itemset, int low,int high, int size){ int pass; int highptr=high++; /* highptr records the last element */ /* the first element in list is always served as the pivot */ int pivot=low; if(low>=highptr) return; do { /* Find out, from the head of support[], * the 1st element value not larger than the pivot's */ pass=1; while(pass==1) { if(++low<size) { if(support[low] > support[pivot]) pass=1; else pass=0; } else pass=0; } /* Find out, from the tail of support[], * the 1st element value not smaller than the pivot's */ pass=1; while(pass==1) { if(high-->0) { if(support[high] < support[pivot]) pass=1; else pass=0; } else pass=0; } /* swap elements pointed by low pointer & high pointer */ if(low<high) swap(support, itemset, low, high); } while(low<=high); swap(support, itemset, pivot, high); /* divide list into two for further sorting */ q_sortD(support, itemset, pivot, high-1, size); q_sortD(support, itemset, high+1, highptr, size); return;}/****************************************************************************************** * Function: q_sortA * * Description: * Quick sort two arrays, indexList[] and the corresponding freqItemP[], * in ascending order of indexList[]. * * Invoked from: * buildTree() * buildConTree() * q_sortA() * * Functions to be invoked: * swap() * q_sortA() * * Input Parameters: * low -> lower bound index of the array to be sorted * high -> upper bound index of the array to be sorted * size -> size of the array * length -> length of an itemset * * In/Out Parameters: * indexList[] -> array to be sorted * freqItemP[] -> array to be sorted */void q_sortA(int *indexList, int *freqItemP, int low, int high, int size){ int pass; int highptr=high++; /* highptr records the last element */ /* the first element in list is always served as the pivot */ int pivot=low; if(low>=highptr) return; do { /* Find out, from the head of indexList[], * the 1st element value not smaller than the pivot's */ pass=1; while(pass==1) { if(++low<size) { if(indexList[low] < indexList[pivot]) pass=1; else pass=0; } else pass=0; } /* Find out, from the tail of indexList[], * 1st element value not larger than the pivot's */ pass=1; while(pass==1) { if(high-->0) { if(indexList[high] > indexList[pivot]) pass=1; else pass=0; } else pass=0; } /* swap elements pointed by low pointer & high pointer */ if(low<high) swap(indexList, freqItemP, low, high); } while(low<=high); swap(indexList, freqItemP, pivot, high); /* divide list into two for further sorting */ q_sortA(indexList, freqItemP, pivot, high-1, size); q_sortA(indexList, freqItemP, high+1, highptr, size); return;}/****************************************************************************************** * Function: addToLargeList * * Description: * Add a large itemset, i.e. an itemset of support >= threshold, * to the large itemsets list. * * Invoked from: * FPgrowth() * combine() * * Functions to be invoked: None * * Input parameters: * pattern[] -> The large itemset to be inserted * patternSupport -> Support of the itemset * index -> Number of items in the itemset - 1 * * Global variables: * numLarge[] -> Increment this counter by 1 to represent * the current number of large (index+1)-itemsets found */void addToLargeList(int *pattern, int patternSupport, int index){ LargeItemPtr aLargeItemset; LargeItemPtr aNode, previous=NULL; int i; /* Create a node to store the itemset */ aLargeItemset = (LargeItemPtr) malloc (sizeof(ItemsetNode)); if (aLargeItemset == NULL) { printf("out of memory\n"); exit(1); } aLargeItemset->itemset = (int *) malloc (sizeof(int) * (index+1)); if (aLargeItemset->itemset == NULL) { printf("out of memory\n"); exit(1); } /* Store the support of the itemset */ aLargeItemset->support = patternSupport; /* Store the items of the itemset */ for (i=0; i <= index; i++) { aLargeItemset->itemset[i] = pattern[i]; } aLargeItemset->next = NULL; /* Assign aNode to point to the head of the resulting list */ aNode = largeItemset[index]; /* Insert the itemset to the (index+1)-itemset resulting list. * There are 3 cases for the insertion: * 1. The resulting list is empty * -- insert the itemset to the head of the list * 2. The itemset should be inserted somewhere between * the head and tail of the list * -- locate the suitable position of the list according to its support * -- insert the itemset * 3. The itemset's support is less than the supports of all the itemsets in the list * -- append the itemset at the end of the list */ if (aNode == NULL) { /* Case 1: The resulting list is empty */ largeItemset[index] = aLargeItemset; } else { while ((aNode != NULL) && (aNode->support > patternSupport)) { previous = aNode; aNode = aNode->next; } /* Case 2: Insert between head and tail of the list */ if (previous != NULL) { previous->next = aLargeItemset; aLargeItemset->next = aNode; } else { /* Case 3: Append to the end of the list */ aLargeItemset->next = largeItemset[index]; largeItemset[index] = aLargeItemset; } } /* Update the counter for the number of large (index+1)-itemsets in the list */ (numLarge[index])++; return;}/****************************************************************************************** * Function: combine * * Description: * Make all possible combinations of itemsets for a single path FP-tree. * Any of the combinations is a large itemset. * * Invoked from: * FPgrowth() * combine() * * Functions to be invoked: * addToLargeList() * combine() * * Input parameters: * *itemList -> Array to hold all items in the FP-tree path * *support -> Counts of the items in the path (in *itemList) * *base -> A large itemset discovered which will be used to combine * with additional items in *itemList to form more large itemsets * start -> Position in *itemList where the base is formed * from subset of the set of items in the prefix to this position, * and new large pattern will combine the base with * one additional element from the suffix to this position. * baseSize -> Size of the base, i.e. number of items in base * */void combine(int *itemList, int *support, int start, int itemListSize, int *base, int baseSize){ int *pattern; int i, j; if (baseSize >= realK) return; if (start == itemListSize) return; /* Create an array of size (baseSize+1) to store any itemset formed from * the union of *base and * any item of *itemsetListSize from the position of start to the end */ pattern = (int *) malloc (sizeof(int) * (baseSize+1)); if (pattern == NULL) { printf("out of memory\n"); exit(1); } /* Insert all the items in base[] to pattern[] */ for (j=0; j < baseSize; j++) pattern[j] = base[j]; for (i=start; i < itemListSize; i++) { /* Append an item, itemList[i], to pattern[] */ pattern[baseSize] = itemList[i]; /* Insert pattern[] to the result list of large (baseSizes+1)-itemsets. * Support of this itemset = support[i] */ addToLargeList(pattern , support[i], baseSize); /* Form pattern[] with * any item in *itemListSize from position (i+1) to the end of itemListSize */ combine(itemList, support, i+1, itemListSize, pattern, baseSize+1); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -