📄 fpt.c

📁 fp-treevc实现的
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* fpt.c (release mode)  * * Use threshold for finding large itemsets with supports >= the threshold.  * This is the implementation using the FP-tree structure according to the paper: * 	Jiawei Han, Yiwen Yin: Mining Frequent Patterns without Candidate Generation, * 	ACM SIGMOD 2000, pages 1-12. * * Program Input: *	A configuration file consisting of 6 parameters *	1. User specified maximum size of itemset to be mined *	   If this value is not larger than zero or  *	   is greater than the greatest transaction size in the DB, *	   then it will be set to the greatest transaction size. *	2. Normalized support threshold, range: (0, 1] *	3. Total number of different items in the DB *	4. Total number of transactions in the DB *	5. Data file name *	6. Result file name for storing the large itemsets * * Program Output: *	The frequent itemsets are displayed from small to large sizes. *	For each size, the itemsets are sorted in descending order of their supports. *	The support of each large itemset will also be displayed (enclosed by a bracket). *	It will output to both screen (standard output) and the result file. * */ #include<stdio.h>#include<stdlib.h>#include<string.h>#include <sys/resource.h>#include <sys/times.h>#include <unistd.h>/***** Data Structure *****//* Description: *	Each node of an FP-tree is represented by a 'FPnode' structure. *	Each node contains an item ID, count value of the item, and *	node-link as stated in the paper. *	 */typedef struct FPnode *FPTreeNode;	/* Pointer to a FP-tree node */typedef struct Childnode *childLink;	/* Pointer to children of a FP-tree node *//* * Children of a FP-tree node */typedef struct Childnode {	FPTreeNode node;	/* A child node of an item */	childLink next;		/* Next child */} ChildNode;/* * A FP-tree node */typedef struct FPnode {        int item;		/* ID of the item.  				   Value of ID is within the range [0, m-1]				   where m is the total number of different items in the database. */        int count;		/* Value of count of the item.				   This is the number of transactions containing items in the portion				   of the path reaching this node. */	int numPath;  		/* Number of leaf nodes in the subtree			           rooted at this node.  It is used to				   check whether there is only a single path 				   in the FPgrowth function. */	FPTreeNode parent;	/* Pointer to parent node */        childLink children;	/* Pointer to children */        FPTreeNode hlink;	/* Horizontal link to next node with same item */} FPNode;/* * A list to store large itemsets in descending order of their supports. * It stores all the itemsets of supports >= threshold. */typedef struct Itemsetnode *LargeItemPtr;typedef struct Itemsetnode {	int support;	int *itemset;	LargeItemPtr next;} ItemsetNode;		void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize);/***** Global Variables *****/LargeItemPtr *largeItemset;	/* largeItemset[k-1] = array of large k-itemsets */int *numLarge;			/* numLarge[k-1] = no. of large k-itemsets found. */int *support1;			/* Support of 1-itemsets */int *largeItem1;		/* 1-itemsets */FPTreeNode root=NULL;		/* Initial FP-tree */FPTreeNode *headerTableLink;	/* Corresponding header table */int expectedK;			/* User input upper limit of itemset size to be mined */int realK;			/* Actual upper limit of itemset size can be mined */int threshold;			/* User input support threshold */int numItem;			/* Number of items in the database */int numTrans;			/* Number of transactions in the database */char dataFile[100];		/* File name of the database */char outFile[100];		/* File name to store the result of mining *//****************************************************************************************** * Function: destroyTree * * Description: *	Destroy the FPtree rooted by a node and free the allocated memory. *	For each tree node being visited, all the child nodes *	are destroyed in a recursive manner before the destroy of the node. * * Invoked from:	 * 	destroy() *  * Input Parameter: *	node	-> Root of the tree/subtree to be destroyed. */void destroyTree(FPTreeNode node){ childLink temp1, temp2; if (node == NULL) return; temp1 = node->children; while(temp1 != NULL) {	temp2 = temp1->next;	destroyTree(temp1->node);	free(temp1);	temp1 = temp2; } free(node); return;}/****************************************************************************************** * Function: destroy * * Description: *	Free memory of following variables. *	- largeItemset *	- numLarge *	- headerTableLink *	- root * * Invoked from:	 * 	main() *  * Functions to be invoked: *	destroyTree()	-> Free memory from the FP-tree, root. * * Global variables (read only): *	- realK */void destroy(){ LargeItemPtr aLargeItemset;  int i; for (i=0; i < realK; i++) {	aLargeItemset = largeItemset[i];	while (aLargeItemset != NULL) {		largeItemset[i] = largeItemset[i]->next;		free(aLargeItemset->itemset);		free(aLargeItemset);		aLargeItemset = largeItemset[i];	} } free(largeItemset); free(numLarge);  free(headerTableLink); destroyTree(root); return;}/****************************************************************************************** * Function: swap * * Description: *	Swap x-th element and i-th element of each of the *	two arrays, support[] and itemset[]. * * Invoked from:	 *	q_sortD() *	q_sortA() *  * Functions to be invoked: None * * Input Parameters: *	support	-> Corresponding supports of the items in itemset. *	itemset	-> Array of items. *	x, i	-> The two indexes for swapping. * * Global variable: None */void swap(int *support, int *itemset, int x, int i){  int temp;  temp = support[x]; support[x] = support[i]; support[i] = temp; temp = itemset[x]; itemset[x] = itemset[i]; itemset[i] = temp;  return;}/****************************************************************************************** * Function: q_sortD * * Description: * 	Quick sort two arrays, support[] and the corresponding itemset[],  *	in descending order of support[]. * * Invoked from:	 *	pass1() *	genConditionalPatternTree() *	q_sortD() *  * Functions to be invoked: *	swap() *	q_sortD() * * Input Parameters: *      low		-> lower bound index of the array to be sorted *      high		-> upper bound index of the array to be sorted *      size		-> size of the array *	length		-> length of an itemset * * In/Out Parameters: *      support[]	-> array to be sorted *      itemset[]	-> array to be sorted */void q_sortD(int *support, int *itemset, int low,int high, int size){ int pass; int highptr=high++;     /* highptr records the last element */ /* the first element in list is always served as the pivot */ int pivot=low; if(low>=highptr) return; do {	/* Find out, from the head of support[], 	 * the 1st element value not larger than the pivot's 	 */	pass=1;	while(pass==1) {		if(++low<size) {			if(support[low] > support[pivot])				pass=1;			else pass=0;		} else pass=0;	} 	/* Find out, from the tail of support[], 	 * the 1st element value not smaller than the pivot's 	 */ 	pass=1; 	while(pass==1) {		if(high-->0) { 			if(support[high] < support[pivot]) 				pass=1;			else pass=0; 		} else pass=0; 	}	/* swap elements pointed by low pointer & high pointer */	if(low<high)		swap(support, itemset, low, high); } while(low<=high); swap(support, itemset, pivot, high); /* divide list into two for further sorting */  q_sortD(support, itemset, pivot, high-1, size);  q_sortD(support, itemset, high+1, highptr, size);  return;}/****************************************************************************************** * Function: q_sortA * * Description: * 	Quick sort two arrays, indexList[] and the corresponding freqItemP[],  *	in ascending order of indexList[]. * * Invoked from:	 *	buildTree() *	buildConTree() *	q_sortA() *  * Functions to be invoked: *	swap() *	q_sortA() * * Input Parameters: *      low		-> lower bound index of the array to be sorted *      high		-> upper bound index of the array to be sorted *      size		-> size of the array *	length		-> length of an itemset * * In/Out Parameters: *      indexList[]	-> array to be sorted *      freqItemP[]	-> array to be sorted */void q_sortA(int *indexList, int *freqItemP, int low, int high, int size){ int pass; int highptr=high++;     /* highptr records the last element */ /* the first element in list is always served as the pivot */ int pivot=low; if(low>=highptr) return; do {        /* Find out, from the head of indexList[], 	 * the 1st element value not smaller than the pivot's 	 */        pass=1;        while(pass==1) {                if(++low<size) {                        if(indexList[low] < indexList[pivot])                                pass=1;                        else pass=0;                } else pass=0;        }        /* Find out, from the tail of indexList[],	 * 1st element value not larger than the pivot's 	 */        pass=1;        while(pass==1) {                if(high-->0) {                        if(indexList[high] > indexList[pivot])                                pass=1;                        else pass=0;                } else pass=0;        }        /* swap elements pointed by low pointer & high pointer */        if(low<high)                swap(indexList, freqItemP, low, high); } while(low<=high); swap(indexList, freqItemP, pivot, high); /* divide list into two for further sorting */ q_sortA(indexList, freqItemP, pivot, high-1, size); q_sortA(indexList, freqItemP, high+1, highptr, size); return;}/****************************************************************************************** * Function: addToLargeList * * Description: *	Add a large itemset, i.e. an itemset of support >= threshold, *	to the large itemsets list. * * Invoked from:	 *	FPgrowth() *	combine() *  * Functions to be invoked: None * * Input parameters: *	pattern[]	-> The large itemset to be inserted *	patternSupport	-> Support of the itemset *	index		-> Number of items in the itemset - 1 * * Global variables: *	numLarge[]	-> Increment this counter by 1 to represent *			   the current number of large (index+1)-itemsets found */void addToLargeList(int *pattern, int patternSupport, int index){ LargeItemPtr aLargeItemset; LargeItemPtr aNode, previous=NULL; int i; /* Create a node to store the itemset */ aLargeItemset = (LargeItemPtr) malloc (sizeof(ItemsetNode)); if (aLargeItemset == NULL) {	printf("out of memory\n");	exit(1); } aLargeItemset->itemset = (int *) malloc (sizeof(int) * (index+1)); if (aLargeItemset->itemset == NULL) {	printf("out of memory\n");	exit(1); } /* Store the support of the itemset */ aLargeItemset->support = patternSupport; /* Store the items of the itemset */ for (i=0; i <= index; i++) {	aLargeItemset->itemset[i] = pattern[i]; } aLargeItemset->next = NULL; /* Assign aNode to point to the head of the resulting list */ aNode = largeItemset[index]; /* Insert the itemset to the (index+1)-itemset resulting list.   * There are 3 cases for the insertion:  *	1. The resulting list is empty  *		-- insert the itemset to the head of the list  *	2. The itemset should be inserted somewhere between   *	   the head and tail of the list  *		-- locate the suitable position of the list according to its support  *		-- insert the itemset  *	3. The itemset's support is less than the supports of all the itemsets in the list  *		-- append the itemset at the end of the list  */ if (aNode == NULL) {	/* Case 1: The resulting list is empty */		largeItemset[index] = aLargeItemset; } else { 	while ((aNode != NULL) && (aNode->support > patternSupport)) {		previous = aNode;		aNode = aNode->next; 	}	/* Case 2: Insert between head and tail of the list */ 	if (previous != NULL) {		previous->next = aLargeItemset;		aLargeItemset->next = aNode;	} else {		/* Case 3: Append to the end of the list */		aLargeItemset->next = largeItemset[index];		largeItemset[index] = aLargeItemset;	} } /* Update the counter for the number of large (index+1)-itemsets in the list */ (numLarge[index])++;  return;}/****************************************************************************************** * Function: combine * * Description: *	Make all possible combinations of itemsets for a single path FP-tree.	 *	Any of the combinations is a large itemset. * * Invoked from:	 *	FPgrowth() *	combine() *  * Functions to be invoked: *	addToLargeList() *	combine() * * Input parameters: *  	*itemList	-> Array to hold all items in the FP-tree path *  	*support	-> Counts of the items in the path (in *itemList) *  	*base		-> A large itemset discovered which will be used to combine *			   with additional items in *itemList to form more large itemsets  *  	start		-> Position in *itemList where the base is formed *              	   from subset of the set of items in the prefix to this position, *			   and new large pattern will combine the base with *			   one additional element from the suffix to this position. *  	baseSize	-> Size of the base, i.e. number of items in base *     */void combine(int *itemList, int *support, int start, int itemListSize, int *base, int baseSize){ int *pattern; int i, j;  if (baseSize >= realK) return; if (start == itemListSize) return; /* Create an array of size (baseSize+1) to store any itemset formed from  * the union of *base and   * any item of *itemsetListSize from the position of start to the end  */ pattern = (int *) malloc (sizeof(int) * (baseSize+1)); if (pattern == NULL) {	printf("out of memory\n");	exit(1); } /* Insert all the items in base[] to pattern[]   */ for (j=0; j < baseSize; j++)	pattern[j] = base[j]; for (i=start; i < itemListSize; i++) {	/* Append an item, itemList[i], to pattern[] 	 */	pattern[baseSize] = itemList[i];	/* Insert pattern[] to the result list of large (baseSizes+1)-itemsets.	 * Support of this itemset = support[i] 	 */	addToLargeList(pattern , support[i], baseSize);	/* Form pattern[] with 	 * any item in *itemListSize from position (i+1) to the end of itemListSize	 */	combine(itemList, support, i+1, itemListSize, pattern, baseSize+1);	 }
12 3 下一页
💿 文件大小 38 K
👤 上传用户 feitiandy001
📂 所属分类其他
📄 代码行数 1,585 行
💻 语言类型 C语言
🏷️ 相关标签

#fp-treevc
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -