⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fpt.c

📁 数据挖掘相关算法
💻 C
📖 第 1 页 / 共 3 页
字号:
	 * Size of the base should be (baseSize + 1). 	 */	pattern = (int *) malloc (sizeof(int) * (baseSize + 1));	if (pattern == NULL) {		printf("out of memory\n");		exit(1);	} 	/* Visit the header table in a top-down manner. 	 * -- Find the conditional pattern base for each base item in the header table.	 * -- Find the frequent items of the conditional pattern base of the base item.	 * -- Build conditional FP-tree for the base item.	 * -- Recursively mine the conditional FP-tree.	 */	for (i=0; i < headerSize; i++) {		/* Add the item to the base of its conditional FP-tree		 */		pattern[0] = headerTableLink[i]->item;		/* Add the base of T to the base of the conditional FP-tree		 */		for (j=0; j < baseSize; j++) {			pattern[j+1] = baseItems[j];		}		aNode = headerTableLink[i];		patternSupport = 0;		/* Count the support of the base of the conditional FP-tree		 */		while (aNode != NULL) {			patternSupport += aNode->count;			aNode = aNode->hlink;		}		/* Add the itemset formed by the base items 		 * to the resulting list because it must be large. 		 */		addToLargeList(pattern, patternSupport, baseSize);		/* Find frquent items, build conditional FP-tree and perform mining.		 */		genConditionalPatternTree(pattern, baseSize, patternSupport,				conLargeItem, conLargeItemSupport, T,				i, headerSize, headerTableLink);	}	free(pattern);	free(conLargeItem);	free(conLargeItemSupport); } return;}/****************************************************************************************** * Function: pass1() * * Description: *	Scan the DB and find the support of each item. *	Find the large 1-itemsets according to the support threshold. * * Invoked from:	 *	main() * * Functions to be invoked: *	q_sortD() * * Global variables: *	largeItem1[]	-> Array to store 1-itemsets *	support1[]	-> Support[i] = support of the 1-itemset stored in largeItem[i] *	largeItemset[]	-> largeItemset[i] = resulting list for large (i+1)-itemsets *	realK		-> Maximum size of itemset to be mined *	numLarge[]	-> numLarge[i] = Number of large (i+1)-itemsets discovered so far * * Global variables (read only): *	numTrans	-> number of transactions in the database *	expectedK	-> User specified maximum size of itemset to be mined *	dataFile	-> Database file *	 */void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j; /* Initialize the 1-itemsets list and support list */ support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) {	printf("out of memory\n");	exit(1); } for (i=0; i < numItem; i++) { 	support1[i] = 0;	largeItem1[i] = i; } /* scan DB to count the frequency of each item */ if ((fp = fopen(dataFile, "r")) == NULL) {        printf("Can't open data file, %s.\n", dataFile);        exit(1); } /* Scan each transaction of the DB */ for (i=0; i < numTrans; i++) {	/* Read the transaction size */	fscanf(fp, "%d", &transSize);	/* Mark down the largest transaction size found so far */	if (transSize > maxSize)		maxSize = transSize;	/* Read the items in the transaction */	for (j=0; j < transSize; j++) {		fscanf(fp, "%d", &item);		support1[item]++;	} }  fclose(fp);  /* Determine the upper limit of itemset size to be mined according to DB and user input.   * If the user specified maximum itemset size (expectedK) is greater than   * the largest transaction size (maxSize) in the database, or  exptectedK <= 0,  * then set  realK = maxSize;  * otherwise  realK = expectedK  */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0))	realK = maxSize; printf("max transaction sizes = %d\n", maxSize); printf("max itemset size (K_max) to be mined  = %d\n", realK); /* Initialize large k-itemset resulting list and corresponding support list */ largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK);  numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) {	printf("out of memory\n");	exit(1); } for (i=0; i < realK; i++)  {	largeItemset[i] = NULL;	numLarge[i] = 0; } /* Sort the supports of 1-itemsets in descending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++)  	printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ numLarge[0] = 0; while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold))	(numLarge[0])++; printf("\nNo. of large 1-itemsets (numLarge[0]) = %d\n", numLarge[0]); return;}/****************************************************************************************** * Function: buildTree() * * Description: *	Build the initial FP-tree. * * Invoked from:	 *	main() * * Functions to be invoked: *	insert_tree() *	q_sortA() * * Global variables: *	root		-> Pointer to the root of this initial FP-tree *	headerTableLink	-> Header table for this initial FP-tree * * Global variables (read only): *	numLarge[]	-> Large k-itemsets resulting list for k = 1 to realK */void buildTree(){ int *freqItemP;	/* Store frequent items of a transaction */ int *indexList;	/* indexList[i] = the index position in the large 1-item list storing freqItemP[i] */ int count;		/* Number of frequent items in a transaction */ FILE *fp;		/* Pointer to the database file */ int transSize;		/* Transaction size */ int item;		/* An item in the transaction */ int i, j, m; int path;		/* Number of new tree paths (i.e. new leaf nodes) created so far */ /* Create header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]); if (headerTableLink == NULL) {	printf("out of memory\n");	exit(1); } for (i=0; i < numLarge[0]; i++)	headerTableLink[i] = NULL;	 /* Create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) {	printf("out of memory\n");	exit(1); } /* Initialize the root node */ root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; /* Create freqItemP to store frequent items of a transaction */ freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) {	printf("out of memory\n");	exit(1); }	 indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) {	printf("out of memory\n");	exit(1); }	 /* scan DB and insert frequent items into the FP-tree */ if ((fp = fopen(dataFile, "r")) == NULL) {        printf("Can't open data file, %s.\n", dataFile);        exit(1); } for (i=0; i < numTrans; i++) {	/* Read the transaction size */	fscanf(fp, "%d", &transSize);	count = 0; 	path = 0;	for (j=0; j < transSize; j++) {		/* Read a transaction item */		fscanf(fp, "%d", &item);		/* Store the item to the frequent list, freqItemP, 		 * if it is a large 1-item.		 */		for (m=0; m < numLarge[0]; m++) {			if (item == largeItem1[m]) {				/* Store the item */				freqItemP[count] = item;				/* Store the position in the large 1-itemset list storing this item */				indexList[count] = m;				count++;				break;			} 		}	}	/* Sort the items in the frequent item list in ascending order of indexList,	 * i.e. sort according to the order of the large 1-itemset list	 */	q_sortA(indexList, freqItemP, 0, count-1, count);	/* Insert the frequent patterns of this transaction to the FP-tree. */	insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path); }  fclose(fp); free(freqItemP); free(indexList); free(largeItem1); free(support1); return;}/****************************************************************************************** * Function: displayResult * * Description: *	Output the large itemsets of all sizes *	to both screen and result file. * * Invoked from:	 *	main() * * Functions to be invoked: None * * Global variables (read only): *	realK		-> maximum size of itemsets *	numLarge[]	-> numLarge[i] = large (i+1)-itemsets' resulting list *	outFile		-> result file to store the output result */void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int i, j; if ((fp = fopen(outFile, "w")) == NULL) {        printf("Can't open data file, %s.\n", outFile);        exit(1); } fprintf(fp, "K_{max} = %d\n\n", realK); for (i=0; i < realK; i++) {	fprintf(fp, "%d Large %d-itemsets:\n", numLarge[i], i+1);	if (numLarge[i] == 0) break;	printf("\n%d Large %d-itemsets[support]:\n", numLarge[i], i+1);	/* Visit the large (i+1)-itemsets' resulting list */	aLargeItemset = largeItemset[i];	/* print out the large (i+1)-itemsets */	while (aLargeItemset != NULL) {		/* print an (i+1)-itemset */		for (j=0; j <= i; j++) {			printf("%d ", aLargeItemset->itemset[j]);			fprintf(fp, "%d ", aLargeItemset->itemset[j]);		}		/* print the support of the itemset */		printf("[%d]\n", aLargeItemset->support);		fprintf(fp, "[%d]\n", aLargeItemset->support);		aLargeItemset = aLargeItemset->next;	}	printf("\n");	fprintf(fp, "\n"); } fclose(fp); return;}/****************************************************************************************** * Function: input * * Description: *	Read the input parameters from the configuration file. * * Invoked from:	 *	main() * * Functions to be invoked: None * * Input parameters: *	*configFile	-> The configuration file * * Global variables: *	expectedK		-> User specified maximum size of itemset to be mined *	thresholdDecimal	-> Normalized support threshold, range: (0, 1] *	numItem			-> Total number of different items in the DB *	numTrans		-> Total number of transactions in the DB *	dataFile		-> Data file *	outFile			-> Result file for storing the large itemsets */void input(char *configFile){ FILE *fp; float thresholdDecimal; if ((fp = fopen(configFile, "r")) == NULL) {        printf("Can't open config. file, %s.\n", configFile);        exit(1); } fscanf(fp, "%d %f %d %d", &expectedK, &thresholdDecimal, &numItem, &numTrans); fscanf(fp, "%s %s", dataFile, outFile); fclose(fp); printf("expectedK = %d\n", expectedK); printf("thresholdDecimal = %f\n", thresholdDecimal); printf("numItem = %d\n", numItem); printf("numTrans = %d\n", numTrans); printf("dataFile = %s\n", dataFile); printf("outFile = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; if (threshold == 0) threshold = 1; printf("threshold = %d\n", threshold); return;}/****************************************************************************************** * Function: main * * Description: *	This function reads the config. file for six input parameters, *	finds the frequent 1-itemsets, builds the initial FP-tree  *	using the frequent 1-itemsets and  *	peforms the FP-growth algorithm of the paper. *	It measure both CPU and I/O time for build tree and mining. * * Functions to be invoked:  *	input()		-> Read config. file *	pass1()		-> Scan DB and find frquent 1-itemsets *	buildTree()	-> Build the initial FP-tree *	FPgrowth()	-> Start mining *	 * Parameters: *	Config. file name */void main(int argc, char *argv[]){ float  userTime, sysTime; struct rusage myTime1, myTime2, myTime3; int headerTableSize; /* Usage ------------------------------------------*/ printf("\nFP-tree: Mining large itemsets using user support threshold\n\n"); if (argc != 2) {        printf("Usage: %s <config. file>\n\n", argv[0]);	printf("Content of config. file:\n");	printf("  Line 1: Upper limit of large itemsets size to be mined\n");	printf("  Line 2: Support threshold (normalized to [0, 1])\n");	printf("  Line 3: No. of different items in the DB\n");	printf("  Line 4: No. of transactions in the DB\n");	printf("  Line 5: File name of the DB\n");	printf("  Line 6: Result file name to store the large itemsets\n\n");        exit(1); } /* read input parameters --------------------------*/ printf("input\n"); input(argv[1]); getrusage(RUSAGE_SELF,&myTime1); /* pass 1 : Mine the large 1-itemsets -------------*/ printf("\npass1\n"); pass1(); /* Mine the large k-itemsets (k = 2 to realK) -----*/ if (numLarge[0] > 0) {	/* create FP-tree --------------------------*/ 	printf("\nbuildTree\n"); 	buildTree();	getrusage(RUSAGE_SELF,&myTime2);	/* mining frequent patterns ----------------*/ 	printf("\npassK\n");	headerTableSize = numLarge[0];	numLarge[0] = 0; 	FPgrowth(root, headerTableLink, headerTableSize, NULL, 0); 	getrusage(RUSAGE_SELF,&myTime3); 	/* output result of large itemsets ---------*/ 	printf("\nresult\n"); 	displayResult(); 	/* output execution time ---------------------*/ 	printf("Build tree time:\n"); 	userTime =       		((float) (myTime2.ru_utime.tv_sec  - myTime1.ru_utime.tv_sec)) +       		((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; 	sysTime =       		((float) (myTime2.ru_stime.tv_sec  - myTime1.ru_stime.tv_sec)) +       		((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; 	printf("User time : %f seconds\n", userTime); 	printf("System time : %f seconds\n\n", sysTime); 	printf("FP-tree growth time:\n"); 	userTime =       		((float) (myTime3.ru_utime.tv_sec  - myTime2.ru_utime.tv_sec)) +       		((float) (myTime3.ru_utime.tv_usec - myTime2.ru_utime.tv_usec)) * 1e-6; 	sysTime =       		((float) (myTime3.ru_stime.tv_sec  - myTime2.ru_stime.tv_sec)) +       		((float) (myTime3.ru_stime.tv_usec - myTime2.ru_stime.tv_usec)) * 1e-6; 	printf("User time : %f seconds\n", userTime); 	printf("System time : %f seconds\n\n", sysTime);	printf("Total execution time:\n"); 	userTime =       		((float) (myTime3.ru_utime.tv_sec  - myTime1.ru_utime.tv_sec)) +       		((float) (myTime3.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; 	sysTime =       		((float) (myTime3.ru_stime.tv_sec  - myTime1.ru_stime.tv_sec)) +       		((float) (myTime3.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; 	printf("User time : %f seconds\n", userTime); 	printf("System time : %f seconds\n", sysTime);	printf("Total time: %f seconds\n\n", userTime + sysTime); } /* free memory ------------------------------------*/ printf("\ndestroy\n"); destroy(); return;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -