⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fpgrowth.cpp

📁 我编写的能够实现频繁关联模式挖掘的FP-Growth数据挖掘算法。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
			if (ancestorNode->item == conLargeItem[i]) {				freqItemP[count] = ancestorNode->item;				indexList[count] = i;				count++;				break;			}		}		ancestorNode = ancestorNode->parent;	}	q_sortA(indexList, freqItemP, 0, count-1, count);	path = 0;	/* Insert the frequent items to the temp FP-tree.	 */	insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path);	aNode = aNode->hlink; } free(freqItemP); free(indexList); return;}void gentempPatternTree(int *pattern, int baseSize, int patternSupport,				int *conLargeItem, int *conLargeItemSupport, FPTreeNode T,				int headerIndex, int headerSize, FPTreeNode *headerTableLink){ int conHeaderSize; FPTreeNode *conHeader; FPTreeNode conRoot; FPTreeNode aNode, ancestorNode; int j; for (j=0; j < headerSize; j++)	conLargeItemSupport[j] = 0; aNode = headerTableLink[headerIndex]; conHeaderSize = 0; while (aNode != NULL) {	ancestorNode = aNode->parent; 	while (ancestorNode != T) {		for (j=0; j < headerSize; j++) {			if (ancestorNode->item == headerTableLink[j]->item) {				/* Increment the temp support count 				 * for this ancestor item    				 */				conLargeItemSupport[j] += aNode->count;  				if ((conLargeItemSupport[j] >= threshold) &&				   (conLargeItemSupport[j] - aNode->count < 					threshold)) {					/* Add the ancestor item to the temp pattern base,					 * and update the number of items in the					 * temp header table 					 */					conLargeItem[j] = ancestorNode->item;					conHeaderSize++;					}				break;			}		}		ancestorNode = ancestorNode->parent;	}	/* Next node of the FP-tree containing the base item	 */	aNode = aNode->hlink; } q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize); /* Generate the temp FP-tree and mine recursively   */ if (conHeaderSize > 0) {	/* Build temp FP-tree 	 */	buildtemptree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, 			T, headerTableLink, headerIndex, headerSize);	/* Mining 	 */	FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); 	free(conHeader); 	destroyTree(conRoot); } return;}void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize){ int count; int i, j; int *pattern; int patternSupport; FPTreeNode aNode = NULL; int *conLargeItem; int *conLargeItemSupport; if (baseSize >= realK) return; if (T == NULL) return; conLargeItem = (int *) malloc (sizeof(int) * headerSize); conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize); if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) {	printf("out of memory\n");	exit(1); } if (T->numPath == 1) {	/* Case 1: Single Path */	count = 0;	if (T->children != NULL) aNode = T->children->node;	/* Visit the path in top-down manner, and store the items and count values 	 */	while (aNode != NULL) {		conLargeItem[count] = aNode->item;		conLargeItemSupport[count] = aNode->count;		count++;		if (aNode->children != NULL)			aNode = aNode->children->node;		else	aNode = NULL;	}	combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize);	/*zzz*/	free(conLargeItem);	free(conLargeItemSupport); } else {	/* Multiple Path */	pattern = (int *) malloc (sizeof(int) * (baseSize + 1));	if (pattern == NULL) {		printf("out of memory\n");		exit(1);	}	for (i=0; i < headerSize; i++) {		/* Add the item to the base of its temp FP-tree		 */		pattern[0] = headerTableLink[i]->item;		/* Add the base of T to the base of the temp FP-tree		 */		for (j=0; j < baseSize; j++) {			pattern[j+1] = baseItems[j];		}		aNode = headerTableLink[i];		patternSupport = 0;		/* Count the support of the base of the temp FP-tree		 */		while (aNode != NULL) {			patternSupport += aNode->count;			aNode = aNode->hlink;		}    	addToLargeList(pattern, patternSupport, baseSize);		/* Find frquent items, build temp FP-tree and perform mining.		 */		gentempPatternTree(pattern, baseSize, patternSupport,				conLargeItem, conLargeItemSupport, T,i, headerSize, headerTableLink);		  }	free(pattern);	free(conLargeItem);	free(conLargeItemSupport); } return;}void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j,readcnt,temp; ifstream in(dataFile); char str[255];  char cID[6]; /* Initialize the 1-itemsets list and support list */ support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) {	printf("out of memory\n");	exit(1); } for (i=0; i < numItem; i++) { 	support1[i] = 0;	largeItem1[i] = i; } /* scan DB to count the frequency of each item */


 while(!in.eof())								
	{
		in.getline(str,sizeof(str));
		memset(cID, '\0', 6);
		readcnt=0;
		transSize=0;
		
		for(i=0;i<sizeof(str);i++)
		{
			if (str[i]==' '||str[i]==','||i==sizeof(str)-1)
			{
				temp=atoi(cID);
				memset(cID, 0, 6);
				readcnt=0;
				transSize++;
				item=temp;
				support1[item]++;
			}
			else	
			{
				cID[readcnt]=str[i];
				readcnt++;
			}
		}
		if (transSize > maxSize)
		maxSize = transSize;

	}
 
in.close();/***/ /*for(i=0;i<=numItem;i++)	 support1[i]=0; */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0))	realK = maxSize; printf("最大项目集长度 = %d\n", maxSize); printf("输出的最大项目集长度 (K_max)  = %d\n", realK); /* Initialize large k-itemset resulting list and corresponding support list */ largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK);  numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) {	printf("out of memory\n");	exit(1); } for (i=0; i < realK; i++)  {	largeItemset[i] = NULL;	numLarge[i] = 0; } /* Sort the supports of 1-itemsets in descending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++)  	printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ numLarge[0] = 0; while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold))	(numLarge[0])++; printf("\n频繁的单个项目个数 = %d\n", numLarge[0]); in.close(); return;}void buildTree(){ int *freqItemP;	/* Store frequent items of a transaction */ int *indexList;	 int count;		/* Number of frequent items in a transaction */ FILE *fp;		/* Pointer to the database file */ int transSize;		/* Transaction size */ int item;		 int path;		 ifstream in(dataFile); char str[255];  char cID[6]; int i,j,m,readcnt,temp; /* Create header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]); if (headerTableLink == NULL) {	printf("out of memory\n");	exit(1); } for (i=0; i < numLarge[0]; i++)	headerTableLink[i] = NULL;	 /* Create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) {	printf("out of memory\n");	exit(1); } /* Initialize the root node */ root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; /* Create freqItemP to store frequent items of a transaction */ freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) {	printf("out of memory\n");	exit(1); }	 indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) {	printf("out of memory\n");	exit(1); }	while(!in.eof())								{		in.getline(str,sizeof(str));		memset(cID, '\0', 6);		readcnt=0;		count=0;		path=0;				for(i=0;i<sizeof(str);i++)		{			if (str[i]==' '||str[i]==','||i==sizeof(str)-1)			{				temp=atoi(cID);				memset(cID, 0, 6);				readcnt=0;				item=temp;				/****/				for (m=0; m < numLarge[0]; m++) {			if (item == largeItem1[m]) {				/* Store the item */				freqItemP[count] = item;				/* Store the position in the large 1-itemset list storing this item */				indexList[count] = m;				count++;				break;				} 				}				/***/			}			else				{				cID[readcnt]=str[i];				readcnt++;			}		}		q_sortA(indexList, freqItemP, 0, count-1, count);	/* Insert the frequent patterns of this transaction to the FP-tree. */	insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path); }  in.close(); free(freqItemP); free(indexList); //free(largeItem1); //free(support1); return;}void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int i, j; if ((fp = fopen(outFile, "w")) == NULL) {        printf("不能打开文件, %s.\n", outFile);        exit(1); } fprintf(fp, "K_{max} = %d\n\n", realK); for (i=0; i < realK; i++) {		if (numLarge[i] == 0) break;	/* Visit the large (i+1)-itemsets' resulting list */	aLargeItemset = largeItemset[i];	/* print out the large (i+1)-itemsets */	while (aLargeItemset != NULL) {		/* print an (i+1)-itemset */		for (j=0; j <= i; j++) {			printf("%d ", aLargeItemset->itemset[j]);			fprintf(fp, "%d ", aLargeItemset->itemset[j]);		}			printf("[%d] ", aLargeItemset->support);		fprintf(fp, "[%d] ", aLargeItemset->support);

			printf("{%f}\n", aLargeItemset->corr);
		fprintf(fp, "{%f}\n", aLargeItemset->corr);		aLargeItemset = aLargeItemset->next;	}	printf("\n");	fprintf(fp, "\n"); } fclose(fp); return;}void input(){ FILE *fp; float thresholdDecimal; 	expectedK=25; 	thresholdDecimal=0.3;	numItem=128;	numTrans=8124;	correlate=0.7;	dataFile="inputFile";	outFile="resultFile";	threshold = thresholdDecimal * numTrans;	 printf("最大输出项目集长度 = %d\n", expectedK); printf("支持度 = %f\n", thresholdDecimal); printf("项目个数 = %d\n", numItem); printf("项目集个数 = %d\n", numTrans); printf("输入文件 = %s\n", dataFile); printf("输出文件 = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; if (threshold == 0) threshold = 1; printf("支持数 = %d\n", threshold); printf("关联度 = %f\n",correlate);	return;}void main(int argc, char *argv[]){ int i; int headerTableSize; time(&start); /* print parameters --*/ printf("状态\n"); input(); /* Mine 1-itemsets -*/ printf("\n初始化成功\n"); pass1();

  buildTree(); if (numLarge[0] > 0) {	 	printf("\nFP-Tree建立成功\n");
	 	buildTree();		 	headerTableSize = numLarge[0];	numLarge[0] = 0; 	FPgrowth(root, headerTableLink, headerTableSize, NULL, 0);  	printf("\n运行结果\n"); 	displayResult();  		 }
 /* free memory */ printf("\ndestroy\n");
 time(&finish);
 used_time = finish-start;
 printf("用时: %f秒",used_time);  destroy(); 
  getchar(); free(largeItem1); free(support1); 
   return;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -