⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fptn20.c

📁 关联规则的频繁项集算法bomo的源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
 aNode = headerTable[baseIndex]; while (aNode != NULL) {	aNode2 = aNode->parent;	count = 0;	while (aNode2 != T) {		for (i=0; i < conHeaderSize; i++)			if (aNode2->item == conLargeItem[i]) {				freqItemP[count] = aNode2->item;				indexList[count] = i;				count++;				break;			}		aNode2 = aNode2->parent;	}	q_sortA(indexList, freqItemP, 0, count-1, count);	path = 0;	insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, 				*conRoot, *conHeader, &path);	aNode = aNode->hlink; } free(freqItemP); free(indexList); return;}void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, 			int *baseItems, int baseSize){ int count; int i, j; int *pattern; int patternSupport; int conHeaderSize; FPTreeNode *conHeader; FPTreeNode conRoot; FPTreeNode aNode, aNode2; int *conLargeItem; int *conLargeItemSupport; int localStartHeader=startHeader; if (baseSize >= realK) return; if (T == NULL) return; if (T->numPath == 1) {	//printf("single path\n");	conLargeItem = (int *) malloc (sizeof(int) * headerSize);	conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize);	if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) {		printf("out of memory\n");		exit(1);	}	count = 0;	if (T->children != NULL) aNode = T->children->node;	while (aNode != NULL) {		conLargeItem[count] = aNode->item;		conLargeItemSupport[count] = aNode->count;		count++;		if (aNode->children != NULL)			aNode = aNode->children->node;		else	aNode = NULL;	}	combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize);	free(conLargeItem);	free(conLargeItemSupport); } else {	//printf("multiple path\n");	pattern = (int *) malloc (sizeof(int) * (baseSize + 1));	if (pattern == NULL) {		printf("out of memory\n");		exit(1);	}	conLargeItem = (int *) malloc (sizeof(int) * headerSize);	conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize);	if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) {		printf("out of memory\n");		exit(1);	}	for (j=0; j < baseSize; j++) {		pattern[j+1] = baseItems[j];	}        if (localStartHeader > headerSize)                localStartHeader = headerSize;        for (i=localStartHeader-1; i >= 0; i--) {                pattern[0] = headerTableLink[i]->item;                aNode = headerTableLink[i];                patternSupport = 0;                while (aNode != NULL) {                        patternSupport += aNode->count;                        aNode = aNode->hlink;                }                if ((baseSize > 0) && (patternSupport < oldThreshold)                        && (patternSupport >= thresholdK[baseSize]))                        addToLargeList(pattern, patternSupport, baseSize);                /* generate conditional pattern base */                for (j=0; j < headerSize; j++)                        conLargeItemSupport[j] = 0;                aNode = headerTableLink[i];                conHeaderSize = 0;                while (aNode != NULL) {                        aNode2 = aNode->parent;                        while (aNode2 != T) {                                for (j=0; j < headerSize; j++)                                        if (aNode2->item == headerTableLink[j]->item) {                                                conLargeItemSupport[j] += aNode->count;                                                if ((conLargeItemSupport[j] >= threshold) &&                                                   (conLargeItemSupport[j] - aNode->count <                                                        threshold)) {                                                        conLargeItem[j] = aNode2->item;                                                        conHeaderSize++;                                                }                                                break;                                        }                                aNode2 = aNode2->parent;                        }                        aNode = aNode->hlink;                }                /* generate conditional pattern tree */                if (conHeaderSize > 0) {                        q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize);                        buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport,                                        T, headerTableLink, i, headerSize);                        FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1);                        free(conHeader);                        destroyTree(conRoot, 1);                }        }        for (i=localStartHeader; i < headerSize; i++) {		pattern[0] = headerTableLink[i]->item;		aNode = headerTableLink[i];		patternSupport = 0;		while (aNode != NULL) {			patternSupport += aNode->count;			aNode = aNode->hlink;		}		if (patternSupport < threshold)			break;		if ((baseSize > 0) && (patternSupport < oldThreshold)			&& (patternSupport >= thresholdK[baseSize]))			addToLargeList(pattern, patternSupport, baseSize);		/* generate conditional pattern base */		for (j=0; j < headerSize; j++)			conLargeItemSupport[j] = 0;		aNode = headerTableLink[i];		conHeaderSize = 0;		while (aNode != NULL) {			aNode2 = aNode->parent; 			while (aNode2 != T) {				for (j=0; j < headerSize; j++)					if (aNode2->item == headerTableLink[j]->item) {						conLargeItemSupport[j] += aNode->count;  						if ((conLargeItemSupport[j] >= threshold) &&						   (conLargeItemSupport[j] - aNode->count < 							threshold)) {							conLargeItem[j] = aNode2->item;							conHeaderSize++;							}						break;					}				aNode2 = aNode2->parent;			}			aNode = aNode->hlink;		}		/* generate conditional pattern tree */		if (conHeaderSize > 0) {			q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize);			buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, 					T, headerTableLink, i, headerSize);			FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); 			free(conHeader); 			destroyTree(conRoot, 1);		}	}	free(pattern);	free(conLargeItem);	free(conLargeItemSupport); } return;}int findCombination(int n, int m){ int factorial1=1, factorial2=1; int i; for (i=n; i > (n-m); i--)	factorial1 *= i; for (i=m; i > 1; i--)	factorial2 *= i; return (factorial1 / factorial2);}void findLimit(){ int i, j; for (i=1; i < realK; i++) {	for (j=i+1; j < realK; j++) {		limit[i] += findCombination(j, i);	}	if (limit[i] > N)		limit[i] = N; } return;}void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j; support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) {	printf("out of memory\n");	exit(1); } limit = (int *) malloc (sizeof(int) * numItem); if (limit == NULL) {	printf("out of memory\n");	exit(1); } for (i=0; i < numItem; i++)	limit[i] = 0; for (i=0; i < numItem; i++) { 	support1[i] = 0;	largeItem1[i] = i; } /* scan DB to count frequency of each item */ if ((fp = fopen(dataFile, "r")) == NULL) {        printf("Can't open data file, %s.\n", dataFile);        exit(1); } for (i=0; i < numTrans; i++) {	fscanf(fp, "%d", &transSize);	if (transSize > maxSize)		maxSize = transSize;	(limit[transSize-1])++;	for (j=0; j < transSize; j++) {		fscanf(fp, "%d", &item);		(support1[item])++;	} }  fclose(fp);  /* initialize large itemset list and support list */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0))	realK = maxSize; printf("maxSize = %d\nrealK = %d\n", maxSize, realK); largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK);  numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) { 	printf("out of memory\n");	exit(1); } for (i=0; i < realK; i++)  {	largeItemset[i] = NULL;	numLarge[i] = 0; } /* sort the support in ascending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++)  	printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ headerTableSize = N; while ((headerTableSize < numItem) && (support1[N-1] == support1[headerTableSize]))	headerTableSize++; numLarge[0] = headerTableSize; headerTableSize = numItem; while (support1[headerTableSize-1] < 1)	headerTableSize--; /* if (headerTableSize >  realK) 	threshold = support1[N-1]; else {	headerTableSize = realK; 	while ((headerTableSize < numItem) && (support1[realK] == support1[headerTableSize]))		headerTableSize++;	if (realK < numItem) 		threshold = support1[realK];	else	threshold = support1[realK-1]; } */ printf("\nheaderTableSize=%d\n", headerTableSize); printf("numLarge[0]=%d\n", numLarge[0]); findLimit(); return;}void buildTree(){ int *freqItemP; int *indexList; int count; FILE *fp; int transSize; int item; int i, j, m; int path; /* build header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * headerTableSize); if (headerTableLink == NULL) {	printf("out of memory\n");	exit(1); } for (i=0; i < numItem; i++)	headerTableLink[i] = NULL; /* create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) {	printf("out of memory\n");	exit(1); } root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) {	printf("out of memory\n");	exit(1); }	 indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) {	printf("out of memory\n");	exit(1); }	 /* scan DB and insert frequent items into the FP-tree */ if ((fp = fopen(dataFile, "r")) == NULL) {        printf("Can't open data file, %s.\n", dataFile);        exit(1); } for (i=0; i < numTrans; i++) {	fscanf(fp, "%d", &transSize);	count = 0; 	path = 0;	for (j=0; j < transSize; j++) {		fscanf(fp, "%d", &item);		for (m=0; m < headerTableSize; m++) {			if (item == largeItem1[m]) {				freqItemP[count] = item;				indexList[count] = m;				count++;				break;			} 		}	}	q_sortA(indexList, freqItemP, 0, count-1, count);	insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, 				root, headerTableLink, &path); }  fclose(fp); free(freqItemP); free(indexList); return;}void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int count; int support; int i, j; if ((fp = fopen(outFile, "w")) == NULL) {        printf("Can't open data file, %s.\n", outFile);        exit(1); } fprintf(fp, "%d\n\n", realK); fprintf(fp, "%d\n", numLarge[0]); if (numLarge[0] > 0) {        printf("\nLarge %d-itemsets[support]:\n", 1);        for (i=0; i < numLarge[0]; i++) {                printf("%d ", largeItem1[i]);                fprintf(fp, "%d ", largeItem1[i]);                printf("[%d]\n", support1[i]);                fprintf(fp, "  %d\n", support1[i]);        }        printf("\n");        fprintf(fp, "\n"); } for (i=1; i < realK; i++) {	if (numLarge[i] == 0) break;	fprintf(fp, "%d\n", numLarge[i]);	printf("\nLarge %d-itemsets[support]:\n", i+1);	aLargeItemset = largeItemset[i];	count = 0;	while (aLargeItemset != NULL) {		count++;		if (count == N)			support = aLargeItemset->support;		else if (count > N)			if (aLargeItemset->support < support)				break;		for (j=0; j <= i; j++) {			printf("%d ", aLargeItemset->itemset[j]);			fprintf(fp, "%d ", aLargeItemset->itemset[j]);		}		printf("[%d]\n", aLargeItemset->support);		fprintf(fp, "  %d\n", aLargeItemset->support);		aLargeItemset = aLargeItemset->next;	}	printf("\n");	fprintf(fp, "\n"); } fclose(fp); return;}void input(char *configFile){ FILE *fp; float thresholdDecimal; if ((fp = fopen(configFile, "r")) == NULL) {        printf("Can't open config. file, %s.\n", configFile);        exit(1); } fscanf(fp, "%d %d %f %d %d %f", &N, &expectedK, &thresholdDecimal, &numItem, &numTrans, &decreaseRate); fscanf(fp, "%s %s %s", dataFile, treeFile, outFile); fclose(fp); printf("N = %d\n", N); printf("expectedK = %d\n", expectedK); printf("thresholdDecimal = %f (dummy)\n", thresholdDecimal); printf("numItem = %d\n", numItem); printf("numTrans = %d\n", numTrans); printf("decreaseRate = %f\n", decreaseRate); printf("dataFile = %s\n", dataFile); printf("treeFile = %s\n", treeFile); printf("outFile = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; printf("threshold = %d (dummy)\n", threshold); oldThreshold = numTrans; if (N > numItem) {	printf("N > numItem\n");	exit(1); }	  return;}void main(int argc, char *argv[]){ float  userTime, sysTime; struct rusage myTime1, myTime2; /* usage ------------------------------------------*/ printf("fptN20.c\n"); printf("FP-tree (finding N-most interesting large itemsets)\n"); if (argc != 3) {        printf("Usage: fptN20 <config. file> <where to start scanning the header table>\n");        exit(1); } /* read input parameters --------------------------*/ printf("input\n"); input(argv[1]); startHeader = atoi(argv[2]); getrusage(RUSAGE_SELF,&myTime1); /* pass 1 -----------------------------------------*/ printf("\npass1\n"); pass1(); if (startHeader == 0)	startHeader = headerTableSize / 2; printf("startHeader=%d\n", startHeader); /* create FP-tree --------------------------*/ printf("\nbuildTree\n"); buildTree(); /* initialize thresholdK -------------------*/ printf("\ninitThreshold\n"); initThresholdK(); round++; printf("round = %d, threshold = %d\n", round, threshold); /* mining frequent patterns ----------------*/ printf("\npassK\n"); FPgrowth(root, headerTableLink, headerTableSize, NULL, 0); getrusage(RUSAGE_SELF,&myTime2); /* output result of large itemsets ----------------*/ printf("\nresult\n"); displayResult(); /* free memory ------------------------------------*/ printf("\ndestroy\n"); destroy(); /* output running time ----------------------------*/ printf("build tree time:\n"); userTime =     		((float) (myTime2.ru_utime.tv_sec  - myTime1.ru_utime.tv_sec)) +       		((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; sysTime =       		((float) (myTime2.ru_stime.tv_sec  - myTime1.ru_stime.tv_sec)) +       		((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; printf("User time : %f seconds\n",userTime); printf("System time : %f seconds\n\n",sysTime); return;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -