📄 fpgrowth.cpp.bak
字号:
} } ancestorNode = ancestorNode->parent; } q_sortA(indexList, freqItemP, 0, count-1, count); path = 0; /* Insert the frequent items to the temp FP-tree. */ insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path); aNode = aNode->hlink; } free(freqItemP); free(indexList); return;}void gentempPatternTree(int *pattern, int baseSize, int patternSupport, int *conLargeItem, int *conLargeItemSupport, FPTreeNode T, int headerIndex, int headerSize, FPTreeNode *headerTableLink){ int conHeaderSize; FPTreeNode *conHeader; FPTreeNode conRoot; FPTreeNode aNode, ancestorNode; int j; for (j=0; j < headerSize; j++) conLargeItemSupport[j] = 0; aNode = headerTableLink[headerIndex]; conHeaderSize = 0; while (aNode != NULL) { ancestorNode = aNode->parent; while (ancestorNode != T) { for (j=0; j < headerSize; j++) { if (ancestorNode->item == headerTableLink[j]->item) { /* Increment the temp support count * for this ancestor item */ conLargeItemSupport[j] += aNode->count; if ((conLargeItemSupport[j] >= threshold) && (conLargeItemSupport[j] - aNode->count < threshold)) { /* Add the ancestor item to the temp pattern base, * and update the number of items in the * temp header table */ conLargeItem[j] = ancestorNode->item; conHeaderSize++; } break; } } ancestorNode = ancestorNode->parent; } /* Next node of the FP-tree containing the base item */ aNode = aNode->hlink; } q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize); /* Generate the temp FP-tree and mine recursively */ if (conHeaderSize > 0) { /* Build temp FP-tree */ buildtemptree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, T, headerTableLink, headerIndex, headerSize); /* Mining */ FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); free(conHeader); destroyTree(conRoot); } return;}void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize){ int count; int i, j; int *pattern; int patternSupport; FPTreeNode aNode = NULL; int *conLargeItem; int *conLargeItemSupport; if (baseSize >= realK) return; if (T == NULL) return; conLargeItem = (int *) malloc (sizeof(int) * headerSize); conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize); if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) { printf("out of memory\n"); exit(1); } if (T->numPath == 1) { /* Case 1: Single Path */ count = 0; if (T->children != NULL) aNode = T->children->node; /* Visit the path in top-down manner, and store the items and count values */ while (aNode != NULL) { conLargeItem[count] = aNode->item; conLargeItemSupport[count] = aNode->count; count++; if (aNode->children != NULL) aNode = aNode->children->node; else aNode = NULL; } combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize); /*zzz*/ free(conLargeItem); free(conLargeItemSupport); } else { /* Multiple Path */ pattern = (int *) malloc (sizeof(int) * (baseSize + 1)); if (pattern == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < headerSize; i++) { /* Add the item to the base of its temp FP-tree */ pattern[0] = headerTableLink[i]->item; /* Add the base of T to the base of the temp FP-tree */ for (j=0; j < baseSize; j++) { pattern[j+1] = baseItems[j]; } aNode = headerTableLink[i]; patternSupport = 0; /* Count the support of the base of the temp FP-tree */ while (aNode != NULL) { patternSupport += aNode->count; aNode = aNode->hlink; } addToLargeList(pattern, patternSupport, baseSize); /* Find frquent items, build temp FP-tree and perform mining. */ gentempPatternTree(pattern, baseSize, patternSupport, conLargeItem, conLargeItemSupport, T,i, headerSize, headerTableLink); } free(pattern); free(conLargeItem); free(conLargeItemSupport); } return;}void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j,readcnt,temp; ifstream in(dataFile); char str[255]; char cID[6]; /* Initialize the 1-itemsets list and support list */ support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < numItem; i++) { support1[i] = 0; largeItem1[i] = i; } /* scan DB to count the frequency of each item */while(!in.eof()) { in.getline(str,sizeof(str)); memset(cID, 0, 6); readcnt=0; for(i=0;i<sizeof(str);i++) { if (str[i]==' '||str[i]==',') { temp=atoi(cID); memset(cID, 0, 6); readcnt=0; item=temp; support1[item]++; } else { cID[readcnt]=str[i]; readcnt++; } } } in.close();/***/ /*for(i=0;i<=numItem;i++) support1[i]=0; */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0)) realK = maxSize; printf("max transaction sizes = %d\n", maxSize); printf("max itemset size (K_max) to be mined = %d\n", realK); /* Initialize large k-itemset resulting list and corresponding support list */ largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK); numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < realK; i++) { largeItemset[i] = NULL; numLarge[i] = 0; } /* Sort the supports of 1-itemsets in descending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++) printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ numLarge[0] = 0; while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold)) (numLarge[0])++; printf("\nNo. of large 1-itemsets (numLarge[0]) = %d\n", numLarge[0]); in.close(); return;}void buildTree(){ int *freqItemP; /* Store frequent items of a transaction */ int *indexList; int count; /* Number of frequent items in a transaction */ FILE *fp; /* Pointer to the database file */ int transSize; /* Transaction size */ int item; int path; ifstream in(dataFile); char str[255]; char cID[6]; int i,j,m,readcnt,temp; /* Create header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]); if (headerTableLink == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < numLarge[0]; i++) headerTableLink[i] = NULL; /* Create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) { printf("out of memory\n"); exit(1); } /* Initialize the root node */ root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; /* Create freqItemP to store frequent items of a transaction */ freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) { printf("out of memory\n"); exit(1); } indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) { printf("out of memory\n"); exit(1); } for(m=0;m<=numItem;m++) { in.getline(str,sizeof(str)); memset(cID, '\0', 6); readcnt=0; count=0; path=0; for(i=0;i<sizeof(str);i++) { if (str[i]==' '||str[i]==',') { temp=atoi(cID); memset(cID, 0, 6); readcnt=0; if (readcnt==0) item=temp; /****/ for (m=0; m < numLarge[0]; m++) { if (item == largeItem1[m]) { /* Store the item */ freqItemP[count] = item; /* Store the position in the large 1-itemset list storing this item */ indexList[count] = m; count++; break; } } /***/ } else { cID[readcnt]=str[i]; readcnt++; } } q_sortA(indexList, freqItemP, 0, count-1, count); /* Insert the frequent patterns of this transaction to the FP-tree. */ insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path); } in.close(); free(freqItemP); free(indexList); //free(largeItem1); //free(support1); return;}void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int i, j; if ((fp = fopen(outFile, "w")) == NULL) { printf("Can't open file, %s.\n", outFile); exit(1); } fprintf(fp, "K_{max} = %d\n\n", realK); for (i=0; i < realK; i++) { if (numLarge[i] == 0) break; /* Visit the large (i+1)-itemsets' resulting list */ aLargeItemset = largeItemset[i]; /* print out the large (i+1)-itemsets */ while (aLargeItemset != NULL) { /* print an (i+1)-itemset */ for (j=0; j <= i; j++) { printf("%d ", aLargeItemset->itemset[j]); fprintf(fp, "%d ", aLargeItemset->itemset[j]); } printf("[%d]\n", aLargeItemset->support); fprintf(fp, "[%d]\n", aLargeItemset->support); aLargeItemset = aLargeItemset->next; } printf("\n"); fprintf(fp, "\n"); } fclose(fp); return;}void input(){ FILE *fp; float thresholdDecimal; expectedK=10; thresholdDecimal=0.5; numItem=4; numTrans=6; correlate=0.5; dataFile="inputFile"; outFile="resultFile"; threshold = thresholdDecimal * numTrans; printf("expectedK = %d\n", expectedK); printf("thresholdDecimal = %f\n", thresholdDecimal); printf("numItem = %d\n", numItem); printf("numTrans = %d\n", numTrans); printf("dataFile = %s\n", dataFile); printf("outFile = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; if (threshold == 0) threshold = 1; printf("threshold = %d\n", threshold); printf("correlate = %f\n",correlate); return;}void main(int argc, char *argv[]){ int headerTableSize; /* print parameters --*/ printf("Status\n"); input(); /* Mine 1-itemsets -*/ printf("\nsucceed\n"); pass1(); if (numLarge[0] > 0) { /* create FP-tree --*/ printf("\nbuildTree\n"); buildTree(); /* mining frequent patterns -*/ headerTableSize = numLarge[0]; numLarge[0] = 0; FPgrowth(root, headerTableLink, headerTableSize, NULL, 0); /* output result*/ printf("\nresult\n"); displayResult(); /* output execution time */ printf("time:\n"); } /* free memory */ printf("\ndestroy\n"); destroy(); getchar(); free(largeItem1); free(support1); return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -