📄 fpgrowth.cpp
字号:
if (ancestorNode->item == conLargeItem[i]) { freqItemP[count] = ancestorNode->item; indexList[count] = i; count++; break; } } ancestorNode = ancestorNode->parent; } q_sortA(indexList, freqItemP, 0, count-1, count); path = 0; /* Insert the frequent items to the temp FP-tree. */ insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path); aNode = aNode->hlink; } free(freqItemP); free(indexList); return;}void gentempPatternTree(int *pattern, int baseSize, int patternSupport, int *conLargeItem, int *conLargeItemSupport, FPTreeNode T, int headerIndex, int headerSize, FPTreeNode *headerTableLink){ int conHeaderSize; FPTreeNode *conHeader; FPTreeNode conRoot; FPTreeNode aNode, ancestorNode; int j; for (j=0; j < headerSize; j++) conLargeItemSupport[j] = 0; aNode = headerTableLink[headerIndex]; conHeaderSize = 0; while (aNode != NULL) { ancestorNode = aNode->parent; while (ancestorNode != T) { for (j=0; j < headerSize; j++) { if (ancestorNode->item == headerTableLink[j]->item) { /* Increment the temp support count * for this ancestor item */ conLargeItemSupport[j] += aNode->count; if ((conLargeItemSupport[j] >= threshold) && (conLargeItemSupport[j] - aNode->count < threshold)) { /* Add the ancestor item to the temp pattern base, * and update the number of items in the * temp header table */ conLargeItem[j] = ancestorNode->item; conHeaderSize++; } break; } } ancestorNode = ancestorNode->parent; } /* Next node of the FP-tree containing the base item */ aNode = aNode->hlink; } q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize); /* Generate the temp FP-tree and mine recursively */ if (conHeaderSize > 0) { /* Build temp FP-tree */ buildtemptree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, T, headerTableLink, headerIndex, headerSize); /* Mining */ FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); free(conHeader); destroyTree(conRoot); } return;}void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize){ int count; int i, j; int *pattern; int patternSupport; FPTreeNode aNode = NULL; int *conLargeItem; int *conLargeItemSupport; if (baseSize >= realK) return; if (T == NULL) return; conLargeItem = (int *) malloc (sizeof(int) * headerSize); conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize); if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) { printf("out of memory\n"); exit(1); } if (T->numPath == 1) { /* Case 1: Single Path */ count = 0; if (T->children != NULL) aNode = T->children->node; /* Visit the path in top-down manner, and store the items and count values */ while (aNode != NULL) { conLargeItem[count] = aNode->item; conLargeItemSupport[count] = aNode->count; count++; if (aNode->children != NULL) aNode = aNode->children->node; else aNode = NULL; } combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize); /*zzz*/ free(conLargeItem); free(conLargeItemSupport); } else { /* Multiple Path */ pattern = (int *) malloc (sizeof(int) * (baseSize + 1)); if (pattern == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < headerSize; i++) { /* Add the item to the base of its temp FP-tree */ pattern[0] = headerTableLink[i]->item; /* Add the base of T to the base of the temp FP-tree */ for (j=0; j < baseSize; j++) { pattern[j+1] = baseItems[j]; } aNode = headerTableLink[i]; patternSupport = 0; /* Count the support of the base of the temp FP-tree */ while (aNode != NULL) { patternSupport += aNode->count; aNode = aNode->hlink; } addToLargeList(pattern, patternSupport, baseSize); /* Find frquent items, build temp FP-tree and perform mining. */ gentempPatternTree(pattern, baseSize, patternSupport, conLargeItem, conLargeItemSupport, T,i, headerSize, headerTableLink); } free(pattern); free(conLargeItem); free(conLargeItemSupport); } return;}void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j,readcnt,temp; ifstream in(dataFile); char str[255]; char cID[6]; /* Initialize the 1-itemsets list and support list */ support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < numItem; i++) { support1[i] = 0; largeItem1[i] = i; } /* scan DB to count the frequency of each item */
while(!in.eof())
{
in.getline(str,sizeof(str));
memset(cID, '\0', 6);
readcnt=0;
transSize=0;
for(i=0;i<sizeof(str);i++)
{
if (str[i]==' '||str[i]==','||i==sizeof(str)-1)
{
temp=atoi(cID);
memset(cID, 0, 6);
readcnt=0;
transSize++;
item=temp;
support1[item]++;
}
else
{
cID[readcnt]=str[i];
readcnt++;
}
}
if (transSize > maxSize)
maxSize = transSize;
}
in.close();/***/ /*for(i=0;i<=numItem;i++) support1[i]=0; */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0)) realK = maxSize; printf("最大项目集长度 = %d\n", maxSize); printf("输出的最大项目集长度 (K_max) = %d\n", realK); /* Initialize large k-itemset resulting list and corresponding support list */ largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK); numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < realK; i++) { largeItemset[i] = NULL; numLarge[i] = 0; } /* Sort the supports of 1-itemsets in descending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++) printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ numLarge[0] = 0; while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold)) (numLarge[0])++; printf("\n频繁的单个项目个数 = %d\n", numLarge[0]); in.close(); return;}void buildTree(){ int *freqItemP; /* Store frequent items of a transaction */ int *indexList; int count; /* Number of frequent items in a transaction */ FILE *fp; /* Pointer to the database file */ int transSize; /* Transaction size */ int item; int path; ifstream in(dataFile); char str[255]; char cID[6]; int i,j,m,readcnt,temp; /* Create header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]); if (headerTableLink == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < numLarge[0]; i++) headerTableLink[i] = NULL; /* Create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) { printf("out of memory\n"); exit(1); } /* Initialize the root node */ root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; /* Create freqItemP to store frequent items of a transaction */ freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) { printf("out of memory\n"); exit(1); } indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) { printf("out of memory\n"); exit(1); } while(!in.eof()) { in.getline(str,sizeof(str)); memset(cID, '\0', 6); readcnt=0; count=0; path=0; for(i=0;i<sizeof(str);i++) { if (str[i]==' '||str[i]==','||i==sizeof(str)-1) { temp=atoi(cID); memset(cID, 0, 6); readcnt=0; item=temp; /****/ for (m=0; m < numLarge[0]; m++) { if (item == largeItem1[m]) { /* Store the item */ freqItemP[count] = item; /* Store the position in the large 1-itemset list storing this item */ indexList[count] = m; count++; break; } } /***/ } else { cID[readcnt]=str[i]; readcnt++; } } q_sortA(indexList, freqItemP, 0, count-1, count); /* Insert the frequent patterns of this transaction to the FP-tree. */ insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path); } in.close(); free(freqItemP); free(indexList); //free(largeItem1); //free(support1); return;}void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int i, j; if ((fp = fopen(outFile, "w")) == NULL) { printf("不能打开文件, %s.\n", outFile); exit(1); } fprintf(fp, "K_{max} = %d\n\n", realK); for (i=0; i < realK; i++) { if (numLarge[i] == 0) break; /* Visit the large (i+1)-itemsets' resulting list */ aLargeItemset = largeItemset[i]; /* print out the large (i+1)-itemsets */ while (aLargeItemset != NULL) { /* print an (i+1)-itemset */ for (j=0; j <= i; j++) { printf("%d ", aLargeItemset->itemset[j]); fprintf(fp, "%d ", aLargeItemset->itemset[j]); } printf("[%d] ", aLargeItemset->support); fprintf(fp, "[%d] ", aLargeItemset->support);
printf("{%f}\n", aLargeItemset->corr);
fprintf(fp, "{%f}\n", aLargeItemset->corr); aLargeItemset = aLargeItemset->next; } printf("\n"); fprintf(fp, "\n"); } fclose(fp); return;}void input(){ FILE *fp; float thresholdDecimal; expectedK=25; thresholdDecimal=0.3; numItem=128; numTrans=8124; correlate=0.7; dataFile="inputFile"; outFile="resultFile"; threshold = thresholdDecimal * numTrans; printf("最大输出项目集长度 = %d\n", expectedK); printf("支持度 = %f\n", thresholdDecimal); printf("项目个数 = %d\n", numItem); printf("项目集个数 = %d\n", numTrans); printf("输入文件 = %s\n", dataFile); printf("输出文件 = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; if (threshold == 0) threshold = 1; printf("支持数 = %d\n", threshold); printf("关联度 = %f\n",correlate); return;}void main(int argc, char *argv[]){ int i; int headerTableSize; time(&start); /* print parameters --*/ printf("状态\n"); input(); /* Mine 1-itemsets -*/ printf("\n初始化成功\n"); pass1();
buildTree(); if (numLarge[0] > 0) { printf("\nFP-Tree建立成功\n");
buildTree(); headerTableSize = numLarge[0]; numLarge[0] = 0; FPgrowth(root, headerTableLink, headerTableSize, NULL, 0); printf("\n运行结果\n"); displayResult(); }
/* free memory */ printf("\ndestroy\n");
time(&finish);
used_time = finish-start;
printf("用时: %f秒",used_time); destroy();
getchar(); free(largeItem1); free(support1);
return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -