📄 fpt.c
字号:
* Size of the base should be (baseSize + 1). */ pattern = (int *) malloc (sizeof(int) * (baseSize + 1)); if (pattern == NULL) { printf("out of memory\n"); exit(1); } /* Visit the header table in a top-down manner. * -- Find the conditional pattern base for each base item in the header table. * -- Find the frequent items of the conditional pattern base of the base item. * -- Build conditional FP-tree for the base item. * -- Recursively mine the conditional FP-tree. */ for (i=0; i < headerSize; i++) { /* Add the item to the base of its conditional FP-tree */ pattern[0] = headerTableLink[i]->item; /* Add the base of T to the base of the conditional FP-tree */ for (j=0; j < baseSize; j++) { pattern[j+1] = baseItems[j]; } aNode = headerTableLink[i]; patternSupport = 0; /* Count the support of the base of the conditional FP-tree */ while (aNode != NULL) { patternSupport += aNode->count; aNode = aNode->hlink; } /* Add the itemset formed by the base items * to the resulting list because it must be large. */ addToLargeList(pattern, patternSupport, baseSize); /* Find frquent items, build conditional FP-tree and perform mining. */ genConditionalPatternTree(pattern, baseSize, patternSupport, conLargeItem, conLargeItemSupport, T, i, headerSize, headerTableLink); } free(pattern); free(conLargeItem); free(conLargeItemSupport); } return;}/****************************************************************************************** * Function: pass1() * * Description: * Scan the DB and find the support of each item. * Find the large 1-itemsets according to the support threshold. * * Invoked from: * main() * * Functions to be invoked: * q_sortD() * * Global variables: * largeItem1[] -> Array to store 1-itemsets * support1[] -> Support[i] = support of the 1-itemset stored in largeItem[i] * largeItemset[] -> largeItemset[i] = resulting list for large (i+1)-itemsets * realK -> Maximum size of itemset to be mined * numLarge[] -> numLarge[i] = Number of large (i+1)-itemsets discovered so far * * Global variables (read only): * numTrans -> number of transactions in the database * expectedK -> User specified maximum size of itemset to be mined * dataFile -> Database file * */void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j; /* Initialize the 1-itemsets list and support list */ support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < numItem; i++) { support1[i] = 0; largeItem1[i] = i; } /* scan DB to count the frequency of each item */ if ((fp = fopen(dataFile, "r")) == NULL) { printf("Can't open data file, %s.\n", dataFile); exit(1); } /* Scan each transaction of the DB */ for (i=0; i < numTrans; i++) { /* Read the transaction size */ fscanf(fp, "%d", &transSize); /* Mark down the largest transaction size found so far */ if (transSize > maxSize) maxSize = transSize; /* Read the items in the transaction */ for (j=0; j < transSize; j++) { fscanf(fp, "%d", &item); support1[item]++; } } fclose(fp); /* Determine the upper limit of itemset size to be mined according to DB and user input. * If the user specified maximum itemset size (expectedK) is greater than * the largest transaction size (maxSize) in the database, or exptectedK <= 0, * then set realK = maxSize; * otherwise realK = expectedK */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0)) realK = maxSize; printf("max transaction sizes = %d\n", maxSize); printf("max itemset size (K_max) to be mined = %d\n", realK); /* Initialize large k-itemset resulting list and corresponding support list */ largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK); numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < realK; i++) { largeItemset[i] = NULL; numLarge[i] = 0; } /* Sort the supports of 1-itemsets in descending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++) printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ numLarge[0] = 0; while ((numLarge[0] < numItem) && (support1[numLarge[0]] >= threshold)) (numLarge[0])++; printf("\nNo. of large 1-itemsets (numLarge[0]) = %d\n", numLarge[0]); return;}/****************************************************************************************** * Function: buildTree() * * Description: * Build the initial FP-tree. * * Invoked from: * main() * * Functions to be invoked: * insert_tree() * q_sortA() * * Global variables: * root -> Pointer to the root of this initial FP-tree * headerTableLink -> Header table for this initial FP-tree * * Global variables (read only): * numLarge[] -> Large k-itemsets resulting list for k = 1 to realK */void buildTree(){ int *freqItemP; /* Store frequent items of a transaction */ int *indexList; /* indexList[i] = the index position in the large 1-item list storing freqItemP[i] */ int count; /* Number of frequent items in a transaction */ FILE *fp; /* Pointer to the database file */ int transSize; /* Transaction size */ int item; /* An item in the transaction */ int i, j, m; int path; /* Number of new tree paths (i.e. new leaf nodes) created so far */ /* Create header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * numLarge[0]); if (headerTableLink == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < numLarge[0]; i++) headerTableLink[i] = NULL; /* Create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) { printf("out of memory\n"); exit(1); } /* Initialize the root node */ root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; /* Create freqItemP to store frequent items of a transaction */ freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) { printf("out of memory\n"); exit(1); } indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) { printf("out of memory\n"); exit(1); } /* scan DB and insert frequent items into the FP-tree */ if ((fp = fopen(dataFile, "r")) == NULL) { printf("Can't open data file, %s.\n", dataFile); exit(1); } for (i=0; i < numTrans; i++) { /* Read the transaction size */ fscanf(fp, "%d", &transSize); count = 0; path = 0; for (j=0; j < transSize; j++) { /* Read a transaction item */ fscanf(fp, "%d", &item); /* Store the item to the frequent list, freqItemP, * if it is a large 1-item. */ for (m=0; m < numLarge[0]; m++) { if (item == largeItem1[m]) { /* Store the item */ freqItemP[count] = item; /* Store the position in the large 1-itemset list storing this item */ indexList[count] = m; count++; break; } } } /* Sort the items in the frequent item list in ascending order of indexList, * i.e. sort according to the order of the large 1-itemset list */ q_sortA(indexList, freqItemP, 0, count-1, count); /* Insert the frequent patterns of this transaction to the FP-tree. */ insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path); } fclose(fp); free(freqItemP); free(indexList); free(largeItem1); free(support1); return;}/****************************************************************************************** * Function: displayResult * * Description: * Output the large itemsets of all sizes * to both screen and result file. * * Invoked from: * main() * * Functions to be invoked: None * * Global variables (read only): * realK -> maximum size of itemsets * numLarge[] -> numLarge[i] = large (i+1)-itemsets' resulting list * outFile -> result file to store the output result */void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int i, j; if ((fp = fopen(outFile, "w")) == NULL) { printf("Can't open data file, %s.\n", outFile); exit(1); } fprintf(fp, "K_{max} = %d\n\n", realK); for (i=0; i < realK; i++) { fprintf(fp, "%d Large %d-itemsets:\n", numLarge[i], i+1); if (numLarge[i] == 0) break; printf("\n%d Large %d-itemsets[support]:\n", numLarge[i], i+1); /* Visit the large (i+1)-itemsets' resulting list */ aLargeItemset = largeItemset[i]; /* print out the large (i+1)-itemsets */ while (aLargeItemset != NULL) { /* print an (i+1)-itemset */ for (j=0; j <= i; j++) { printf("%d ", aLargeItemset->itemset[j]); fprintf(fp, "%d ", aLargeItemset->itemset[j]); } /* print the support of the itemset */ printf("[%d]\n", aLargeItemset->support); fprintf(fp, "[%d]\n", aLargeItemset->support); aLargeItemset = aLargeItemset->next; } printf("\n"); fprintf(fp, "\n"); } fclose(fp); return;}/****************************************************************************************** * Function: input * * Description: * Read the input parameters from the configuration file. * * Invoked from: * main() * * Functions to be invoked: None * * Input parameters: * *configFile -> The configuration file * * Global variables: * expectedK -> User specified maximum size of itemset to be mined * thresholdDecimal -> Normalized support threshold, range: (0, 1] * numItem -> Total number of different items in the DB * numTrans -> Total number of transactions in the DB * dataFile -> Data file * outFile -> Result file for storing the large itemsets */void input(char *configFile){ FILE *fp; float thresholdDecimal; if ((fp = fopen(configFile, "r")) == NULL) { printf("Can't open config. file, %s.\n", configFile); exit(1); } fscanf(fp, "%d %f %d %d", &expectedK, &thresholdDecimal, &numItem, &numTrans); fscanf(fp, "%s %s", dataFile, outFile); fclose(fp); printf("expectedK = %d\n", expectedK); printf("thresholdDecimal = %f\n", thresholdDecimal); printf("numItem = %d\n", numItem); printf("numTrans = %d\n", numTrans); printf("dataFile = %s\n", dataFile); printf("outFile = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; if (threshold == 0) threshold = 1; printf("threshold = %d\n", threshold); return;}/****************************************************************************************** * Function: main * * Description: * This function reads the config. file for six input parameters, * finds the frequent 1-itemsets, builds the initial FP-tree * using the frequent 1-itemsets and * peforms the FP-growth algorithm of the paper. * It measure both CPU and I/O time for build tree and mining. * * Functions to be invoked: * input() -> Read config. file * pass1() -> Scan DB and find frquent 1-itemsets * buildTree() -> Build the initial FP-tree * FPgrowth() -> Start mining * * Parameters: * Config. file name */void main(int argc, char *argv[]){ float userTime, sysTime; struct rusage myTime1, myTime2, myTime3; int headerTableSize; /* Usage ------------------------------------------*/ printf("\nFP-tree: Mining large itemsets using user support threshold\n\n"); if (argc != 2) { printf("Usage: %s <config. file>\n\n", argv[0]); printf("Content of config. file:\n"); printf(" Line 1: Upper limit of large itemsets size to be mined\n"); printf(" Line 2: Support threshold (normalized to [0, 1])\n"); printf(" Line 3: No. of different items in the DB\n"); printf(" Line 4: No. of transactions in the DB\n"); printf(" Line 5: File name of the DB\n"); printf(" Line 6: Result file name to store the large itemsets\n\n"); exit(1); } /* read input parameters --------------------------*/ printf("input\n"); input(argv[1]); getrusage(RUSAGE_SELF,&myTime1); /* pass 1 : Mine the large 1-itemsets -------------*/ printf("\npass1\n"); pass1(); /* Mine the large k-itemsets (k = 2 to realK) -----*/ if (numLarge[0] > 0) { /* create FP-tree --------------------------*/ printf("\nbuildTree\n"); buildTree(); getrusage(RUSAGE_SELF,&myTime2); /* mining frequent patterns ----------------*/ printf("\npassK\n"); headerTableSize = numLarge[0]; numLarge[0] = 0; FPgrowth(root, headerTableLink, headerTableSize, NULL, 0); getrusage(RUSAGE_SELF,&myTime3); /* output result of large itemsets ---------*/ printf("\nresult\n"); displayResult(); /* output execution time ---------------------*/ printf("Build tree time:\n"); userTime = ((float) (myTime2.ru_utime.tv_sec - myTime1.ru_utime.tv_sec)) + ((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; sysTime = ((float) (myTime2.ru_stime.tv_sec - myTime1.ru_stime.tv_sec)) + ((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; printf("User time : %f seconds\n", userTime); printf("System time : %f seconds\n\n", sysTime); printf("FP-tree growth time:\n"); userTime = ((float) (myTime3.ru_utime.tv_sec - myTime2.ru_utime.tv_sec)) + ((float) (myTime3.ru_utime.tv_usec - myTime2.ru_utime.tv_usec)) * 1e-6; sysTime = ((float) (myTime3.ru_stime.tv_sec - myTime2.ru_stime.tv_sec)) + ((float) (myTime3.ru_stime.tv_usec - myTime2.ru_stime.tv_usec)) * 1e-6; printf("User time : %f seconds\n", userTime); printf("System time : %f seconds\n\n", sysTime); printf("Total execution time:\n"); userTime = ((float) (myTime3.ru_utime.tv_sec - myTime1.ru_utime.tv_sec)) + ((float) (myTime3.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; sysTime = ((float) (myTime3.ru_stime.tv_sec - myTime1.ru_stime.tv_sec)) + ((float) (myTime3.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; printf("User time : %f seconds\n", userTime); printf("System time : %f seconds\n", sysTime); printf("Total time: %f seconds\n\n", userTime + sysTime); } /* free memory ------------------------------------*/ printf("\ndestroy\n"); destroy(); return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -