📄 fpt.c
字号:
free(pattern); return;}/****************************************************************************************** * Function: insert_tree * * Description: * This function is to insert a frequent pattern * of a transaction to the FP-tree (or conditional FP-tree). * The frequent pattern consists of a list of the frequent 1-items * of a transaction, and is sorted according to the sorted order of the * 1. frequent 1-items in function pass1(), if it is the initial FP-tree; * 2. frequent 1-items in the conditional pattern base, if it is a conditional FP-tree. * This function is recursively invoked and insert the (ptr+1)th item of the * frequent pattern in the (ptr+1)th round of recursion. * * There are 3 cases to handle the insertion of an item: * 1. The tree or subtree being visited has no children. * Create the first child and store the info of the item * to this first child. * 2. The tree or subtree has no children that match the current item. * Add a new child node to store the item. * 3. There is a match between the item and a child of the tree. * Increment the count of the child, and visit the subtree of this child. * * Invoked from: * buildTree() * buildConTree() * insertTree() * * Functions to be invoked: * insertTree() * * Parameters: * - freqItemP : The list of frequent items of the transaction. * They are sorted according to the order of frequent 1-items. * - indexList : 'indexList[i]' is the corresponding index of the header table * that represents the item 'freqItemP[i]'. * - count : The initial value for the 'count' of the FP-tree node for the current * freqItemP[i]. * It is equal to 1 if the FP-tree is the initial one, * otherwiese it is equal to the support of the base of * this conditional FP-tree. * - ptr : Number of items in the frequent pattern inserted so far. * - length : Number of items in the frequent pattern. * - T : The current FP-tree/subtree being visited so far. * - headerTableLink : Header table of the FP-tree. * - path : Number of new tree path (i.e. new leaf nodes) created so far for the insertions. */void insert_tree(int *freqItemP, int *indexList, int count, int ptr, int length, FPTreeNode T, FPTreeNode *headerTableLink, int *path) { childLink newNode; FPTreeNode hNode; FPTreeNode hPrevious; childLink previous; childLink aNode; /* If all the items have been inserted */ if (ptr == length) return; /* Case 1 : If the current subtree has no children */ if (T->children == NULL) { /* T has no children */ /* Create child nodes for this node */ newNode = (childLink) malloc (sizeof(ChildNode)); if (newNode == NULL) { printf("out of memory\n"); exit(1); } /* Create a first child to store the item */ newNode->node = (FPTreeNode) malloc (sizeof(FPNode)); if (newNode->node == NULL) { printf("out of memory\n"); exit(1); } /* Store information of the item */ newNode->node->item = freqItemP[ptr]; newNode->node->count = count; newNode->node->numPath = 1; newNode->node->parent = T; newNode->node->children = NULL; newNode->node->hlink = NULL; newNode->next = NULL; T->children = newNode; /* Link the node to the header table */ hNode = headerTableLink[indexList[ptr]]; if (hNode == NULL) { /* Place the node at the front of the horizontal link for the item */ headerTableLink[indexList[ptr]] = newNode->node; } else { /* Place the node at the end using the horizontal link */ while (hNode != NULL) { hPrevious = hNode; hNode = hNode->hlink; } hPrevious->hlink = newNode->node; } /* Insert next item, freqItemP[ptr+1], to the tree */ insert_tree(freqItemP, indexList, count, ptr+1, length, T->children->node, headerTableLink, path); T->numPath += *path; } else { aNode = T->children; while ((aNode != NULL) && (aNode->node->item != freqItemP[ptr])) { previous = aNode; aNode = aNode->next; } if (aNode == NULL) { /* Case 2: Create a new child for T */ newNode = (childLink) malloc (sizeof(ChildNode)); if (newNode == NULL) { printf("out of memory\n"); exit(1); } newNode->node = (FPTreeNode) malloc (sizeof(FPNode)); if (newNode->node == NULL) { printf("out of memory\n"); exit(1); } /* Store information of the item */ newNode->node->item = freqItemP[ptr]; newNode->node->count = count; newNode->node->numPath = 1; newNode->node->parent = T; newNode->node->children = NULL; newNode->node->hlink = NULL; newNode->next = NULL; previous->next = newNode; /* Link the node to the header table */ hNode = headerTableLink[indexList[ptr]]; if (hNode == NULL) { /* Place the node at the front of the horizontal link for the item */ headerTableLink[indexList[ptr]] = newNode->node; } else { /* Place the node at the end using the horizontal link */ while (hNode != NULL) { hPrevious = hNode; hNode = hNode->hlink; } hPrevious->hlink = newNode->node; } /* Insert next item, freqItemP[ptr+1], to the tree */ insert_tree(freqItemP, indexList, count, ptr+1, length, newNode->node, headerTableLink, path); (*path)++; T->numPath += *path; } else { /* Case 3: Match an existing child of T */ /* Increment the count */ aNode->node->count += count; /* Insert next item, freqItemP[ptr+1], to the tree */ insert_tree(freqItemP, indexList, count, ptr+1, length, aNode->node, headerTableLink, path); T->numPath += *path; } } return;}/****************************************************************************************** * Function: buildConTree * * Description: * Build a conditional FP-tree and the corresponding header table from * a conditional pattern base. * * Invoked from: * genConditionalPatternTree() * * Functions to be invoked: * q_sortA() * insert_tree() * * Parameters: * *conRoot -> Root of the conditional FP-tree * **conHeader -> Conditional header table for all the frequent items. * conHeaderSize -> Number of items (frequent) in the conditional header table * *conLargeItem -> Stores all the frequent items of the conditional pattern base * *conLargeItemSupprt -> The conditional support counts of the conditional items * "Conditional support" of an item is * the number of itemsets containing the item and the base items. * T -> The parent conditional FP-tree * headerTable -> The parent header table * baseIndex -> The index position of the parent header table that refer to the item * contained in the base of this conditional FP-tree * headerSize -> The number of items in the parent header table */void buildConTree(FPTreeNode *conRoot, FPTreeNode **conHeader, int conHeaderSize, int *conLargeItem, int *conLargeItemSupport, FPTreeNode T, FPTreeNode *headerTable, int baseIndex, int headerSize){ FPTreeNode aNode; FPTreeNode ancestorNode; int *freqItemP; /* Holds all the frequent items of a transaction of the conditional pattern base */ int *indexList; /* Holds the index position of the parent header table for the corresponding item in freqItemP[] */ int path; int count; int i; /* create conditional header table */ *conHeader = (FPTreeNode *) malloc (sizeof(FPTreeNode) * conHeaderSize); if (*conHeader == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < conHeaderSize; i++) (*conHeader)[i] = NULL; /* create root of the conditional FP-tree */ (*conRoot) = (FPTreeNode) malloc (sizeof(FPNode)); if (*conRoot == NULL) { printf("out of memory\n"); exit(1); } /* Initialize the root of the conditional FP-tree */ (*conRoot)->numPath = 1; (*conRoot)->parent = NULL; (*conRoot)->children = NULL; (*conRoot)->hlink = NULL; freqItemP = (int *) malloc (sizeof(int) * conHeaderSize); if (freqItemP == NULL) { printf("out of memory\n"); exit(1); } indexList = (int *) malloc (sizeof(int) * conHeaderSize); if (indexList == NULL) { printf("out of memory\n"); exit(1); } /* Set aNode to the first node of the parent FP-tree * that contains the base item. */ aNode = headerTable[baseIndex]; /* Visit each path from the base item to the root * of the parent FP-tree and * extract all the frequent items of the conditional pattern base. * Sort this items in descending order of their frequency and * insert them to the conditional FP-tree. */ while (aNode != NULL) { ancestorNode = aNode->parent; count = 0; /* Identify the frequent items in each path from the * node containing the item in the base to the root. * Each of such path is just like a transaction of the * conditional pattern base. * This identification can be done because * conLargeItem[] contains all the frequent items * of the conditional pattern base. * Store the frequent items to freqItemP[], and * the corresponding index position of the * conditional header table to indexList[]. */ while (ancestorNode != T) { for (i=0; i < conHeaderSize; i++) { if (ancestorNode->item == conLargeItem[i]) { freqItemP[count] = ancestorNode->item; indexList[count] = i; count++; break; } } ancestorNode = ancestorNode->parent; } /* Sort the frequent items in this path in ascending order * of indexList[], i.e. sort in descending order of the * frequency of the items in the conditional pattern base. */ q_sortA(indexList, freqItemP, 0, count-1, count); path = 0; /* Insert the frequent items to the conditional FP-tree. */ insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path); aNode = aNode->hlink; } free(freqItemP); free(indexList); return;}/****************************************************************************************** * Function: genConditionalPatternTree * * Description: * -- Form the conditional pattern base of each item in the header table. * -- Build the conditional FP-tree for the item. * -- Perfrom FPgrowth() for the conditional FP-tree. * * Parameters (In): * pattern[] -> Base items for the conditional FP-tree. * baseSize -> Number of base items -1. * patternSupport -> Support of the base items (it is a large itemset). * T -> FP-tree. * headerIndex -> Index position in the header table refering * the base item for the conditional FP-tree. * headerSize -> Number of items in the header table. * *headerTableLink -> Header table of the FP-tree (T). * * Invoked from: * FPgrowth() * * Functions to be invoked: * q_sortD() * buildConTree() * FPgrowth() * * Parameters (Out): * conLargeItem[] -> To hold frequent items in the conditional pattern base. * conLargeItemSupport[] -> To hold "conditional support" of each items in the conditional pattern base. * "Conditional support" of an item is * the number of itemsets containing the item and the base items. * * Global Variables (read only): * threshold -> Support threshold */void genConditionalPatternTree(int *pattern, int baseSize, int patternSupport, int *conLargeItem, int *conLargeItemSupport, FPTreeNode T, int headerIndex, int headerSize, FPTreeNode *headerTableLink){ int conHeaderSize; FPTreeNode *conHeader; FPTreeNode conRoot; FPTreeNode aNode, ancestorNode; int j; for (j=0; j < headerSize; j++) conLargeItemSupport[j] = 0; aNode = headerTableLink[headerIndex]; conHeaderSize = 0; /* Find all the frequent items in the conditional pattern base. * -- Visit, in bottom-up manner, all the ancestor nodes of all the nodes containing this item. * -- Count the "conditional supports" of the items in the ancestor nodes * (i.e. items in conditional pattern base), and store the values to conLargeSupport[]. * "Conditional support" of an item is * the number of itemsets containing the item and the base items. * -- Items in the ancestor nodes having conditional supports >= threshold are added to * conLargeItem[]. */ while (aNode != NULL) { ancestorNode = aNode->parent; while (ancestorNode != T) { for (j=0; j < headerSize; j++) { if (ancestorNode->item == headerTableLink[j]->item) { /* Increment the conditional support count * for this ancestor item */ conLargeItemSupport[j] += aNode->count; if ((conLargeItemSupport[j] >= threshold) && (conLargeItemSupport[j] - aNode->count < threshold)) { /* Add the ancestor item to the conditional pattern base, * and update the number of items in the * conditional header table */ conLargeItem[j] = ancestorNode->item; conHeaderSize++; } break; } } ancestorNode = ancestorNode->parent; } /* Next node of the FP-tree containing the base item */ aNode = aNode->hlink; } /* Sort the items in the conditional pattern base in descending order of their * conditional support count */ q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize); /* Generate the conditional FP-tree and mine recursively */ if (conHeaderSize > 0) { /* Build conditional FP-tree */ buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, T, headerTableLink, headerIndex, headerSize); /* Mining */ FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); free(conHeader); destroyTree(conRoot); } return;}/****************************************************************************************** * Function: FPgrowth * * Description: * Perform the FP-growth algorithm. * * There are 2 cases for the FP-tree: * Case 1: * The tree consists of a single path only. * -- Form any combination of items in the path to * generate large itemsets containing the base items of this FP-tree. * Case 2: * The tree consists of more than one path. * -- Visit the header table in a top-down manner, i.e. visit largest item first. * -- Form the conditional pattern base of each item in the header table. * -- Build the conditional FP-tree for the item. * -- Perfrom FPgrowth() for the conditional FP-tree. * * Invoked from: * main() * genConditionalPatternTree() * * Functions to be invoked: * combine() * addLargeList() * genConditionalPatternTree() * * Parameters (In): * T -> A FP-tree * *headerTableLink -> Header table * headSize -> Number of items in the header table * *baseItems -> The base items for this FP-tree * baseSize -> The number of base items * * Global Variables: * threshold -> Support threshold */void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize){ int count; int i, j; int *pattern; int patternSupport; FPTreeNode aNode = NULL; int *conLargeItem; int *conLargeItemSupport; if (baseSize >= realK) return; if (T == NULL) return; /* Create array, conLargeItem, to store the items in the header table; * and also an array, conLargeItemSupport, to store the corresponding count value */ conLargeItem = (int *) malloc (sizeof(int) * headerSize); conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize); if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) { printf("out of memory\n"); exit(1); } if (T->numPath == 1) { /* Case 1: Single Path */ count = 0; if (T->children != NULL) aNode = T->children->node; /* Visit the path in top-down manner, and store the items and count values */ while (aNode != NULL) { conLargeItem[count] = aNode->item; conLargeItemSupport[count] = aNode->count; count++; if (aNode->children != NULL) aNode = aNode->children->node; else aNode = NULL; } /* Form any combination of items in the path to * generate large itemsets containing the base items stored in 'baseItems' */ combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize); free(conLargeItem); free(conLargeItemSupport); } else { /* Multiple Path */ /* Create an array to store the base items for a conditional FP-tree.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -