📄 gd.java
字号:
/*FPgrowth.java - an implementation of algorithm FPgrowth for use with:ARMiner - Association Rules MinerCopyright (C) 2000-2001 UMass/Boston - Computer Science DepartmentThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or (atyour option) any later version.This program is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307USA*/import java.util.*;import java.io.IOException;/** FPgrowth.java<P> This class implements the FPgrowth algorithm for finding large itemsets. (see "Mining Frequent Patterns without Candidate Generation" by Jiawei Han, Jian Pei, and Yiwen Yin from Simon Fraser University 2000) *//* This file is a part of the ARMiner project. (P)06/01/2001 by Laurentiu Cristofor*/public class FPgrowth implements LargeItemsetsFinder{ private static class FPTreeNode { // data public int item; // item id public int count; // item count // the following two are kept to speed up calculations public int seq_num; // sequence number of node, = (depth + 1) public int header_index; // index in header of the entry for item // links public FPTreeNode parent; // parent node public FPTreeNode child; // first child node public FPTreeNode sibling; // next sibling node public FPTreeNode next; // next node in FPTree containing same item public FPTreeNode() { } public FPTreeNode(int i, int c, int sn, int hi, FPTreeNode p, FPTreeNode s, FPTreeNode n) { item = i; count = c; seq_num = sn; header_index = hi; parent = p; sibling = s; next = n; } } private static class FPTreeHeaderEntry { public int item; public int count; // total item count in tree public FPTreeNode head; public FPTreeHeaderEntry() { } public FPTreeHeaderEntry(int i) { item = i; } } private static class FPTree { public FPTreeHeaderEntry[] header; public FPTreeNode root; // we need to be able to figure out // the index of an FPTreeHeaderEntry // for a given item public Map item2index; // to quickly tell whether we have a single path or not public boolean hasMultiplePaths; // for statistics public int count_nodes; // information we need about the database public long num_rows; public long min_weight; public DBCacheWriter cache_writer; public FPTree(int[] items, long num_rows, long min_weight, DBCacheWriter cache_writer) { header = new FPTreeHeaderEntry[items.length]; root = new FPTreeNode(); item2index = new HashMap(items.length); this.num_rows = num_rows; this.min_weight = min_weight; this.cache_writer = cache_writer; for (int i = 0; i < items.length; i++) { header[i] = new FPTreeHeaderEntry(items[i]); item2index.put(new Integer(items[i]), new Integer(i)); } } public void insert(int[] items, int count) { // current_node will be the node below which we look to insert FPTreeNode current_node = root; for (int index = 0; index < items.length; index++) { // find header entry for items[index] int entry_index = ((Integer)item2index.get(new Integer(items[index]))).intValue(); // update item count in header entry header[entry_index].count += count; // we look among the children of current_node // for the one containing items[index] FPTreeNode walker = current_node.child; for ( ; walker != null; walker = walker.sibling) if (walker.item == items[index]) break; // case no child contained the item // -> we need to insert a new node if (walker == null) { // if we're creating a new branch if (current_node.child != null) hasMultiplePaths = true; count_nodes++; // parent is current_node, sibling is current_node.child, // next is head from header entry // (we insert at the beginning of the 'next' // and 'sibling' based linked lists) FPTreeNode new_node = new FPTreeNode(items[index], count, index + 1, entry_index, current_node, current_node.child, header[entry_index].head); header[entry_index].head = new_node; current_node.child = new_node; // and continue inserting from this new node current_node = new_node; } // if we get here then walker points // to a node containing items[index] else { // update count of node walker.count += count; // and continue inserting from this node current_node = walker; } } } public void fp_growth(Itemset is_suffix) { if (!hasMultiplePaths) { // collect items from the tree, least frequent item first!!! int[] items = new int[header.length]; for (int i = 0; i < header.length; i++) items[header.length - i - 1] = header[i].item; // generate all item combinations of the tree's single branch combine(items, is_suffix); } else for (int i = header.length - 1; i >= 0; i--) { Itemset is_new = new Itemset(is_suffix); is_new.addItem(header[i].item); is_new.setWeight(header[i].count); is_new.setSupport((float)header[i].count / (float)num_rows); // write itemset to the cache try { if (cache_writer != null) cache_writer.writeItemset(is_new); } catch (IOException e) { System.err.println("I/O error!!!\n" + e); } FPTree fpt = buildConditionalFPTree(header[i].item); if (fpt != null) fpt.fp_growth(is_new); } } private void combine(int[] items, Itemset is_combination) { int count; for (int i = 0; i < items.length; i++) { Itemset is_new_combination = new Itemset(is_combination); is_new_combination.addItem(items[i]); // store in itemset the weight and support of all itemsets // combined with items[i]; // note that we go through items in increasing order of their // support so that we can set it up correctly count = header[header.length - i - 1].count; is_new_combination.setWeight(count); is_new_combination.setSupport((float)count / (float)num_rows); // write itemset to the cache try { if (cache_writer != null) cache_writer.writeItemset(is_new_combination); } catch (IOException e) { System.err.println("I/O error!!!\n" + e); } combine(items, is_new_combination, i + 1); } } // create all combinations of elements in items[] // starting at index from and append them to is_combination private void combine(int[] items, Itemset is_combination, int from) { for (int i = from; i < items.length; i++) { Itemset is_new_combination = new Itemset(is_combination); is_new_combination.addItem(items[i]); // write itemset to the cache try { if (cache_writer != null) cache_writer.writeItemset(is_new_combination); } catch (IOException e) { System.err.println("I/O error!!!\n" + e); } combine(items, is_new_combination, i + 1); } } private FPTree buildConditionalFPTree(int item) { // find header entry for item int entry_index = ((Integer)item2index.get(new Integer(item))).intValue(); // we will see which of the remaining items are frequent // with respect to the conditional pattern base of item // we have a counter for each entry in the header that // comes before item's own entry int[] counts = new int[entry_index];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -