📄 gd.java
字号:
for (FPTreeNode side_walker = header[entry_index].head; side_walker != null; side_walker = side_walker.next) for (FPTreeNode up_walker = side_walker.parent; up_walker != root; up_walker = up_walker.parent) counts[up_walker.header_index] += side_walker.count; int num_frequent = 0; for (int i = 0; i < counts.length; i++) if (counts[i] >= min_weight) num_frequent++; if (num_frequent == 0) return null; // put all frequent items in an array of Items Item[] item_objs = new Item[num_frequent]; for (int i = 0, j = 0; i < counts.length; i++) if (counts[i] >= min_weight) item_objs[j++] = new Item(header[i].item, counts[i]); // and sort them ascendingly according to weight Arrays.sort(item_objs); // then place the items in an array of ints in descending order int[] items = new int[num_frequent]; for (int i = 0; i < num_frequent; i++) items[i] = item_objs[num_frequent - i - 1].item; // initialize FPTree FPTree fpt = new FPTree(items, num_rows, min_weight, cache_writer); for (FPTreeNode side_walker = header[entry_index].head; side_walker != null; side_walker = side_walker.next) if (side_walker.parent != root) { int i = side_walker.parent.seq_num; Item[] pattern = new Item[i]; for (FPTreeNode up_walker = side_walker.parent; up_walker != root; up_walker = up_walker.parent) // we store the header index in the count field // so that we can use it later // to access the count from counts[] pattern[--i] = new Item(up_walker.item, up_walker.header_index); processPattern(pattern, side_walker.count, fpt, counts); } return fpt; } // NOTE: pattern[] elements contain in their count field the // header entry index of the corresponding item which also // can be used to index the counts[] array private void processPattern(Item[] pattern, int count, FPTree fpt, int[] counts) { int i, j, num_frequent; Item item_obj; int[] items; Item[] item_objs; // how many frequent items are in this pattern? for (i = 0, num_frequent = 0; i < pattern.length; i++) { item_obj = pattern[i]; if (counts[item_obj.count] >= min_weight) num_frequent++; } if (num_frequent > 0) { // select only frequent items into an array of Items // these form a conditional pattern item_objs = new Item[num_frequent]; for (i = 0, j = 0; i < pattern.length; i++) { item_obj = pattern[i]; if (counts[item_obj.count] >= min_weight) item_objs[j++] = new Item(item_obj.item, counts[item_obj.count]); } // sort them Arrays.sort(item_objs); // get the items in reverse order in an array of ints items = new int[num_frequent]; for (i = 0; i < num_frequent; i++) items[i] = item_objs[num_frequent - i - 1].item; // insert them in the FPTree fpt.insert(items, count); } } } private static class Item implements Comparable { public int item; public int count; public Item(int i, int c) { item = i; count = c; } public int compareTo(Object o) { Item other = (Item)o; return (count - other.count); } public String toString() { return "<" + item + ", " + count + ">"; } } // our interface to the outside world private DBReader db_reader; private DBCacheWriter cache_writer; // useful information private long num_rows; private int num_cols; private long min_weight; private int[] counts; // stores count of items starting from 1 private int pass_num; /** * Find the frequent itemsets in a database * * @param dbReader the object used to read from the database * @param cacheWriter the object used to write to the cache * if this is null, then nothing will be saved, this is useful * for benchmarking * @param minSupport the minimum support * @return the number of passes executed over the database */ public int findLargeItemsets(DBReader dbReader, DBCacheWriter cacheWriter, float minSupport) { // save the following into member fields db_reader = dbReader; cache_writer = cacheWriter; num_rows = dbReader.getNumRows(); num_cols = (int)db_reader.getNumColumns(); min_weight = (long)(num_rows * minSupport); // first pass counts item occurrences countItemOccurrences(); // second pass constructs FPTree FPTree fpt = constructFPTree(); // finally we mine the FPTree using the FP-growth algorithm if (fpt != null) { System.out.println("<FPgrowth>: FPTree has " + fpt.count_nodes + " nodes"); fpt.fp_growth(new Itemset()); } else System.out.println("<FPgrowth>: FPTree is empty"); // there will usually be 2 passes unless there are // no frequent items, in which case we do only 1 pass return pass_num; } private void countItemOccurrences() { counts = new int[num_cols + 1]; try { Itemset row = db_reader.getFirstRow(); for (int i = 0; i < row.size(); i++) counts[row.getItem(i)]++; while (db_reader.hasMoreRows()) { row = db_reader.getNextRow(); for (int i = 0; i < row.size(); i++) counts[row.getItem(i)]++; } } catch (Exception e) { System.err.println("Error scanning database!!!\n" + e); } // we did one pass over database pass_num++; } private FPTree constructFPTree() { // see how many frequent items there are in the database int num_frequent = 0; for (int i = 1; i < counts.length; i++) if (counts[i] >= min_weight) num_frequent++; if (num_frequent == 0) return null; // put all frequent items in an array of Items Item[] item_objs = new Item[num_frequent]; for (int i = 1, j = 0; i < counts.length; i++) if (counts[i] >= min_weight) item_objs[j++] = new Item(i, counts[i]); // and sort them ascendingly according to weight Arrays.sort(item_objs); // then place the items in an array of ints in descending order int[] items = new int[num_frequent]; for (int i = 0; i < num_frequent; i++) items[i] = item_objs[num_frequent - i - 1].item; // initialize FPTree FPTree fpt = new FPTree(items, num_rows, min_weight, cache_writer); try { Itemset row = db_reader.getFirstRow(); processRow(row, fpt); while (db_reader.hasMoreRows()) { row = db_reader.getNextRow(); processRow(row, fpt); } } catch (Exception e) { System.err.println("Error scanning database!!!\n" + e); } pass_num++; return fpt; } private void processRow(Itemset row, FPTree fpt) { int i, j, item, num_frequent; int[] items; Item[] item_objs; // how many frequent items are in this row? for (i = 0, num_frequent = 0; i < row.size(); i++) { item = row.getItem(i); if (counts[item] >= min_weight) num_frequent++; } if (num_frequent > 0) { // select only frequent items into an array of Items item_objs = new Item[num_frequent]; for (i = 0, j = 0; i < row.size(); i++) { item = row.getItem(i); if (counts[item] >= min_weight) { item_objs[j++] = new Item(item, counts[item]); } } // sort them Arrays.sort(item_objs); // get the items in reverse order into an array of ints items = new int[num_frequent]; for (i = 0; i < num_frequent; i++) items[i] = item_objs[num_frequent - i - 1].item; // insert them in the FPTree fpt.insert(items, 1); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -