📄 totalsupporttree.java
字号:
/* ------------------------------------------------------------------------- *//* *//* TOTAL SUPPORT TREE *//* *//* Frans Coenen *//* *//* 10 January 2003 *//* (Revised 23/1/3003, 8/2/2003, 18/3/2003, 3/3/2003. 7/4/2004) *//* *//* Department of Computer Science *//* The University of Liverpool *//* */ /* ------------------------------------------------------------------------- *//* Structure:AssocRuleMining | +-- TotalSupportTree To compile: javac AprioriTsortedPrunedApp.javaTo run *//* Java packages */ import java.io.*;import java.util.*;// Java GUI packagesimport javax.swing.*;/** Methods concerned with the generation, processing and manipulation of T-tree data storage structures used to hold the total support counts for large itemsets.@author Frans Coenen@version 3 July 2003 */public class TotalSupportTree extends AssocRuleMining { /* ------ FIELDS ------ */ // Data structures /** The reference to start of t-tree. */ protected TtreeNode[] startTtreeRef; /** The serialisation array (used for distributed ARM applications when sending T-trees from one processor to another via a JavaSpace). */ //protected int[] serializationArray; /** The marker to the "current" location in the serialisation array. <P> initialised to zero. */ //protected int serializationRef = 0; // Constants /** The maximum number of frequent sets that may be generated. */ protected final int MAX_NUM_FREQUENT_SETS = 80000; // Other fields /** The next level indicator flag: set to <TT>true</TT> if new level generated and by default. */ protected boolean nextLevelExists = true ; /** Instance of the class RuleList */ protected RuleList currentRlist = null; // Diagnostics /** The number of frequent sets (nodes in t-tree with above minimum support) generated so far. */ protected int numFrequentsets =0; /** The number of updates required to generate the T-tree. */ protected long numUpdates = 0l; /** Time to generate P-tree. */ protected String duration = null; /* ------ CONSTRUCTORS ------ */ /** Processes command line arguments. */ public TotalSupportTree(String[] args) { super(args); // Create RuleList object for later usage currentRlist = new RuleList(); } /** With argument from existing instance of class AssocRuleMining. */ /*public TotalSupportTree(AssocRuleMining armInstance) { numRows = armInstance.numRows; numCols = armInstance.numCols; support = armInstance.support; confidence = armInstance.confidence; dataArray = armInstance.dataArray; minSupport = armInstance.minSupport; numOneItemSets = armInstance.numOneItemSets; } */ /** Default constructor */ /*public TotalSupportTree() { } */ /* ------ METHODS ------ */ /*----------------------------------------------------------------------- */ /* */ /* T-TREE BUILDING METHODS */ /* */ /*----------------------------------------------------------------------- */ /* CREATE T-TREE TOP LEVEL */ /** Generates level 1 (top) of the T-tree. */ protected void createTtreeTopLevel() { // Dimension and initialise top level of T-tree startTtreeRef = new TtreeNode[numOneItemSets+1]; for (int index=1;index<=numOneItemSets;index++) startTtreeRef[index] = new TtreeNode(); // Add support for each 1 itemset createTtreeTopLevel2(); // Prune top level, setting any unsupport 1 itemsets to null pruneLevelN(startTtreeRef,1); } /* CREATE T-TREE 2nd LEVEL */ /** Adds supports to level 1 (top) of the T-tree. */ protected void createTtreeTopLevel2() { /* STUBB */ } /*---------------------------------------------------------------------- */ /* */ /* PRUNING */ /* */ /*---------------------------------------------------------------------- */ /* PRUNE LEVEL N */ /** Prunes the given level in the T-tree. <P> Operates in a recursive manner to first find the appropriate level in the T-tree before processing the required level (when found). Pruning carried out according to value of <TT>minSupport</TT> field. @param linkRef The reference to the current sub-branch of T-tree (start at top of tree) @param level the level marker, set to the required level at the start and then decremented by 1 on each recursion. @return true if all nodes at a given level in the given branch of the T-tree have been prined, false otherwise. */ protected boolean pruneLevelN(TtreeNode [] linkRef, int level) { int size = linkRef.length; // At right leve; if (level == 1) { boolean allUnsupported = true; // Step through level and set to null where below min support for (int index1=1;index1<size;index1++) { if (linkRef[index1] != null) { if (linkRef[index1].support < minSupport) linkRef[index1] = null; else { numFrequentsets++; allUnsupported = false; } } } return(allUnsupported); } // Wrong level, Step through row for (int index1=level;index1<size;index1++) { if (linkRef[index1] != null) { // If child branch step down branch if (linkRef[index1].childRef != null) { if (pruneLevelN(linkRef[index1].childRef,level-1)) linkRef[index1].childRef=null; } } } return(false); } /*---------------------------------------------------------------------- */ /* */ /* LEVEL GENERATION */ /* */ /*---------------------------------------------------------------------- */ /* GENERATE LEVEL 2 */ /** Generates level 2 of the T-tree. <P> The general <TT>generateLevelN</TT> method assumes we have to first find the right level in the T-tree, that is not necessary in this case of level 2. */ protected void generateLevel2() { // Set next level flag nextLevelExists=false; // loop through top level for (int index=2;index<startTtreeRef.length;index++) { // If supported T-tree node (i.e. it exists) if (startTtreeRef[index] != null) generateNextLevel(startTtreeRef, index,realloc2(null,(short) index)); } } /* GENERATE LEVEL N */ /** Commences process of generating remaining levels in the T-tree (other than top and 2nd levels). <P> Proceeds in a recursive manner level by level untill the required level is reached. Example, if we have a T-tree of the form: <PRE> (A) ----- (B) ----- (C) | | | | (A) (A) ----- (B) </PRE><P> Where all nodes are supported and we wish to add the third level we would walk the tree and attempt to add new nodes to every level 2 node found. Having found the correct level we step through starting from B (we cannot add a node to A), so in this case there is only one node from which a level 3 node may be attached. @param linkRef the reference to the current sub-branch of T-tree (start at top of tree). @param level the level marker, set to the required level at the start and then decremented by 1 on each recursion. @param itemSet the current itemset under consideration. */ protected void generateLevelN(TtreeNode[] linkRef, int level, short[] itemSet) { int index1; int localSize = linkRef.length; // Correct level if (level == 1) { for (index1=2;index1<localSize;index1++) { // If supported T-tree node if (linkRef[index1] != null) generateNextLevel(linkRef,index1, realloc2(itemSet,(short) index1)); } } // Wrong level else { for (index1=level;index1<localSize;index1++) { // If supported T-tree node if (linkRef[index1]!=null && linkRef[index1].childRef!=null) { generateLevelN(linkRef[index1].childRef,level-1, realloc2(itemSet,(short) index1)); } } } } /* GENERATE NEXT LEVEL */ /** Generates a new level in the T-tree from a given "parent" node. <P> Example 1, given the following: <PRE> (A) ----- (B) ----- (C) | | | | (A) (A) ----- (B) </PRE><P> where we wish to add a level 3 node to node (B), i.e. the node {A}, we would proceed as follows: <OL> <LI> Generate a new level in the T-tree attached to node (B) of length one less than the numeric equivalent of B i.e. 2-1=1. <LI> Loop through parent level from (A) to node immediately before (B). <LI> For each supported parent node create an itemset label by combing the index of the parent node (e.g. A) with the complete itemset label for B --- {C,B} (note reverse order), thus for parent node (B) we would get a new level in the T-tree with one node in it --- {C,B,A} represented as A. <LI> For this node to be a candidate large item set its size-1 subsets must be supported, there are three of these in this example {C,A}, {C,B} and {B,A}. We know that the first two are supported because they are in the current branch, but {B,A} is in another branch. So we must generate this set and test it. More generally we must test all cardinality-1 subsets which do not include the first element. This is done using the method <TT>testCombinations</TT>. </OL> <P>Example 2, given: <PRE> (A) ----- (D) | | (A) ----- (B) ----- (C) | | (A) ----- (B) </PRE><P> where we wish to add a level 4 node (A) to (B) this would represent the complete label {D,C,B,A}, the N-1 subsets will then be {{D,C,B},{D,C,A}, {D,B,A} and {C,B,A}}. We know the first two are supported becuase they are contained in the current sub-branch of the T-tree, {D,B,A} and {C,B,A} are not. </OL> @param parentRef the reference to the level in the sub-branch of the T-tree under consideration. @param endIndex the index of the current node under consideration. @param itemSet the complete label represented by the current node (required to generate further itemsets to be X-checked). */ protected void generateNextLevel(TtreeNode[] parentRef, int endIndex, short[] itemSet) { parentRef[endIndex].childRef = new TtreeNode[endIndex]; // New level short[] newItemSet; // Generate a level in Ttree TtreeNode currentNode = parentRef[endIndex]; // Loop through parent sub-level of siblings upto current node for (int index=1;index<endIndex;index++) { // Check if "uncle" element is supported (i.e. it exists) if (parentRef[index] != null) { // Create an appropriate itemSet label to test newItemSet = realloc2(itemSet,(short) index); if (testCombinations(newItemSet)) { currentNode.childRef[index] = new TtreeNode(); nextLevelExists=true; } else currentNode.childRef[index] = null; } } } /* TEST COMBINATIONS */ /** Commences the process of testing whether the N-1 sized sub-sets of a newly created T-tree node are supported elsewhere in the Ttree --- (a process refered to as "X-Checking"). <P> Thus given a candidate large itemsets whose size-1 subsets are contained (supported) in the current branch of the T-tree, tests whether size-1 subsets contained in other branches are supported. Proceed as follows: <OL> <LI> Using current item set split this into two subsets: <P>itemSet1 = first two items in current item set <P>itemSet2 = remainder of items in current item set <LI> Calculate size-1 combinations in itemSet2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -