⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 totalsupporttree.java

📁 apriori algorithm using datasets implementation
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/* ------------------------------------------------------------------------- */
/*                                                                           */
/*                             TOTAL SUPPORT TREE                            */
/*                                                                           */
/*                                Frans Coenen                               */
/*                                                                           */
/*                               10 January 2003                             */
/*        (Revised 23/1/3003, 8/2/2003, 18/3/2003, 3/3/2003. 7/4/2004)       */
/*                                                                           */
/*                       Department of Computer Science                      */
/*                         The University of Liverpool                       */
/*                                                                           */ 
/* ------------------------------------------------------------------------- */

/* Structure:

AssocRuleMining
      |
      +-- TotalSupportTree	 */ 

/* Java packages */
import java.io.*;
import java.util.*;

/** Methods concerned with the generation, processing and manipulation of 
T-tree data storage structures used to hold the total support counts for large 
itemsets.
@author Frans Coenen
@version 3 July 2003 */

public class TotalSupportTree extends AssocRuleMining {

    /* ------ FIELDS ------ */
	
    // Data structures    
    /** The reference to start of t-tree. */
    protected TtreeNode[] startTtreeRef;	 
    
    // Constants
    /** The maximum number of frequent sets that may be generated. */
    protected final int MAX_NUM_FREQUENT_SETS = 80000;
    
    // Other fields     
    /** The next level indicator flag: set to <TT>true</TT> if new level 
    generated and by default. */
    protected boolean nextLevelExists = true ; 
    /** Instance of the calss RuleList */
    protected RuleList currentRlist = null;

    // Diagnostics 
    /** The number of frequent sets (nodes in t-tree with above minimum 
    support) generated so far. */
    protected int numFrequentsets =0;
    /** The number of updates required to generate the T-tree. */
    protected long numUpdates   = 0l;
    
    /* ------ CONSTRUCTORS ------ */

    /** Processes command line arguments. */
    
    public TotalSupportTree(String[] args) {
	super(args);
	// Create RuleList object for later usage
	currentRlist = new RuleList();
	}
	
    /* ------ METHODS ------ */
    
    /*----------------------------------------------------------------------- */
    /*                                                                        */
    /*                       T-TREE BUILDING METHODS                          */
    /*                                                                        */
    /*----------------------------------------------------------------------- */
    
    /* CREATE TOTAL SUPPORT TREE */
    /** Commences process of generating a total support tree (T-tree). */
                
    public void createTotalSupportTree() {	
	System.out.println("APRIORI-T WITH X-CHECKING\n" +
	                   "-------------------------");
	System.out.println("Minimum support threshold = " + 
				twoDecPlaces(support) + "% " + "(" + 
				twoDecPlaces(minSupport) + " records)");
        
	// If no data (possibly as a result of an order and pruning operation)
	// return
	if (numOneItemSets==0) return;
	
	// Set RuleList conversion and reconversion values so that they are the
	// same as those inherited from AssocRuleMining class
	currentRlist.setReconversionArrayRefs(conversionArray,reconversionArray);
	
	// Create Top level of T-tree (First pass of dataset)	
	startTtreeRef   = null;
	numFrequentsets = 0;
	numUpdates      = 0l;
	createTtreeTopLevel();
	
	// Generate level 2
	generateLevel2();
	
	// Further passes of the dataset	
	createTtreeLevelN();
	}
	
    /* CREATE T-TREE TOP LEVEL */
    /** Generates level 1 (top) of the T-tree. */
    		
    protected void createTtreeTopLevel() {
        
	// Dimension and initialise top level of T-tree	
	startTtreeRef = new TtreeNode[numOneItemSets+1];
	for (int index=1;index<=numOneItemSets;index++) 
	    			startTtreeRef[index] = new TtreeNode();
	    
        // Add support for each 1 itemset
	createTtreeTopLevel2();

	// Prune top level, setting any unsupported 1-itemsets to null 
	pruneLevelN(startTtreeRef,1); 
	}
    
    /* CREATE T-TREE TOP LEVEL 2 */	
    /** Adds supports to level 1 (top) of the T-tree. */
    	
    protected void createTtreeTopLevel2() {
	    
        // Loop through data set record by record and add support for each
	// 1 itemset
        
	for (int index1=0;index1<dataArray.length;index1++) {
	    // Non null record (if initial data set has been reordered and
	    // pruned some records may be empty!
	    if (dataArray[index1] != null) {
    	        for (int index2=0;index2<dataArray[index1].length;index2++) {
		    startTtreeRef[dataArray[index1][index2]].support++; 
		    numUpdates++;
		    }
		}
	    }
	}	
	
    /* CREATE T-TREE LEVEL N */ 
    /** Commences the process of determining the remaining levels in the T-tree
    (other than the top level), level by level in an "Apriori" manner. <P>
    Follows an add support, prune, generate loop until there are no more levels 
    to generate. */
    
    protected void createTtreeLevelN() {
        int nextLevel=2;
   	
	// Loop while a further level exists
	
	while (nextLevelExists) { 	    
            // Add support
	    addSupportToTtreeLevelN(nextLevel);
	    // Prune unsupported candidate sets
	    pruneLevelN(startTtreeRef,nextLevel);
	    // Check number of frequent sets generated so far
	    if (numFrequentsets>MAX_NUM_FREQUENT_SETS) {
	        System.out.println("Number of frequent sets (" +
				numFrequentsets + ") generted so far " + 
				"exceeds limit of " + MAX_NUM_FREQUENT_SETS +
				", generation process stopped!");
		break;	
		}
	    // Attempt to generate next level 
	    nextLevelExists=false;
	    generateLevelN(startTtreeRef,nextLevel,null); 
	    nextLevel++;
	    }
	    
	//End
	System.out.println("Levels in T-tree = " + nextLevel);  
	}
 
    /* ------------------------------------ */
    /* ADD SUPPORT VALUES TO T-TREE LEVEL N */  
    /* ------------------------------------ */  
    /** Commences process of adding support to a given level in the T-tree 
    (other than the top level). 
    @param level the current level number (top level = 1). */
     	
    protected void addSupportToTtreeLevelN(int level) {
	// Loop through data set record by record
        for (int index=0;index<dataArray.length;index++) {
	    // Non null record (if initial data set has been reordered and
	    // pruned some records may be empty
	    if (dataArray[index] != null) 
	        		addSupportToTtreeFindLevel(startTtreeRef,level,
				dataArray[index].length,dataArray[index]);
	    }
	} 
	
    /* ADD SUPPORT TO T-TREE FIND LEVEL */    
    /** Adds support to a given level in the T-tree (other than the top level).
    <P> Operates in a recursive manner to first find the appropriate level in 
    the T-tree before processing the required level (when found). 
    @param linkRef the reference to the current sub-branch of T-tree (start at 
    top of tree)
    @param level the level marker, set to the required level at the start and 
    then decremented by 1 on each recursion. 
    @param endIndex the length of current level in a sub-branch of the T-tree.
    @param itemSet the current itemset under consideration. */
    
    protected void addSupportToTtreeFindLevel(TtreeNode[] linkRef, int level, 
    			int endIndex, short[] itemSet) {

	// At right level;
	
	if (level == 1) {
	    // Step through itemSet
	    for (int index1=0;index1 < endIndex;index1++) {
		// If valid node update, i.e. a non null node
		if (linkRef[itemSet[index1]] != null) {
		    linkRef[itemSet[index1]].support++; 
		    numUpdates++;
		    }
		}
	    }
	
	// At wrong level
	
	else {
	    // Step through itemSet
	    for (int index=level-1;index<endIndex;index++) {		
		// If child branch step down branch
		if (linkRef[itemSet[index]] != null) {
		    if (linkRef[itemSet[index]].childRef != null) 
		    	 addSupportToTtreeFindLevel(linkRef[itemSet[index]].childRef,
						level-1,index,itemSet);
		    }
		}
	    }	
	}
	
    /*---------------------------------------------------------------------- */
    /*                                                                       */
    /*                                 PRUNING                               */
    /*                                                                       */
    /*---------------------------------------------------------------------- */ 
    
    /* PRUNE LEVEL N */
    
    /** Prunes the given level in the T-tree. <P> Operates in a recursive 
    manner to first find the appropriate level in the T-tree before processing 
    the required level (when found). Pruning carried out according to value of
    <TT>minSupport</TT> field.
    @param linkRef The reference to the current sub-branch of T-tree (start at 
    top of tree)
    @param level the level marker, set to the required level at the start and 
    then decremented by 1 on each recursion. 
    @return true if all nodes at a given level in the given branch of the 
    T-tree have been pruned, false otherwise. */
    
    protected boolean pruneLevelN(TtreeNode [] linkRef, int level) {
        int size = linkRef.length;
	
	// At right level;	
	if (level == 1) {
	    boolean allUnsupported = true;
	    // Step through level and set to null where below min support
	    for (int index1=1;index1<size;index1++) {
	        if (linkRef[index1] != null) {
	            if (linkRef[index1].support < minSupport) 
		    		linkRef[index1] = null;
	            else {
		        numFrequentsets++;
			allUnsupported = false;
			}
		    }
		}
	    return(allUnsupported);
	    }
	    	
	// Wrong level, Step through row
	for (int index1=level;index1<size;index1++) {
	    if (linkRef[index1] != null) {		
		// If child branch step down branch
		if (linkRef[index1].childRef != null) {
		    if (pruneLevelN(linkRef[index1].childRef,level-1)) 
			    		linkRef[index1].childRef=null;
		    }
		}
	    }	
	return(false);
	}
					
    /*---------------------------------------------------------------------- */
    /*                                                                       */
    /*                            LEVEL GENERATION                           */
    /*                                                                       */
    /*---------------------------------------------------------------------- */ 
    
    /* GENERATE LEVEL 2 */
    
    /** Generates level 2 of the T-tree. <P> The general 
    <TT>generateLevelN</TT> method assumes we have to first find the right 
    level in the T-tree, that is not necessary in this case of level 2. */
    
    protected void generateLevel2() {	    
	
	// Set next level flag	
	nextLevelExists=false;
	
	// loop through top level	
	for (int index=2;index<startTtreeRef.length;index++) {
	    // If supported T-tree node (i.e. it exists)
	    if (startTtreeRef[index] != null) generateNextLevel(startTtreeRef,
	    				index,realloc2(null,(short) index));	
	    }
	}
	
    /* GENERATE LEVEL N */
    
    /** Commences process of generating remaining levels in the T-tree (other 
    than top and 2nd levels). <P> Proceeds in a recursive manner level by level
    until the required level is reached. Example, if we have a T-tree of the form:
    
    <PRE>
    (A) ----- (B) ----- (C)
               |         |
	       |         |
	      (A)       (A) ----- (B)
    </PRE><P>	                           
    Where all nodes are supported and we wish to add the third level we would
    walk the tree and attempt to add new nodes to every level 2 node found.
    Having found the correct level we step through starting from B (we cannot
    add a node to A), so in this case there is only one node from which a level
    3 node may be attached. 
    @param linkRef the reference to the current sub-branch of T-tree (start at 
    top of tree).
    @param level the level marker, set to the required level at the start and
    then decremented by 1 on each recursion.
    @param itemSet the current itemset under consideration. */
    
    protected void generateLevelN(TtreeNode[] linkRef, int level, 
    							short[] itemSet) {
	int index1;
	int localSize = linkRef.length;
	
	// Correct level
	
	if (level == 1) {
	    for (index1=2;index1<localSize;index1++) {
	        // If supported T-tree node
	    	if (linkRef[index1] != null) generateNextLevel(linkRef,index1,
					realloc2(itemSet,(short) index1));		
	        }
	    }
	
	// Wrong level
	
	else {
		
	    for (index1=level;index1<localSize;index1++) {
	        // If supported T-tree node
	        if (linkRef[index1]!=null && linkRef[index1].childRef!=null) {
		    generateLevelN(linkRef[index1].childRef,level-1,
		    			realloc2(itemSet,(short) index1));
		    }	
		}
	    }
	}

    /* GENERATE NEXT LEVEL */
    
    /** Generates a new level in the T-tree from a given "parent" node. <P> 
    Example 1, given the following:
    
    <PRE>
    (A) ----- (B) ----- (C)
               |         |
	       |         |
	      (A)       (A) ----- (B) 
    </PRE><P>	      
    where we wish to add a level 3 node to node (B), i.e. the node {A}, we 
    would proceed as follows:
    <OL>
    <LI> Generate a new level in the T-tree attached to node (B) of length 
    one less than the numeric equivalent of B i.e. 2-1=1.
    <LI> Loop through parent level from (A) to node immediately before (B). 
    <LI> For each supported parent node create an itemset label by combing the
    index of the parent node (e.g. A) with the complete itemset label for B --- 
    {C,B} (note reverse order), thus for parent node (B) we would get a new
    level in the T-tree with one node in it --- {C,B,A} represented as A.
    <LI> For this node to be a candidate large item set its size-1 subsets must 
    be supported, there are three of these in this example {C,A}, {C,B} and
    {B,A}. We know that the first two are supported because they are in the
    current branch, but {B,A} is in another branch. So we must generate this

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -