📄 totalsupporttree.java
字号:
/* ------------------------------------------------------------------------- */
/* */
/* TOTAL SUPPORT TREE */
/* */
/* Frans Coenen */
/* */
/* 10 January 2003 */
/* (Revised 23/1/3003, 8/2/2003, 18/3/2003, 3/3/2003. 7/4/2004) */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* ------------------------------------------------------------------------- */
/* Structure:
AssocRuleMining
|
+-- TotalSupportTree */
/* Java packages */
import java.io.*;
import java.util.*;
/** Methods concerned with the generation, processing and manipulation of
T-tree data storage structures used to hold the total support counts for large
itemsets.
@author Frans Coenen
@version 3 July 2003 */
public class TotalSupportTree extends AssocRuleMining {
/* ------ FIELDS ------ */
// Data structures
/** The reference to start of t-tree. */
protected TtreeNode[] startTtreeRef;
// Constants
/** The maximum number of frequent sets that may be generated. */
protected final int MAX_NUM_FREQUENT_SETS = 80000;
// Other fields
/** The next level indicator flag: set to <TT>true</TT> if new level
generated and by default. */
protected boolean nextLevelExists = true ;
/** Instance of the calss RuleList */
protected RuleList currentRlist = null;
// Diagnostics
/** The number of frequent sets (nodes in t-tree with above minimum
support) generated so far. */
protected int numFrequentsets =0;
/** The number of updates required to generate the T-tree. */
protected long numUpdates = 0l;
/* ------ CONSTRUCTORS ------ */
/** Processes command line arguments. */
public TotalSupportTree(String[] args) {
super(args);
// Create RuleList object for later usage
currentRlist = new RuleList();
}
/* ------ METHODS ------ */
/*----------------------------------------------------------------------- */
/* */
/* T-TREE BUILDING METHODS */
/* */
/*----------------------------------------------------------------------- */
/* CREATE TOTAL SUPPORT TREE */
/** Commences process of generating a total support tree (T-tree). */
public void createTotalSupportTree() {
System.out.println("APRIORI-T WITH X-CHECKING\n" +
"-------------------------");
System.out.println("Minimum support threshold = " +
twoDecPlaces(support) + "% " + "(" +
twoDecPlaces(minSupport) + " records)");
// If no data (possibly as a result of an order and pruning operation)
// return
if (numOneItemSets==0) return;
// Set RuleList conversion and reconversion values so that they are the
// same as those inherited from AssocRuleMining class
currentRlist.setReconversionArrayRefs(conversionArray,reconversionArray);
// Create Top level of T-tree (First pass of dataset)
startTtreeRef = null;
numFrequentsets = 0;
numUpdates = 0l;
createTtreeTopLevel();
// Generate level 2
generateLevel2();
// Further passes of the dataset
createTtreeLevelN();
}
/* CREATE T-TREE TOP LEVEL */
/** Generates level 1 (top) of the T-tree. */
protected void createTtreeTopLevel() {
// Dimension and initialise top level of T-tree
startTtreeRef = new TtreeNode[numOneItemSets+1];
for (int index=1;index<=numOneItemSets;index++)
startTtreeRef[index] = new TtreeNode();
// Add support for each 1 itemset
createTtreeTopLevel2();
// Prune top level, setting any unsupported 1-itemsets to null
pruneLevelN(startTtreeRef,1);
}
/* CREATE T-TREE TOP LEVEL 2 */
/** Adds supports to level 1 (top) of the T-tree. */
protected void createTtreeTopLevel2() {
// Loop through data set record by record and add support for each
// 1 itemset
for (int index1=0;index1<dataArray.length;index1++) {
// Non null record (if initial data set has been reordered and
// pruned some records may be empty!
if (dataArray[index1] != null) {
for (int index2=0;index2<dataArray[index1].length;index2++) {
startTtreeRef[dataArray[index1][index2]].support++;
numUpdates++;
}
}
}
}
/* CREATE T-TREE LEVEL N */
/** Commences the process of determining the remaining levels in the T-tree
(other than the top level), level by level in an "Apriori" manner. <P>
Follows an add support, prune, generate loop until there are no more levels
to generate. */
protected void createTtreeLevelN() {
int nextLevel=2;
// Loop while a further level exists
while (nextLevelExists) {
// Add support
addSupportToTtreeLevelN(nextLevel);
// Prune unsupported candidate sets
pruneLevelN(startTtreeRef,nextLevel);
// Check number of frequent sets generated so far
if (numFrequentsets>MAX_NUM_FREQUENT_SETS) {
System.out.println("Number of frequent sets (" +
numFrequentsets + ") generted so far " +
"exceeds limit of " + MAX_NUM_FREQUENT_SETS +
", generation process stopped!");
break;
}
// Attempt to generate next level
nextLevelExists=false;
generateLevelN(startTtreeRef,nextLevel,null);
nextLevel++;
}
//End
System.out.println("Levels in T-tree = " + nextLevel);
}
/* ------------------------------------ */
/* ADD SUPPORT VALUES TO T-TREE LEVEL N */
/* ------------------------------------ */
/** Commences process of adding support to a given level in the T-tree
(other than the top level).
@param level the current level number (top level = 1). */
protected void addSupportToTtreeLevelN(int level) {
// Loop through data set record by record
for (int index=0;index<dataArray.length;index++) {
// Non null record (if initial data set has been reordered and
// pruned some records may be empty
if (dataArray[index] != null)
addSupportToTtreeFindLevel(startTtreeRef,level,
dataArray[index].length,dataArray[index]);
}
}
/* ADD SUPPORT TO T-TREE FIND LEVEL */
/** Adds support to a given level in the T-tree (other than the top level).
<P> Operates in a recursive manner to first find the appropriate level in
the T-tree before processing the required level (when found).
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree)
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@param endIndex the length of current level in a sub-branch of the T-tree.
@param itemSet the current itemset under consideration. */
protected void addSupportToTtreeFindLevel(TtreeNode[] linkRef, int level,
int endIndex, short[] itemSet) {
// At right level;
if (level == 1) {
// Step through itemSet
for (int index1=0;index1 < endIndex;index1++) {
// If valid node update, i.e. a non null node
if (linkRef[itemSet[index1]] != null) {
linkRef[itemSet[index1]].support++;
numUpdates++;
}
}
}
// At wrong level
else {
// Step through itemSet
for (int index=level-1;index<endIndex;index++) {
// If child branch step down branch
if (linkRef[itemSet[index]] != null) {
if (linkRef[itemSet[index]].childRef != null)
addSupportToTtreeFindLevel(linkRef[itemSet[index]].childRef,
level-1,index,itemSet);
}
}
}
}
/*---------------------------------------------------------------------- */
/* */
/* PRUNING */
/* */
/*---------------------------------------------------------------------- */
/* PRUNE LEVEL N */
/** Prunes the given level in the T-tree. <P> Operates in a recursive
manner to first find the appropriate level in the T-tree before processing
the required level (when found). Pruning carried out according to value of
<TT>minSupport</TT> field.
@param linkRef The reference to the current sub-branch of T-tree (start at
top of tree)
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@return true if all nodes at a given level in the given branch of the
T-tree have been pruned, false otherwise. */
protected boolean pruneLevelN(TtreeNode [] linkRef, int level) {
int size = linkRef.length;
// At right level;
if (level == 1) {
boolean allUnsupported = true;
// Step through level and set to null where below min support
for (int index1=1;index1<size;index1++) {
if (linkRef[index1] != null) {
if (linkRef[index1].support < minSupport)
linkRef[index1] = null;
else {
numFrequentsets++;
allUnsupported = false;
}
}
}
return(allUnsupported);
}
// Wrong level, Step through row
for (int index1=level;index1<size;index1++) {
if (linkRef[index1] != null) {
// If child branch step down branch
if (linkRef[index1].childRef != null) {
if (pruneLevelN(linkRef[index1].childRef,level-1))
linkRef[index1].childRef=null;
}
}
}
return(false);
}
/*---------------------------------------------------------------------- */
/* */
/* LEVEL GENERATION */
/* */
/*---------------------------------------------------------------------- */
/* GENERATE LEVEL 2 */
/** Generates level 2 of the T-tree. <P> The general
<TT>generateLevelN</TT> method assumes we have to first find the right
level in the T-tree, that is not necessary in this case of level 2. */
protected void generateLevel2() {
// Set next level flag
nextLevelExists=false;
// loop through top level
for (int index=2;index<startTtreeRef.length;index++) {
// If supported T-tree node (i.e. it exists)
if (startTtreeRef[index] != null) generateNextLevel(startTtreeRef,
index,realloc2(null,(short) index));
}
}
/* GENERATE LEVEL N */
/** Commences process of generating remaining levels in the T-tree (other
than top and 2nd levels). <P> Proceeds in a recursive manner level by level
until the required level is reached. Example, if we have a T-tree of the form:
<PRE>
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
</PRE><P>
Where all nodes are supported and we wish to add the third level we would
walk the tree and attempt to add new nodes to every level 2 node found.
Having found the correct level we step through starting from B (we cannot
add a node to A), so in this case there is only one node from which a level
3 node may be attached.
@param linkRef the reference to the current sub-branch of T-tree (start at
top of tree).
@param level the level marker, set to the required level at the start and
then decremented by 1 on each recursion.
@param itemSet the current itemset under consideration. */
protected void generateLevelN(TtreeNode[] linkRef, int level,
short[] itemSet) {
int index1;
int localSize = linkRef.length;
// Correct level
if (level == 1) {
for (index1=2;index1<localSize;index1++) {
// If supported T-tree node
if (linkRef[index1] != null) generateNextLevel(linkRef,index1,
realloc2(itemSet,(short) index1));
}
}
// Wrong level
else {
for (index1=level;index1<localSize;index1++) {
// If supported T-tree node
if (linkRef[index1]!=null && linkRef[index1].childRef!=null) {
generateLevelN(linkRef[index1].childRef,level-1,
realloc2(itemSet,(short) index1));
}
}
}
}
/* GENERATE NEXT LEVEL */
/** Generates a new level in the T-tree from a given "parent" node. <P>
Example 1, given the following:
<PRE>
(A) ----- (B) ----- (C)
| |
| |
(A) (A) ----- (B)
</PRE><P>
where we wish to add a level 3 node to node (B), i.e. the node {A}, we
would proceed as follows:
<OL>
<LI> Generate a new level in the T-tree attached to node (B) of length
one less than the numeric equivalent of B i.e. 2-1=1.
<LI> Loop through parent level from (A) to node immediately before (B).
<LI> For each supported parent node create an itemset label by combing the
index of the parent node (e.g. A) with the complete itemset label for B ---
{C,B} (note reverse order), thus for parent node (B) we would get a new
level in the T-tree with one node in it --- {C,B,A} represented as A.
<LI> For this node to be a candidate large item set its size-1 subsets must
be supported, there are three of these in this example {C,A}, {C,B} and
{B,A}. We know that the first two are supported because they are in the
current branch, but {B,A} is in another branch. So we must generate this
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -