📄 totalsupporttree.java
字号:
set and test it. More generally we must test all cardinality-1 subsets
which do not include the first element. This is done using the method
<TT>testCombinations</TT>.
</OL>
<P>Example 2, given:
<PRE>
(A) ----- (D)
|
|
(A) ----- (B) ----- (C)
|
|
(A) ----- (B)
</PRE><P>
where we wish to add a level 4 node (A) to (B) this would represent the
complete label {D,C,B,A}, the N-1 subsets will then be {{D,C,B},{D,C,A},
{D,B,A} and {C,B,A}}. We know the first two are supported because they are
contained in the current sub-branch of the T-tree, {D,B,A} and {C,B,A} are
not.
</OL>
@param parentRef the reference to the level in the sub-branch of the T-tree
under consideration.
@param endIndex the index of the current node under consideration.
@param itemSet the complete label represented by the current node (required
to generate further itemsets to be X-checked). */
protected void generateNextLevel(TtreeNode[] parentRef, int endIndex,
        short[] itemSet) {
    // The node being extended receives a fresh child level with endIndex
    // slots; every slot starts out as null.
    TtreeNode extendedNode = parentRef[endIndex];
    extendedNode.childRef = new TtreeNode[endIndex];

    // Visit the siblings that precede the current node in the parent level
    for (int sibling = 1; sibling < endIndex; sibling++) {
        // A missing sibling contributes no child
        if (parentRef[sibling] == null) {
            continue;
        }
        // Candidate label built by realloc2 from the current label and the
        // sibling's index
        short[] candidate = realloc2(itemSet, (short) sibling);
        // Create the child only when testCombinations accepts the
        // candidate; otherwise the slot simply keeps its default null
        if (testCombinations(candidate)) {
            extendedNode.childRef[sibling] = new TtreeNode();
            nextLevelExists = true;
        }
    }
}
/* TEST COMBINATIONS */
/** Commences the process of testing whether the N-1 sized sub-sets of a
newly created T-tree node are supported elsewhere in the Ttree --- (a
process referred to as "X-Checking"). <P> Thus, given a candidate large
itemset of size N whose (N-1)-sized subsets in the current branch of the
T-tree are supported, tests whether the (N-1)-sized subsets contained in
other branches are also supported. Proceeds as follows:
<OL>
<LI> Using current item set split this into two subsets:
<P>itemSet1 = first two items in current item set
<P>itemSet2 = remainder of items in current item set
<LI> Calculate size-1 combinations in itemSet2
<LI> For each combination from (2) append to itemSet1
</OL>
<P>Example 1:
<PRE>
currentItemSet = {A,B,C}
itemSet1 = {B,A} (change of ordering)
size = {A,B,C}-2 = 1
itemSet2 = {C} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C}
</PRE>
<P>Example 2:
<PRE>
currentItemSet = {A,B,C,D}
itemSet1 = {B,A} (change of ordering)
itemSet2 = {C,D} (currentItemSet with first two elements removed)
calculate combinations between {B,A} and {C,D}
</PRE>
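@param currentItemSet the given itemset.
@return true if the itemset has fewer than three items or if every tested
combination is found in the T-tree, and false otherwise. */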
protected boolean testCombinations(short[] currentItemSet) {
    // No need to test 1- and 2-itemsets
    if (currentItemSet.length < 3) return(true);

    // Create itemSet1: the first two items of the given itemset, swapped
    short[] itemSet1 = new short[2];
    itemSet1[0] = currentItemSet[1];
    itemSet1[1] = currentItemSet[0];

    // Create itemSet2: the result of removeFirstNelements for the first
    // two items (the previously computed, never-read "size" local has been
    // removed)
    short[] itemSet2 = removeFirstNelements(currentItemSet,2);

    // Check the combinations; true only if combinations() reports that
    // every one of them was found
    return(combinations(null,0,2,itemSet1,itemSet2));
}
/* COMBINATIONS */
/** Determines the cardinality N combinations of a given itemset and then
checks whether those combinations are supported in the T-tree. <P>
Operates in a recursive manner.
<P>Example 1: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C}
<PRE>
itemSet2.length = 1
endIndex = 2 greater than itemSet2.length if condition succeeds
testSet = null+{B,A} = {B,A}
return true if {B,A} supported and false otherwise
</PRE>
<P>Example 2: Given --- sofarSet=null,
startIndex=0, endIndex=2, itemSet1 = {B,A} and itemSet2 = {C,D}
<PRE>
endIndex not greater than length of {C,D}
go into loop
tempSet = {} + {C} = {C}
combinations with --- sofarSet={C}, startIndex=1,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C,D}
endIndex greater than length of {C,D}
testSet = {C} + {B,A} = {C,B,A}
tempSet = {} + {D} = {D}
combinations with --- sofarSet={D}, startIndex=2,
endIndex=3, itemSet1 = {B,A} and itemSet2 = {C,D}
endIndex greater than length of {C,D}
testSet = {D} + {B,A} = {D,B,A}
</PRE>
@param sofarSet The combination itemset generated so far (set to null at
start)
@param startIndex the current index in the given itemSet2 (set to 0 at
start).
@param endIndex The current index of the given itemset (set to 2 at start)
and incremented on each recursion until it is greater than the length of
itemset2.
@param itemSet1 The first two elements (reversed) of the total label for the
current item set.
@param itemSet2 The remainder of the current item set.
*/
private boolean combinations(short[] sofarSet, int startIndex,
        int endIndex, short[] itemSet1, short[] itemSet2) {
    // Recursion bottoms out once endIndex has moved past the end of
    // itemSet2: join what has been collected with itemSet1 and look the
    // result up in the T-tree.
    if (endIndex > itemSet2.length) {
        short[] candidate = append(sofarSet, itemSet1);
        return findItemSetInTtree(candidate);
    }

    // Otherwise extend the partial set with each element of itemSet2 in
    // [startIndex, endIndex) and recurse with the window shifted by one.
    for (int pos = startIndex; pos < endIndex; pos++) {
        short[] extended = realloc2(sofarSet, itemSet2[pos]);
        boolean found = combinations(extended, pos + 1, endIndex + 1,
                itemSet1, itemSet2);
        // Stop at the first combination that is not found
        if (!found) {
            return false;
        }
    }

    // Every combination explored from here was found
    return true;
}
/*---------------------------------------------------------------------- */
/* */
/* T-TREE SEARCH METHODS */
/* */
/*---------------------------------------------------------------------- */
/* FIND ITEM SET IN T-TREE*/
/** Commences process of determining if an itemset exists in a T-tree. <P>
Used to X-check existence of Ttree nodes when generating new levels of the
Tree. Note that T-tree node labels are stored in "reverse", e.g. {3,2,1}.
@param itemSet the given itemset (IN REVERSE ORDER).
@return returns true if itemset found and false otherwise. */
protected boolean findItemSetInTtree(short[] itemSet) {
    // Top-level node selected by the first element of the given itemset
    TtreeNode topNode = startTtreeRef[itemSet[0]];

    // No such top-level node: the itemset is not in the T-tree
    if (topNode == null) {
        return false;
    }

    // A single-element itemset is found as soon as its top-level node exists
    int lastIndex = itemSet.length - 1;
    if (lastIndex == 0) {
        return true;
    }

    // More elements remain but there is no child level to descend into
    if (topNode.childRef == null) {
        return false;
    }

    // Continue down the branch starting with the second element
    return findItemSetInTtree2(itemSet, 1, lastIndex, topNode.childRef);
}
/** Returns true if the given itemset is found in the T-tree and false
otherwise. <P> Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given itemset (set to 1 at
start).
@param lastIndex the index of the last element in the given itemset.
@param linkRef the reference to the current T-tree level.
@return returns true if itemset found and false otherwise. */
private boolean findItemSetInTtree2(short[] itemSet, int index,
        int lastIndex, TtreeNode[] linkRef) {
    // Iterative form of the descent: follow one itemset element per level
    TtreeNode[] level = linkRef;
    for (int pos = index; ; pos++) {
        TtreeNode node = level[itemSet[pos]];

        // Element at "pos" has no node in this level: itemset not found
        if (node == null) {
            return false;
        }
        // The node for the final element exists: itemset found
        if (pos == lastIndex) {
            return true;
        }
        // Elements remain but there is no deeper level to search
        if (node.childRef == null) {
            return false;
        }
        level = node.childRef;
    }
}
/* GET SUPPORT FOR ITEM SET IN T-TREE */
/** Commences process for finding the support value for the given item set
in the T-tree. <P> Used when generating Association Rules (ARs). Note that
itemsets are stored in reverse order in the T-tree therefore the given
itemset must be processed in reverse.
@param itemSet the given itemset.
@return returns the support value (0 if not found). */
protected int getSupportForItemSetInTtree(short[] itemSet) {
    int lastIndex = itemSet.length-1;

    // The itemset is processed from its last element backwards, so the
    // top-level node is selected by the last element
    TtreeNode topNode = startTtreeRef[itemSet[lastIndex]];

    // Item set not in Ttree therefore return 0
    if (topNode == null) return(0);

    // If single item set return its support
    if (lastIndex == 0) return(topNode.support);

    // Further elements remain but the node has no child level, so the
    // itemset cannot be in the T-tree: return 0 rather than descending
    // through a null reference (mirrors the check in findItemSetInTtree)
    if (topNode.childRef == null) return(0);

    // Otherwise continue down branch with the preceding element
    return(getSupportForItemSetInTtree2(itemSet,lastIndex-1,
            topNode.childRef));
}
/** Returns the support value for the given itemset if found in the T-tree
and 0 otherwise. <P> Operates recursively.
@param itemSet the given itemset.
@param index the current index in the given itemset.
@param linkRef the reference to the current T-tree level.
@return returns the support value (0 if not found). */
private int getSupportForItemSetInTtree2(short[] itemSet, int index,
        TtreeNode[] linkRef) {
    // No level to search (the parent node had no children): the itemset
    // is not in the T-tree, so report 0 instead of dereferencing null
    if (linkRef == null) return(0);

    // Node in this level selected by the element at "index"
    TtreeNode node = linkRef[itemSet[index]];

    // Item set not in Ttree therefore return 0
    if (node == null) return(0);

    // Index 0 is the final element to be matched (the itemset is walked
    // backwards), so this node carries the support for the whole itemset
    if (index == 0) return(node.support);

    // Otherwise continue with the preceding element one level down
    return(getSupportForItemSetInTtree2(itemSet,index-1,node.childRef));
}
/*----------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE (AR) GENERATION */
/* */
/*----------------------------------------------------------------------- */
/* GENERATE ASSOCIATION RULES */
/** Initiates process of generating Association Rules (ARs) from a
T-tree. */
public void generateARs() {
    // Banner for the command line interface
    System.out.println("GENERATE ARs:\n-------------");

    // Reset the head of the current rule list before generating
    currentRlist.startRulelist = null;

    // Start the generation from the top level of the T-tree
    generateARs2();
}
/** Loops through top level of T-tree as part of the AR generation
process. */
private void generateARs2() {
    // Visit every top-level slot from 1 to numOneItemSets inclusive
    for (int item = 1; item <= numOneItemSets; item++) {
        TtreeNode topNode = startTtreeRef[item];

        // Only nodes that exist and reach the minimum support are expanded
        if (topNode != null && topNode.support >= minSupport) {
            // One-element label for this top-level node
            short[] singleItemSet = { (short) item };
            generateARs(singleItemSet, item, topNode.childRef);
        }
    }
}
/* GENERATE ASSOCIATION RULES */
/** Continues process of generating association rules from a T-tree by
recursively looping through T-tree level by level.
@param itemSetSofar the label for a T-tree node as generated sofar.
@param size the length/size of the current array level in the T-tree.
@param linkRef the reference to the current array level in the T-tree. */
protected void generateARs(short[] itemSetSofar, int size,
        TtreeNode[] linkRef) {
    // Nothing below this node: stop
    if (linkRef == null) {
        return;
    }

    // Scan slots 1 .. size-1 of the current level
    for (int item = 1; item < size; item++) {
        TtreeNode node = linkRef[item];

        // Only nodes that exist and reach the minimum support are processed
        if (node != null && node.support >= minSupport) {
            // Label for this node, built by realloc2 from the label so far
            // and the slot index
            short[] nodeLabel = realloc2(itemSetSofar, (short) item);
            // Generate the rules for this itemset ...
            generateARsFromItemset(nodeLabel, node.support);
            // ... and then carry on into its child level
            generateARs(nodeLabel, item, node.childRef);
        }
    }
}
/* GENERATE ASSOCIATION RULES */
/** Generates all association rules for a given large item set found in a
T-tree structure. <P> Called from <TT>generateARs</TT> method.
@param itemSet the given large itemset.
@param support the associated support value for the given large itemset. */
private void generateARsFromItemset(short[] itemSet, double support) {
    // Candidate rule left-hand sides derived from the given itemset
    short[][] candidates = combinations(itemSet);

    for (int i = 0; i < candidates.length; i++) {
        short[] candidate = candidates[i];

        // Complement of the candidate with respect to the given itemset
        short[] remainder = complement(candidate, itemSet);

        // A null complement means no rule is generated for this candidate
        if (remainder == null) {
            continue;
        }

        // Keep the rule only if its confidence reaches the threshold
        double ruleConfidence = getConfidence(candidate, support);
        if (ruleConfidence >= confidence) {
            currentRlist.insertRuleintoRulelist(candidate, remainder,
                    ruleConfidence);
        }
    }
}
/*----------------------------------------------------------------------- */
/* */
/* GET METHODS */
/* */
/*----------------------------------------------------------------------- */
/* GET CONFIDENCE */
/** Calculates and returns the confidence for an AR given the antecedent
item set and the support for the total item set.
@param antecedent the antecedent (LHS) of the AR.
@param support the support for the large itemset from which the AR is
generated.
@return the associated confidence value. */
protected double getConfidence(short[] antecedent, double support) {
// Get support for antecedent
double supportForAntecedent = (double)
getSupportForItemSetInTtree(antecedent);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -