📄 hashtree.java
字号:
/**
* @(#)HashTree.java
*
* This class represents the data structure in which the candidate itemsets
* are stored.
*
* @author Adrian Bright
*/
import java.util.*;
import java.io.*;
public class HashTree
{
/* Identifier used to identify the level in the tree, starting at the root node (0) */
private static final int TOP_LEVEL = 0;
/* The number of stems at each level.. */
private static int stemNum = 3;
/* The number of itemsets each bucket will hold. */
private static int bucketSize = 3;
/* Array which stores the definitions of each stem, ie. which keys hash into each stem. */
StemDescription[] stemDescriptors;
/* The root node of the hashtree. */
HashMap tree;
/* Stores the identifiers of each item, eg. '1' may represent "beer". */
LinkedList itemsRef;
/**
* HashTree Initialises the HashTree
* @param itemsReference The list of identifiers for each item.
*/
public HashTree(LinkedList itemsReference)
{
itemsRef = itemsReference;
tree = new HashMap();
defineStems(itemsRef);
}
/**
* addCandidates Adds the candidate itemsets to the Hashtree.
* @param currentItemsets The list of itemsets to add to the hashtree.
*/
public void addCandidates(LinkedList currentItemsets)
{
ListIterator itItemsets = currentItemsets.listIterator(0);
ItemSet currentItemset;
LinkedList currentItemsetItems;
ListIterator itItemsetItems;
MyTreeNode newBucket;
String currentItem;
int count;
Character currentStemID;
while (itItemsets.hasNext())
{
/* search in the tree to see if there already exists a matching entry;
* if not then insert a new bucket */
currentItemset = (ItemSet)itItemsets.next();
currentItemsetItems = currentItemset.getItems();
itItemsetItems = currentItemsetItems.listIterator(0);
currentItem = (String)itItemsetItems.next(); // only need to look at first item NO NEED FOR LOOP!
/* refer to stem descriptors to get relevant ID */
for (count=0; count<stemDescriptors.length && (!((StemDescription)stemDescriptors[count]).keyExists(currentItem)); count++)
{
}
currentStemID = ((StemDescription)stemDescriptors[count]).getID();
if (tree.containsKey(currentStemID))
{
((MyTreeNode)tree.get(currentStemID)).addItemset(currentItemset);
}
else
{
newBucket = new MyTreeNode(bucketSize,stemNum,itemsRef,stemDescriptors,TOP_LEVEL+1);
newBucket.addItemset(currentItemset);
tree.put(currentStemID,newBucket);
}
}
}
/**
* defineStems Defines the keys which will hash onto each stem.
* @param itemsRef The list of identifiers for each item.
*/
public void defineStems(LinkedList itemsRef)
{
int count;
char stemIDcount = 'A';
ListIterator itItemsRef = itemsRef.listIterator(0);
Character stemID;
/* define stem descriptors */
stemDescriptors = new StemDescription[stemNum]; // each stem will have a StemDescription
for (count=0; count<stemNum; count++)
{
stemID = new Character(stemIDcount);
stemIDcount++;
stemDescriptors[count] = new StemDescription(stemID);
}
while (itItemsRef.hasNext())
{
for (count=0; itItemsRef.hasNext() && count < stemNum; count++)
{
stemDescriptors[count].addKey(((ItemRef)itItemsRef.next()).getItemID());
}
}
/* print stem descriptors */
/*for (count=0; count<stemDescriptors.length; count++)
{
System.out.println("Stem " + count + " id " + ((StemDescription)stemDescriptors[count]).getID() + ": " + ((StemDescription)stemDescriptors[count]).getKeys());
}*/
}
/**
* passTransactions Passes transactions over the candidate itemsets,
* incrementing their support count as required.
* @param level The level in the tree at which the transactions need to be inserted.
* @param fileName The file in which the transactions are stored.
*/
public void passTransactions(int level,String fileName)
{
try
{
Transaction newTrans = new Transaction();
ArrayList candidateItemsets;
ListIterator itTransItems;
MyTreeNode currentNode;
String currentTransItem;
int count;
Character currentStemID;
BufferedReader inputStream = new BufferedReader(new FileReader(fileName));
String line = null;
line = inputStream.readLine();
StringTokenizer st;
LinkedList items = new LinkedList();
String currentTransactionID = new String();
String newTransactionID;
LinkedList transItems = new LinkedList();
boolean firstLine = true;
String nextLine;
boolean lastLine = false;
line = inputStream.readLine();
nextLine = line;
while (line != null)
{
nextLine = inputStream.readLine();
st = new StringTokenizer(line);
newTransactionID = st.nextToken();
if (!newTransactionID.equals(currentTransactionID))
{
if (!firstLine || lastLine)
{
newTrans.setItems(items);
transItems = (LinkedList)newTrans.getItems();
itTransItems = items.listIterator(0);
currentTransItem = (String)itTransItems.next();
for (count=0; count < stemDescriptors.length; count++)
{
currentStemID = ((StemDescription)stemDescriptors[count]).getID();
if (tree.containsKey(currentStemID))
{
currentNode = (MyTreeNode)tree.get(currentStemID);
currentNode.passTransaction(newTrans,level);
}
}
}
else
{
firstLine = false;
}
currentTransactionID = newTransactionID;
newTrans = new Transaction();
items = new LinkedList();
newTrans.setTID(newTransactionID);
}
while (st.hasMoreTokens())
{
items.add(st.nextToken());
}
line = nextLine;
}
inputStream.close();
}
catch(FileNotFoundException e)
{
System.out.println("File " + fileName + " not found or could not be opened.");
}
catch(IOException e)
{
System.out.println("Error reading from file " + fileName + ".");
}
}
/**
* removeInfrequentItemsets Once the infrequent itemsets have been identified,
* this removes them from the hashtree.
* @param minSup The minimum support for the search.
*/
public void removeInfrequentItemsets(int minSup)
{
MyTreeNode currentNode;
int count;
Character currentStemID;
for (count=0; count < stemDescriptors.length; count++)
{
currentStemID = ((StemDescription)stemDescriptors[count]).getID();
if (tree.containsKey(currentStemID))
{
currentNode = (MyTreeNode)tree.get(currentStemID);
currentNode.removeInfrequentItemsets(minSup);
}
}
}
/**
* printTree Prints the contents of the tree to the screen.
*/
public void printTree()
{
ArrayList treeContents = new ArrayList(tree.values());
MyTreeNode currentNode;
ArrayList bucketItemsets;
ItemSet currentItemset;
LinkedList currentItemsetItems;
ListIterator items;
System.out.println("Printing tree\n******************************");
System.out.println("tree size " + treeContents.size());
for (int count = 0; count < treeContents.size(); count++)
{
currentNode = (MyTreeNode)treeContents.get(count);
currentNode.printContents();
}
}
/**
* printTree Prints the contents of the tree to the screen.
* @param n The number of items in the current itemset, eg. 1-itemset.
* @return The list of frequent itemsets of a particular length.
*/
public LinkedList getFrequentNItemsets(int n)
{
LinkedList fNItemsetsArray = new LinkedList();
MyTreeNode currentNode;
Character currentStemID;
for (int count=0; count < stemDescriptors.length; count++)
{
currentStemID = ((StemDescription)stemDescriptors[count]).getID();
if (tree.containsKey(currentStemID))
{
currentNode = (MyTreeNode)tree.get(currentStemID);
fNItemsetsArray.addAll(currentNode.getItemsets(n));
}
}
return fNItemsetsArray;
}
/**
* getAllFrequentItemsets Returns all the frequent itemsets, regardless of length.
* @return The list of frequent itemsets.
*/
public ArrayList getAllFrequentItemsets()
{
ArrayList fNItemsetsArray = new ArrayList();
MyTreeNode currentNode;
Character currentStemID;
for (int count=0; count < stemDescriptors.length; count++)
{
currentStemID = ((StemDescription)stemDescriptors[count]).getID();
if (tree.containsKey(currentStemID))
{
currentNode = (MyTreeNode)tree.get(currentStemID);
fNItemsetsArray.addAll(currentNode.getAllItemsets());
}
}
return fNItemsetsArray;
}
/**
* printAllFrequentItemsets Prints all the frequent itemsets to screen.
* @param numberOfTransactions The total number of transactions.
* @param minSupPercentage The minimum support express as a percentage.
*/
public void printAllFrequentItemsets(int numberOfTransactions,int minSupPercentage)
{
System.out.println("Frequent itemsets:");
ArrayList itemsets = getAllFrequentItemsets();
ItemSet currentItemset;
for (int count = 0; count < itemsets.size(); count++)
{
currentItemset = (ItemSet)itemsets.get(count);
System.out.println("Itemset: " + currentItemset.itemsString(itemsRef) + ", support: " + ((currentItemset.getSupport()*100)/numberOfTransactions) + "%.");
}
}
/**
* printAllFrequentItemsets Prints all the frequent itemsets to file.
* @param filename The file to which the frequent itemset list should be written.
* @param numberOfTransactions The total number of transactions.
* @param minSupPercentage The minimum support express as a percentage.
*/
public void printAllFrequentItemsets(String filename,int numberOfTransactions,int minSupPercentage)
{
PrintWriter outputStream = null;
try
{
outputStream = new PrintWriter(new FileOutputStream(filename));
}
catch(FileNotFoundException e)
{
System.out.println("Error opening " + filename);
System.exit(0);
}
ArrayList itemsets = getAllFrequentItemsets();
ItemSet currentItemset;
for (int count = 0; count < itemsets.size(); count++)
{
currentItemset = (ItemSet)itemsets.get(count);
outputStream.println("Itemset: " + currentItemset.itemsString(itemsRef) + ", support: " + ((currentItemset.getSupport()*100)/numberOfTransactions) + "%.");
}
outputStream.close();
System.out.println("--Frequent itemsets written to file.");
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -