📄 hashtree.java

📁 apriori演算法於JAVA環境下開發用於資料探勘分類產生規則
💻 JAVA
字号:
/**
 * @(#)HashTree.java
 *
 * This class represents the data structure in which the candidate itemsets
 * are stored.
 *
 * @author Adrian Bright
 */

import java.util.*;
import java.io.*;

public class HashTree
{
	/* Identifier used to identify the level in the tree, starting at the root node (0) */
	private static final int TOP_LEVEL = 0;
	/* The number of stems at each level.. */
	private static int stemNum = 3; 
	/* The number of itemsets each bucket will hold. */
	private static int bucketSize = 3;
	/* Array which stores the definitions of each stem, ie. which keys hash into each stem. */
	StemDescription[] stemDescriptors;
	/* The root node of the hashtree. */
	HashMap tree;
	/* Stores the identifiers of each item, eg. '1' may represent "beer". */
	LinkedList itemsRef;
	
	/**
     * HashTree Initialises the HashTree
     * @param itemsReference The list of identifiers for each item.
     */
	public HashTree(LinkedList itemsReference)
	{
		itemsRef = itemsReference;
		tree = new HashMap();
		defineStems(itemsRef);
	}

	/**
     * addCandidates Adds the candidate itemsets to the Hashtree.
     * @param currentItemsets The list of itemsets to add to the hashtree.
     */
	public void addCandidates(LinkedList currentItemsets)
	{
		ListIterator itItemsets = currentItemsets.listIterator(0);
		ItemSet currentItemset;
		LinkedList currentItemsetItems;
		ListIterator itItemsetItems;
	
		MyTreeNode newBucket;
		String currentItem;
	
		int count;
		Character currentStemID;

		while (itItemsets.hasNext())
		{
			/* search in the tree to see if there already exists a matching entry; 
			 * if not then insert a new bucket */
			currentItemset = (ItemSet)itItemsets.next();
			currentItemsetItems = currentItemset.getItems();
			itItemsetItems = currentItemsetItems.listIterator(0);
			
			currentItem = (String)itItemsetItems.next(); // only need to look at first item NO NEED FOR LOOP!
			/* refer to stem descriptors to get relevant ID */
			for (count=0; count<stemDescriptors.length && (!((StemDescription)stemDescriptors[count]).keyExists(currentItem)); count++)
			{
			}
			currentStemID = ((StemDescription)stemDescriptors[count]).getID();
			if (tree.containsKey(currentStemID))
			{
				((MyTreeNode)tree.get(currentStemID)).addItemset(currentItemset);
			}
			else
			{
				newBucket = new MyTreeNode(bucketSize,stemNum,itemsRef,stemDescriptors,TOP_LEVEL+1);
				newBucket.addItemset(currentItemset);
				tree.put(currentStemID,newBucket);
			}
		}
	}

	/**
     * defineStems Defines the keys which will hash onto each stem.
     * @param itemsRef The list of identifiers for each item.
     */
	public void defineStems(LinkedList itemsRef)
	{
		int count;
		char stemIDcount = 'A';
		ListIterator itItemsRef = itemsRef.listIterator(0);
		Character stemID;		
		/* define stem descriptors */
		stemDescriptors = new StemDescription[stemNum]; // each stem will have a StemDescription
		for (count=0; count<stemNum; count++)
		{
			stemID = new Character(stemIDcount);
			stemIDcount++;
			stemDescriptors[count] = new StemDescription(stemID);
		}

		while (itItemsRef.hasNext())
		{
			for (count=0; itItemsRef.hasNext() && count < stemNum; count++)
			{
				stemDescriptors[count].addKey(((ItemRef)itItemsRef.next()).getItemID());
			}
			
		}

		/* print stem descriptors */
		/*for (count=0; count<stemDescriptors.length; count++)
		{
			System.out.println("Stem " + count + " id " + ((StemDescription)stemDescriptors[count]).getID() + ": " + ((StemDescription)stemDescriptors[count]).getKeys());
		}*/
	}

	/**
     * passTransactions Passes transactions over the candidate itemsets,
	 * incrementing their support count as required.
	 * @param level The level in the tree at which the transactions need to be inserted.
	 * @param fileName The file in which the transactions are stored.
     */
	public void passTransactions(int level,String fileName)
	{
		try
		{
			Transaction newTrans = new Transaction();
			ArrayList candidateItemsets;
			ListIterator itTransItems;
			
			MyTreeNode currentNode;
			String currentTransItem;
		
			int count;
			Character currentStemID;

			BufferedReader inputStream = new BufferedReader(new FileReader(fileName));
			String line = null;
			line = inputStream.readLine();
			StringTokenizer st;
			LinkedList items = new LinkedList();
			String currentTransactionID = new String();
			String newTransactionID;
			LinkedList transItems = new LinkedList();
			boolean firstLine = true;

			String nextLine;
			boolean lastLine = false;

			line = inputStream.readLine();
			nextLine = line;
			while (line != null)
			{
				nextLine = inputStream.readLine();
				st = new StringTokenizer(line);
				newTransactionID = st.nextToken();
				if (!newTransactionID.equals(currentTransactionID))
				{
					if (!firstLine || lastLine)
					{
						newTrans.setItems(items);
						transItems = (LinkedList)newTrans.getItems();
						itTransItems = items.listIterator(0);
						currentTransItem = (String)itTransItems.next();
						for (count=0; count < stemDescriptors.length; count++)
						{
							currentStemID = ((StemDescription)stemDescriptors[count]).getID();
							if (tree.containsKey(currentStemID))
							{
								currentNode = (MyTreeNode)tree.get(currentStemID);
								currentNode.passTransaction(newTrans,level);
							}
						}
					}
					else
					{
						firstLine = false;	
					}
					currentTransactionID = newTransactionID;
					newTrans = new Transaction();
					items = new LinkedList();
					newTrans.setTID(newTransactionID);
				}
				while (st.hasMoreTokens())
				{
					items.add(st.nextToken());
				}
				line = nextLine;
				
			}
			inputStream.close();

		}
		catch(FileNotFoundException e)
		{
			System.out.println("File " + fileName + " not found or could not be opened.");
		}
		catch(IOException e)
		{
			System.out.println("Error reading from file " + fileName + ".");
		}
	}

	/**
     * removeInfrequentItemsets Once the infrequent itemsets have been identified,
	 * this removes them from the hashtree.
     * @param minSup The minimum support for the search.
     */
	public void removeInfrequentItemsets(int minSup)
	{
		
		MyTreeNode currentNode;
	
		int count;
		Character currentStemID;

		for (count=0; count < stemDescriptors.length; count++)
		{
			currentStemID = ((StemDescription)stemDescriptors[count]).getID();
			if (tree.containsKey(currentStemID))
			{
				currentNode = (MyTreeNode)tree.get(currentStemID);
				currentNode.removeInfrequentItemsets(minSup);
			}
		}
	}

	/**
     * printTree Prints the contents of the tree to the screen.
     */
	public void printTree()
	{
		ArrayList treeContents = new ArrayList(tree.values());
		MyTreeNode currentNode;
		ArrayList bucketItemsets;
		ItemSet currentItemset;
		LinkedList currentItemsetItems;
		ListIterator items;
		System.out.println("Printing tree\n******************************");
		System.out.println("tree size " + treeContents.size());
		for (int count = 0; count < treeContents.size(); count++)
		{
			currentNode = (MyTreeNode)treeContents.get(count);
			currentNode.printContents();
		}
	}
	
	/**
     * printTree Prints the contents of the tree to the screen.
	 * @param n The number of items in the current itemset, eg. 1-itemset.
	 * @return The list of frequent itemsets of a particular length.
     */
	public LinkedList getFrequentNItemsets(int n)
	{
		LinkedList fNItemsetsArray = new LinkedList();
		
		MyTreeNode currentNode;
	
		Character currentStemID;

		for (int count=0; count < stemDescriptors.length; count++)
		{
			currentStemID = ((StemDescription)stemDescriptors[count]).getID();
			if (tree.containsKey(currentStemID))
			{
				currentNode = (MyTreeNode)tree.get(currentStemID);
				fNItemsetsArray.addAll(currentNode.getItemsets(n));
			}
		}
		return fNItemsetsArray;
	}

	/**
     * getAllFrequentItemsets Returns all the frequent itemsets, regardless of length.
	 * @return The list of frequent itemsets.
     */
	public ArrayList getAllFrequentItemsets()
	{
		ArrayList fNItemsetsArray = new ArrayList();
		
		MyTreeNode currentNode;
	
		Character currentStemID;

		for (int count=0; count < stemDescriptors.length; count++)
		{
			currentStemID = ((StemDescription)stemDescriptors[count]).getID();
			if (tree.containsKey(currentStemID))
			{
				currentNode = (MyTreeNode)tree.get(currentStemID);
				fNItemsetsArray.addAll(currentNode.getAllItemsets());
			}
		}
		return fNItemsetsArray;
	}

	/**
     * printAllFrequentItemsets Prints all the frequent itemsets to screen.
	 * @param numberOfTransactions The total number of transactions.
	 * @param minSupPercentage The minimum support express as a percentage.
     */
	public void printAllFrequentItemsets(int numberOfTransactions,int minSupPercentage)
	{
		System.out.println("Frequent itemsets:");
		ArrayList itemsets = getAllFrequentItemsets();
		ItemSet currentItemset;
		for (int count = 0; count < itemsets.size(); count++)
		{
			currentItemset = (ItemSet)itemsets.get(count);
			System.out.println("Itemset: " + currentItemset.itemsString(itemsRef) + ", support: " + ((currentItemset.getSupport()*100)/numberOfTransactions) + "%.");
		}
	}

	/**
     * printAllFrequentItemsets Prints all the frequent itemsets to file.
	 * @param filename The file to which the frequent itemset list should be written.
	 * @param numberOfTransactions The total number of transactions.
	 * @param minSupPercentage The minimum support express as a percentage.
     */
	public void printAllFrequentItemsets(String filename,int numberOfTransactions,int minSupPercentage)
	{
		PrintWriter outputStream = null;
		try
		{
			outputStream = new PrintWriter(new FileOutputStream(filename));
		}
		catch(FileNotFoundException e)
		{
			System.out.println("Error opening " + filename);
			System.exit(0);
		}
		ArrayList itemsets = getAllFrequentItemsets();
		ItemSet currentItemset;
		for (int count = 0; count < itemsets.size(); count++)
		{
			currentItemset = (ItemSet)itemsets.get(count);
			outputStream.println("Itemset: " + currentItemset.itemsString(itemsRef) + ", support: " + ((currentItemset.getSupport()*100)/numberOfTransactions) + "%.");
		}
		outputStream.close();
		System.out.println("--Frequent itemsets written to file.");
	}
}
💿 文件大小 27 K
👤 上传用户 dasdasdc
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#apriori #JAVA #算法 #分
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -