⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hashtree.java

📁 数据挖掘的工具代码(包含fp-tree,appriory
💻 JAVA
字号:
/*
  ARMiner - Association Rules Miner

  Copyright (C) 2000  UMass/Boston - Computer Science Department

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or (at
  your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  USA

  The ARMiner Server was written by Dana Cristofor and Laurentiu
  Cristofor.

  The ARMiner Client was written by Abdelmajid Karatihy, Xiaoyong Kuang,
  and Lung-Tsung Li.

  The ARMiner package is currently maintained by Laurentiu Cristofor
  (laur@cs.umb.edu).
*/

/*
  This file is a part of the ARMiner project.

  (P)1999-2001 by ARMiner Server Team:
    Dana Cristofor
    Laurentiu Cristofor
*/

import java.util.*;

/*
  Maintenance log started on April 3rd, 2001 by Laurentiu Cristofor

  Apr. 3rd, 2001 - modified the HashTree so that it can change
                   dynamically its HashNode size in order to
                   index large numbers of itemsets.
*/

/**
   HashTree.java<P>

   A HashTree is a special data structure that is used to index
   a Vector of Itemset objects for more efficient processing.
 */
public class HashTree
{
  // Tags stored in Node.type to discriminate the two node kinds.
  private static final int LIST_NODE = 1;
  private static final int HASH_NODE = 2;

  // Defaults used by the single-argument constructor.
  private static final int DEFAULT_LIST_SIZE = 20;
  private static final int DEFAULT_HASH_SIZE = 40;

  // Capacity of a ListNode; when a leaf reaches this size, add() splits
  // it into a HashNode.
  private int LIST_SIZE;
  // Bucket count of every new HashNode; grown (2*n + 1) by readdAll()
  // whenever an add() overflows the tree.
  private int HASH_SIZE;

  // Common base of the two node kinds.
  private static class Node
  {
    public int type; // LIST_NODE or HASH_NODE
  }

  // A leaf node: stores up to LIST_SIZE indexes into the itemsets Vector.
  private class ListNode extends Node
  {
    public int[] indexes;   // index in Vector of Itemsets
    public int size;        // how many indexes we keep in above array
    public boolean visited; // have we seen this node?

    public ListNode()
    {
      type = LIST_NODE;
      indexes = new int[LIST_SIZE];
      size = 0;
      visited = false;
    }
  }

  // An interior node: maps the item found at the current tree depth to a
  // child node.
  private class HashNode extends Node
  {
    public MyHashtable children;

    public HashNode()
    {
      type = HASH_NODE;
      children = new MyHashtable(HASH_SIZE);
    }
  }

  // Minimal fixed-size table keyed by (key % capacity). Distinct keys may
  // share a bucket; this is tolerated because leaf processing re-checks
  // inclusion with Itemset.isIncludedIn().
  // NOTE(review): a negative key would produce a negative index and throw
  // ArrayIndexOutOfBoundsException -- assumes item ids are non-negative,
  // TODO confirm against Itemset.
  private static class MyHashtable
  {
    public Node[] contents;

    public MyHashtable(int size)
    {
      contents = new Node[size];
    }

    public void put(int key, Node n)
    {
      int index = key % contents.length;
      contents[index] = n;
    }

    public Node get(int key)
    {
      int index = key % contents.length;
      return contents[index];
    }

    // Enumerates the non-null slots of the table.
    public Enumeration elements()
    {
      return new Enumeration()
	{
	  int i = 0; // cursor over contents[]

	  public boolean hasMoreElements()
	  {
	    // skip empty slots before answering
	    while (i < contents.length 
		   && contents[i] == null)
	      i++;

	    if (i >= contents.length)
	      return false;
	    else
	      return true;
	  }

	  public Object nextElement()
	  {
	    // skip empty slots before returning the next node
	    while (i < contents.length 
		   && contents[i] == null)
	      i++;

	    if (i >= contents.length)
	      throw new NoSuchElementException();
	    else
	      return contents[i++];
	  }
	};
    }
  }
  
  private int counter;     // used for some computations
  private Vector leaves;   // keeps all leaves of the HashTree
  private Vector itemsets; // the Vector of Itemsets that we index
  private Node theRoot;    // the root of the HashTree

  // Reset the visited flag on all leaves gathered by prepareForDescent()
  // so that the next traversal processes each leaf exactly once again.
  private void unvisitLeaves()
  {
    for (int i = 0; i < leaves.size(); i++)
      ((ListNode)leaves.get(i)).visited = false;
  }

  /**
   * Create a new HashTree. The <code>listSize</code> parameter determines
   * after how many inserts in a ListNode we have to change it to a 
   * HashNode (i.e. perform a split). The <code>hashSize</code> parameter
   * can be specified to improve the efficiency of the structure.
   *
   * @param listSize   the size of the internal lists in the list nodes
   * @param hashSize   the size of the internal hashtables in the hash nodes
   * @param itemsets   the Vector of Itemsets that we should index
   * @exception IllegalArgumentException   <code>itemsets</code> is null
   * or <code>listSize <= 0</code> or <code>hashSize <= 0</code>  
   */
  public HashTree(int listSize, int hashSize, Vector itemsets)
  {
    if (itemsets == null || listSize <= 0 || hashSize <= 0)
      throw new IllegalArgumentException("invalid arguments to constructor");

    LIST_SIZE = listSize;
    HASH_SIZE = hashSize;
    this.itemsets = itemsets;
    theRoot = new ListNode();
    leaves = new Vector();
  }

  /**
   * Create a new HashTree. This initializes the HashTree with
   * default parameters.
   *
   * @param itemsets   the Vector of Itemsets that we should index
   * @exception IllegalArgumentException   <code>itemsets</code> is null
   */
  public HashTree(Vector itemsets)
  {
    this(DEFAULT_LIST_SIZE, DEFAULT_HASH_SIZE, itemsets);
  }

  /**
   * This method should be called before calling update() to gather
   * all leaves of the HashTree for more efficient processing.
   */
  public void prepareForDescent()
  {
    leaves.clear();
    prepare(theRoot);
  }

  // private recursive method: collects every ListNode reachable from
  // node into the leaves Vector
  private void prepare(Node node)
  {
    if (node.type == HASH_NODE)
      {
	Enumeration e = ((HashNode)node).children.elements();

	while (e.hasMoreElements())
	  prepare((Node)e.nextElement());
      }
    else // LIST_NODE
      leaves.add(node);
  }

  // reset HashTree and readd all itemsets added previously
  // this method may be called by add()
  private void readdAll()
  {
    // increase size of hash nodes
    HASH_SIZE = 2 * HASH_SIZE + 1;

    // first save leaf nodes
    prepareForDescent();

    // then reset HashTree
    theRoot = new ListNode();

    // readd everything that we added before
    for (int i = 0; i < leaves.size(); i++)
      {
	ListNode ln = (ListNode)leaves.get(i);

	for (int j = 0; j < ln.size; j++)
	  add(ln.indexes[j]);
      }

    // clean up
    leaves.clear();
  }

  // Thrown by the recursive add() when a full leaf cannot be split because
  // every item of the itemset has already been consumed; public add()
  // reacts by enlarging HASH_SIZE and rebuilding the tree via readdAll().
  private static class HashTreeOverflowException extends RuntimeException
  {
  }

  /**
   * This method indexes in the HashTree the Itemset at index
   * <code>index</code> from Vector <code>itemsets</code> which
   * was passed to the constructor of this HashTree.
   *
   * @param index   the index of the Itemset that we need to index in
   * this HashTree.
   */
  public void add(int index)
  {
    // repeat the operation if it results in a HashTree overflow
    while (true)
      try 
	{
	  theRoot = add(theRoot, 0, index);

	  // if we get here then the add() was successful and we can
	  // exit the loop
	  break;
	}
      catch (HashTreeOverflowException e)
	{
	  // call readdAll to increase the size of hash nodes 
	  // and readd all itemsets that were previously added
	  readdAll();
	}
  }

  // private recursive method; level is the depth in the tree, i.e. how
  // many items of the itemset have been consumed so far; returns the node
  // that should replace the one passed in (a HashNode after a split).
  private Node add(Node node, int level, int index)
  {
    if (node.type == LIST_NODE)
      {
	ListNode ln = (ListNode)node;

	if (ln.size == LIST_SIZE) // list is full
	  {
	    // if the level is equal to the itemsets size and we
	    // filled the list node, then we overflowed the HashTree.
	    if (((Itemset)itemsets.get(index)).size() == level)
	      throw new HashTreeOverflowException();

	    // else, must split!
	    HashNode hn = new HashNode();

	    // hash the list elements
	    for (int i = 0; i < LIST_SIZE; i++)
	      add(hn, level, ln.indexes[i]);

	    // add our node
	    add(hn, level, index);

	    // return this HashNode to replace old ListNode
	    return hn;
	  }
	else // append index at end of list
	  {
	    ln.indexes[ln.size++] = index;
	  }
      }
    else // HASH_NODE
      {
	HashNode hn = (HashNode)node;

	// compute hash key
	Itemset is = (Itemset)itemsets.get(index);
	int key = is.getItem(level);

	// try to get next node
	Node n = hn.children.get(key);

	if (n == null) // no node, must create a new ListNode
	  {
	    ListNode ln = new ListNode();
	    ln.indexes[ln.size++] = index; 
	    hn.children.put(key, ln);
	  }
	else // found a node, do a recursive call
	  {
	    n = add(n, level + 1, index);
	    hn.children.put(key, n);
	  }
      }

    return node;
  }

  /**
   * Update the weights of all indexed Itemsets that are included
   * in <code>row</code>
   *
   * @param row   the Itemset (normally a database row) against which 
   * we test for inclusion
   */
  public void update(Itemset row)
  {
    update(theRoot, row, 0);
    unvisitLeaves();
  }

  // private recursive method; index is the position in row from which
  // items may still be consumed on the way down
  private void update(Node node, Itemset row, int index)
  {
    if (node.type == LIST_NODE)
      {
	ListNode ln = (ListNode)node;

	// a leaf can be reached via several paths; process it only once
	if (ln.visited)
	  return;

	for (int i = 0; i < ln.size; i++)
	  {
	    Itemset is = (Itemset)itemsets.get(ln.indexes[i]);

	    if (is.isIncludedIn(row))
	      is.incrementWeight();
	  }

	ln.visited = true; // now we've seen this node
      }
    else // HASH_NODE
      {
	HashNode hn = (HashNode)node;

	// this is a tricky piece of algorithm that ensures we
	// look for all possible subsets of row
	for (int i = index; i < row.size(); i++)
	  {
	    int key = row.getItem(i);
	    Node n = hn.children.get(key);

	    if (n != null)
	      update(n, row, i + 1);
	  }
      }
  }

  /**
   * Update the weights of all indexed Itemsets that are included
   * in <code>row</code> and also update the matrix <code>counts</code>
   *
   * @param row   the Itemset (normally a database row) against which 
   * we test for inclusion
   * @param counts   a matrix used by some algorithms to speed up
   * computations; its rows correspond to Itemsets and its columns
   * correspond to items; each value in the matrix tells for how many
   * times had an item appeared together with an itemset in the rows
   * of the database.
   */
  public void update(Itemset row, long[][] counts)
  {
    update(theRoot, row, 0, counts);
    unvisitLeaves();
  }

  // private recursive method; same traversal as update(Node, Itemset, int)
  // but also accumulates item co-occurrence counts
  private void update(Node node, Itemset row, int index, long[][] counts)
  {
    if (node.type == LIST_NODE)
      {
	ListNode ln = (ListNode)node;

	// a leaf can be reached via several paths; process it only once
	if (ln.visited)
	  return;

	for (int i = 0; i < ln.size; i++)
	  {
	    Itemset is = (Itemset)itemsets.get(ln.indexes[i]);

	    if (is.isIncludedIn(row))
	      {
		is.incrementWeight();

		// NOTE(review): column is item - 1, which assumes item
		// ids start at 1 -- confirm against Itemset.getItem()
		for (int j = 0; j < row.size(); j++)
		  counts[ln.indexes[i]][row.getItem(j) - 1]++;
	      }
	  }

	ln.visited = true; // now we've seen this node
      }
    else // HASH_NODE
      {
	HashNode hn = (HashNode)node;

	// this is a tricky piece of algorithm that ensures we
	// look for all possible subsets of row
	for (int i = index; i < row.size(); i++)
	  {
	    int key = row.getItem(i);
	    Node n = hn.children.get(key);

	    if (n != null)
	      update(n, row, i + 1, counts);
	  }
      }
  }

  /**
   * Count how many frequent Itemsets (frequent = having weight 
   * no less than a specified minimum weight) are included in 
   * <code>itemset</code>
   *
   * @param itemset   the Itemset for which we count the subsets
   * @param minWeight   the minimum weight
   */
  public long countFrequentSubsets(Itemset itemset, long minWeight)
  {
    counter = 0;
    countFrequentSubsets(theRoot, itemset, 0, minWeight);
    unvisitLeaves();
    return counter;
  }

  // private recursive method; accumulates matches into the counter field
  private void countFrequentSubsets(Node node, Itemset itemset,
				    int index, long minWeight)
  {
    if (node.type == LIST_NODE)
      {
	ListNode ln = (ListNode)node;

	// a leaf can be reached via several paths; count it only once
	if (ln.visited)
	  return;

	for (int i = 0; i < ln.size; i++)
	  {
	    Itemset is = (Itemset)itemsets.get(ln.indexes[i]);

	    if (is.isIncludedIn(itemset) && is.getWeight() >= minWeight)
	      counter++;
	  }

	ln.visited = true; // now we've seen this node
      }
    else // HASH_NODE
      {
	HashNode hn = (HashNode)node;

	// this is a tricky piece of algorithm that ensures we
	// look for all possible subsets of row
	for (int i = index; i < itemset.size(); i++)
	  {
	    int key = itemset.getItem(i);
	    Node n = hn.children.get(key);

	    if (n != null)
	      countFrequentSubsets(n, itemset, i + 1, minWeight);
	  }
      }
  }

  /**
   * Count how many Itemsets are included in <code>itemset</code>
   *
   * @param itemset   the Itemset for which we count the subsets
   */
  public long countSubsets(Itemset itemset)
  {
    counter = 0;
    countSubsets(theRoot, itemset, 0);
    unvisitLeaves();
    return counter;
  }

  // private recursive method; accumulates matches into the counter field
  private void countSubsets(Node node, Itemset itemset, int index)
  {
    if (node.type == LIST_NODE)
      {
	ListNode ln = (ListNode)node;

	// a leaf can be reached via several paths; count it only once
	if (ln.visited)
	  return;

	for (int i = 0; i < ln.size; i++)
	  {
	    Itemset is = (Itemset)itemsets.get(ln.indexes[i]);

	    if (is.isIncludedIn(itemset))
	      counter++;
	  }

	ln.visited = true; // now we've seen this node
      }
    else // HASH_NODE
      {
	HashNode hn = (HashNode)node;

	// this is a tricky piece of algorithm that ensures we
	// look for all possible subsets of row
	for (int i = index; i < itemset.size(); i++)
	  {
	    int key = itemset.getItem(i);
	    Node n = hn.children.get(key);

	    if (n != null)
	      countSubsets(n, itemset, i + 1);
	  }
      }
  }

  /**
   * Verifies if any of the indexed Itemsets is not large by checking
   * whether they're included in the frequent itemset <code>itemset</code>.
   * If an Itemset is not large then it will be marked.
   *
   * @param itemset   the Itemset we check
   */
  public void checkLargeness(Itemset itemset)
  {
    checkLargeness(theRoot, itemset, 0);
    unvisitLeaves();
  }

  // private recursive method; marks every indexed Itemset that is
  // included in itemset (the semantics of mark() are defined by Itemset)
  private void checkLargeness(Node node, Itemset itemset, int index)
  {
    if (node.type == LIST_NODE)
      {
	ListNode ln = (ListNode)node;

	// a leaf can be reached via several paths; process it only once
	if (ln.visited)
	  return;

	for (int i = 0; i < ln.size; i++)
	  {
	    Itemset is = (Itemset)itemsets.get(ln.indexes[i]);

	    if (is.isIncludedIn(itemset))
	      is.mark();
	  }

	ln.visited = true; // now we've seen this node
      }
    else // HASH_NODE
      {
	HashNode hn = (HashNode)node;

	// this is a tricky piece of algorithm that ensures we
	// look for all possible subsets of row
	for (int i = index; i < itemset.size(); i++)
	  {
	    int key = itemset.getItem(i);
	    Node n = hn.children.get(key);

	    if (n != null)
	      checkLargeness(n, itemset, i + 1);
	  }
      }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -