📄 transactionset.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Victor Borichev
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @version 1.0
*/
package com.prudsys.pdm.Models.Sequential.Algorithms.Seq;
import java.util.Vector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
/**
* Handling of the transaction list
* - List as a Vector
* - List as a HashTable
*
* Change by M. Thess: Now extends abstract TransactionSet class
*
* @author Stefan Ludwig
* @author Michael Thess
* @version 2.0, 2001/04/28
*/
public class TransactionSet
{
private static boolean dbg = false;
public static final int HASH_TABLE_SIZE = 1;
public Vector transactionList; // Set of 'ItemSet's
private int size=0;
private int minSuppCount;
private Vector[] hashTable =new Vector[HASH_TABLE_SIZE];
public TransactionSet()
{
transactionList = new Vector();
}
private static int HashFunction(int item)
{
return item % HASH_TABLE_SIZE;
}
public void setMinSuppCount(double minsupp)
{
minSuppCount=(int)(transactionList.size()*minsupp+0.9999999); //calculate minimum support count
}
/**
* Initialization of transaction set.
* Added by M. Thess to bring the transaction set into initional position
* without clearing its list.
*/
public void init()
{
hashTable = new Vector[HASH_TABLE_SIZE];
}
public int getMinSuppCount()
{
return minSuppCount;
}
public int getSize()
{
return size;
}
public int getSupportCount(ItemSet subset)
{
int suppcnt=0;
if (subset.getSize()<1) return 0;
ItemSet[] is=new ItemSet[subset.getSize()];
int[] from=new int[subset.getSize()];
int shortlength=Integer.MAX_VALUE;
int shortidx=0;
for (int i=0; i<subset.getSize(); i++)
{
is[i]=getHashTableEntry(subset.getIntegerAt(i));
if (is[i].getSize()<shortlength)
{
shortlength=is[i].getSize();
shortidx=i;
}
from[i]=0;
}
ItemSet tmp=is[0]; is[0]=is[shortidx]; is[shortidx]=tmp;
for (int i=0; i<is[0].getSize(); i++)
{
boolean miss=false;
for (int j=1; j<subset.getSize() && !miss; j++)
{
int idx=is[j].containsAtPos(is[0].getIntegerAt(i), from[j]);
if( idx<0 ) miss=true;
else
{
from[j]=idx;
}
}
if (!miss) suppcnt++;
}
return suppcnt;
}
/**
* Remove all transactions from transaction set.
*
* New since version 2.0 to support abstract specification.
*/
public void removeAllTransactions()
{
transactionList.removeAllElements();
size = 0;
}
public void addTransaction(ItemSet ItemSet)
{
transactionList.addElement(ItemSet);
size=transactionList.size();
}
/**
* Get itemset (i.e. transaction) at defined position.
*
* New since version 2.0 to support abstract specification.
*
* @param index position of desired itemset
* @return desired itemset, null if impossible
*/
public ItemSet getTransactionAt(int index)
{
return (ItemSet) transactionList.elementAt(index);
}
/**
* String representation of transaction set.
*
* @return String representation
*/
public String toString()
{
String text = "TransactionSet (" + size + " transactions)" + "\n";
for (int i = 0; i < size; i++)
{
text = text + "["+i+"]\t" + ((ItemSet)transactionList.elementAt(i)).toString();
if (i < size - 1)
text = text + "\n";
}
return text;
}
public void print()
{
System.out.println(toString());
}
private ItemSet getHashTableEntry(int item)
{
Vector isl = hashTable[HashFunction(item)];
for( int i=0; i < isl.size(); i++ )
if (((ItemSet)isl.elementAt(i)).getSupportCount()==item) return (ItemSet)isl.elementAt(i);
if (dbg) System.out.println("item "+item+" not found in hashTable");
return null;
}
public Vector buildHashTable()
{
if (dbg) System.out.println("Start building HashTable...");
Vector large1ItemSetList=new Vector();
int currItem;
for (int trans=0; trans<size; trans++)
{
if (dbg) if (trans %1000 ==0) System.out.println("adding transaction no. "+trans);
ItemSet basket=(ItemSet)transactionList.elementAt(trans);
for (int i=0; i<basket.getSize(); i++)
{
currItem=basket.getIntegerAt(i);
int Hidx=HashFunction(currItem); //Hash function
if (hashTable[Hidx]==null)
{
//no entry at all here
Vector hashList=new Vector();
ItemSet is=new ItemSet();
is.insertItem(trans);
is.setSupportCount(currItem);
hashList.addElement(is);
hashTable[Hidx]=hashList;
}
else
{
// search in hashTable[Hidx] for ItemSet w/ SupportCount==currItem
boolean found=false;
int j;
Vector currItemSet=(Vector)hashTable[Hidx];
for(j=0; j< currItemSet.size(); j++)
if (((ItemSet)currItemSet.elementAt(j)).getSupportCount()==currItem)
{
found=true;
break;
}
if(found)
{
((ItemSet)currItemSet.elementAt(j)).insertItem(trans);
}
else
{
ItemSet is=new ItemSet(); is.insertItem(trans); is.setSupportCount(currItem);
currItemSet.addElement(is);
}
}
}
}
if (dbg) System.out.println("cleaning up..");
for (int i=0; i<hashTable.length; i++)
{
if ((Vector)hashTable[i]==null) continue;
for (int j=0; j< ((Vector)hashTable[i]).size(); j++)
if ( ((ItemSet)((Vector)hashTable[i]).elementAt(j)).getSize()<minSuppCount)
{
((Vector)hashTable[i]).removeElementAt(j); j--;
}
else // include in list of 1-large itemsets
{
ItemSet is=new ItemSet();
is.addItem( ((ItemSet)((Vector)hashTable[i]).elementAt(j)).getSupportCount() );
is.setSupportCount( ((ItemSet)((Vector)hashTable[i]).elementAt(j)).getSize() );
if (!large1ItemSetList.contains(is)) large1ItemSetList.addElement(is);
}
if (((Vector)hashTable[i]).size()==0) hashTable[i]=null;
}
if (dbg) System.out.println("after cleanup");
if (dbg) System.out.println("done building HashTable");
return large1ItemSetList;
}
public void printHashTable()
{
for (int i=0; i<hashTable.length; i++)
{
if ((Vector)hashTable[i]==null) continue;
System.out.println("\nHashTable["+i+"]=");
for (int j=0; j< ((Vector)hashTable[i]).size(); j++)
System.out.println("\t\tITEM="+((ItemSet)((Vector)hashTable[i]).elementAt(j)).getSupportCount()
+"\tSupportCount="+((ItemSet)((Vector)hashTable[i]).elementAt(j)).getSize());
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -