📄 transactionset.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: prudsys basket analysis
* Description: Basket analysis algorithms
* Copyright: Copyright (c) 2001
* Company: PRUDENTIAL SYSTEMS SOFTWARE GmbH
* @author Stefan Ludwig
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Models.Sequential.Algorithms.SeqSimple;
import java.util.Vector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
/**
* Handling of the transaction list
* - List as a ItemSetList
* - List as a HashTable
*
* @author Stefan Ludwig
* @author Michael Thess
* @version 2.0, 2001/04/28
*/
public class TransactionSet {
private static boolean dbg = false;
public static final int HASH_TABLE_SIZE = 7654;
public TransactionSet(){
transactionList = new Vector();
}
private static int HashFunction(int item) {
return item % HASH_TABLE_SIZE;
}
public Vector transactionList; // Set of 'ItemSet's
private int size=0;
private int minSuppCount;
private ItemSetList[] hashTable =new ItemSetList[HASH_TABLE_SIZE];
public void setMinSuppCount(double minsupp) {
minSuppCount=(int)(transactionList.size()*minsupp+0.9999999); //calculate minimum support count
}
/**
* Initialization of transaction set.
* Added by M. Thess to bring the transaction set into initional position
* without clearing its list.
*/
public void init() {
hashTable = new ItemSetList[HASH_TABLE_SIZE];
}
public int getMinSuppCount() {return minSuppCount;}
public int getSize() {return size;}
public int getSupportCount(ItemSet subset) {
/*
//old method!!!
System.out.println("V");
int suppcount=0;
for (int i=0; i<size; i++)
if (((ItemSet)transactionList.elementAt(i)).contains(subset) ) suppcount++;
System.out.println("^");
return suppcount;
*/
int suppcnt=0;
if (subset.getSize()<1) return 0;
//if (dbg) System.out.println("V");
ItemSet[] is=new ItemSet[subset.getSize()];
int[] from=new int[subset.getSize()];
int shortlength=Integer.MAX_VALUE;
int shortidx=0;
for (int i=0; i<subset.getSize(); i++) {
is[i]=getHashTableEntry(subset.getIntegerAt(i));
if (is[i].getSize()<shortlength) {shortlength=is[i].getSize(); shortidx=i;}
from[i]=0;
}
ItemSet tmp=is[0]; is[0]=is[shortidx]; is[shortidx]=tmp;
//if (dbg) System.out.println("shortidx="+shortidx);
for (int i=0; i<is[0].getSize(); i++) {
boolean miss=false;
// if ( i % 100==0) System.out.println(suppcnt);
for (int j=1; j<subset.getSize() && !miss; j++){
int idx=is[j].containsAtPos(is[0].getIntegerAt(i), from[j]);
if( idx<0 ) miss=true;
else {from[j]=idx;
//System.out.println(j+"="+idx);
}
}
if (!miss) suppcnt++;
}
//System.out.println("found suppcount "+suppcnt+" for");
//subset.print();
//if (dbg) System.out.println("^");
return suppcnt;
}
/**
* Remove all transactions from transaction set.
*/
public void removeAllTransactions() {
transactionList.removeAllElements();
size = 0;
}
public void addTransaction(ItemSet ItemSet)
{ transactionList.addElement(ItemSet); size=transactionList.size(); }
/**
* Get itemset (i.e. transaction) at defined position.
*
* @param index position of desired itemset
* @return desired itemset, null if impossible
*/
public ItemSet getTransactionAt(int index) {
return (ItemSet) transactionList.elementAt(index);
}
/**
* String representation of transaction set.
*
* @return String representation
*/
public String toString() {
String text = "TransactionSet (" + size + " transactions)" + "\n";
for (int i = 0; i < size; i++) {
text = text + "["+i+"]\t" + ((ItemSet)transactionList.elementAt(i)).toString();
if (i < size - 1)
text = text + "\n";
};
return text;
}
public void print() {
System.out.println(toString());
}
private ItemSet getHashTableEntry(int item) {
ItemSetList isl=hashTable[HashFunction(item)];
for (int i=0; i<isl.getSize(); i++)
if (isl.getItemSetAt(i).getSupportCount()==item) return isl.getItemSetAt(i);
if (dbg) System.out.println("item "+item+" not found in hashTable");
return null;
}
public ItemSetList buildHashTable() {
if (dbg) System.out.println("Start building HashTable...");
ItemSetList large1ItemSetList=new ItemSetList();
int currItem;
for (int trans=0; trans<size; trans++) {
if (dbg) if (trans %1000 ==0) System.out.println("adding transaction no. "+trans);
ItemSet basket=(ItemSet)transactionList.elementAt(trans);
for (int i=0; i<basket.getSize(); i++) {
currItem=basket.getIntegerAt(i);
int Hidx=HashFunction(currItem); //Hash function
if (hashTable[Hidx]==null) { //no entry at all here
//System.out.println("new hashList @"+Hidx+" ="+currItem+"\ttrans="+trans);
ItemSetList hashList=new ItemSetList();
ItemSet is=new ItemSet(); is.insertItem(trans); is.setSupportCount(currItem);
hashList.insertItemSet(is);
hashTable[Hidx]=hashList;
}
else{ // search in hashTable[Hidx] for ItemSet w/ SupportCount==currItem
boolean found=false;
int j;
ItemSetList currItemSet=(ItemSetList)hashTable[Hidx];
for (j=0; j< currItemSet.getSize(); j++)
if (currItemSet.getItemSetAt(j).getSupportCount()==currItem) {found=true; break;}
if (found) {
// System.out.println("add to hashList @"+Hidx+" ="+currItem+"\ttrans="+trans);
currItemSet.getItemSetAt(j).insertItem(trans);
}
else {
//System.out.println("new to hashList @"+Hidx+" ="+currItem+"\ttrans="+trans);
ItemSet is=new ItemSet(); is.insertItem(trans); is.setSupportCount(currItem);
currItemSet.insertItemSet(is);
}
}
}
}
// printHashTable();
// remove all ItemSets w/ SupportCount<minSuppCount
if (dbg) System.out.println("cleaning up..");
for (int i=0; i<hashTable.length; i++) {
if ((ItemSetList)hashTable[i]==null) continue;
for (int j=0; j< ((ItemSetList)hashTable[i]).getSize(); j++)
if ( ((ItemSetList)hashTable[i]).getItemSetAt(j).getSize()<minSuppCount)
{((ItemSetList)hashTable[i]).delItemSetAt(j); j--;}
else // include in list of 1-large itemsets
{ItemSet is=new ItemSet();
is.addItem(((ItemSetList)hashTable[i]).getItemSetAt(j).getSupportCount());
is.setSupportCount(((ItemSetList)hashTable[i]).getItemSetAt(j).getSize());
large1ItemSetList.addItemSet(is);
}
if (((ItemSetList)hashTable[i]).getSize()==0) hashTable[i]=null;
}
if (dbg) System.out.println("after cleanup");
//printHashTable();
//large1ItemSetList.print();
if (dbg) System.out.println("done building HashTable");
return large1ItemSetList;
}
public void printHashTable() {
for (int i=0; i<hashTable.length; i++) {
if ((ItemSetList)hashTable[i]==null) continue;
System.out.println("\nHashTable["+i+"]=");
for (int j=0; j< ((ItemSetList)hashTable[i]).getSize(); j++)
System.out.println("\t\tITEM="+((ItemSetList)hashTable[i]).getItemSetAt(j).getSupportCount()
+"\tSupportCount="+((ItemSetList)hashTable[i]).getItemSetAt(j).getSize());
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -