📄 sequential.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: prudsys basket analysis
* Description: Basket analysis algorithms
* Copyright: Copyright (c) 2001
* Company: PRUDENTIAL SYSTEMS SOFTWARE GmbH
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Models.Sequential.Algorithms.SeqSimple;
import java.util.Collection;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
import com.prudsys.pdm.Models.Sequential.ItemSetSeq;
import com.prudsys.pdm.Models.Sequential.SequentialAlgorithm;
import com.prudsys.pdm.Utils.IntHashtable;
import com.prudsys.pdm.Utils.IntVector;
/**
* Simple sequential basket analysis algorithm based on Apriori.
* Algorithm did not find cycles.
*
* @author Michael Thess
* @version 1.1, 2001/05/07
*/
public class Sequential extends SequentialAlgorithm {
/**
* Debugging.
*/
private static final boolean debug = false;
/** Minimum number of supporting transactions, only used if >0. */
private int minimumSupportCount = 0;
/** Minimum item size. */
private int minimumItemSize = 1;
/** Maximum item size. */
private int maximumItemSize = -1;
/** Sequential rules. */
private Vector sequentialRules;
/** Sequential transaction set. */
private TransactionSetSeq transSet = null;
/**
* Empty constructor.
*/
public Sequential() {
}
/**
* Returns vector of sequential rules.
*
* @return vector of sequential rules (ItemSetSeq)
*/
public Vector getSequentialRules()
{
return sequentialRules;
}
/**
* Sets minimum item size.
*
* @param minimumItemSize minimum item size
*/
public void setMinimumItemSize(int minimumItemSize)
{
this.minimumItemSize = minimumItemSize;
}
/**
* Sets maximum item size (-1: unbounded).
*
* @param maximumItemSize maximum item size
*/
public void setMaximumItemSize(int maximumItemSize)
{
this.maximumItemSize = maximumItemSize;
}
/**
* Returns minimum number of transactions containing item pairs.
* The relative minimum support value is defined by the minimumSupport
* parameter.
*
* @return minimum support count
*/
public int getMinimumSupportCount()
{
return minimumSupportCount;
}
/**
* Sets minimum number of transactions containing item pairs.
* The relative minimum support value is defined by the minimumSupport
* parameter.
*
* @param minimumSupportCount new minimum support count
*/
public void setMinimumSupportCount(int minimumSupportCount)
{
this.minimumSupportCount = minimumSupportCount;
}
/**
* Checks mining algorithm for completeness by calling verify method
* of superclass. Adiitionally, it checks whether minimumItemSize and
* maximumItemSize are admissible.
*
* @throws IllegalArgumentException if some algorithm attributes are incorrect
*/
public void verify() throws IllegalArgumentException
{
super.verify();
if (minimumItemSize < 0)
throw new IllegalArgumentException("minimumItemSize can't be negative");
if (maximumItemSize > -1 && minimumItemSize > maximumItemSize)
throw new IllegalArgumentException("minimumItemSize can't be larger than maximumItemSize");
}
/**
* Implements the Algorithm Sequential (without cycles). If a transactionId, itemId,
* or itemIndex has a missing value, the (transactionId, itemId, itemIndex)-tuple is ignored.
*/
public void runAlgorithm() throws MiningException, IllegalArgumentException
{
// Convert input stream to sequential transaction set when called for first time:
if (transSet == null)
{
transSet = convertMiningInputStream( miningInputStream );
};
// Run algorithm:
SequentialResult sqRes = SequentialAlg();
// Transform result:
sequentialRules = new Vector();
ItemSetSeqList isl = sqRes.getLargeSequences();
for (int i = 0; i < isl.getSize(); i++)
sequentialRules.add( isl.getItemSetSeqAt(i) );
}
/**
* Create sequential transaction set from input stream.
*
* @param miningInputStream mining input stream to be converted
* @return sequential transaction set of mining input stream
*/
private TransactionSetSeq convertMiningInputStream( MiningInputStream miningInputStream ) throws MiningException
{
Hashtable transactions = new Hashtable();
while(miningInputStream.next())
{
MiningVector vector = miningInputStream.read();
double transId = vector.getValue(transactionId);
double itId = vector.getValue(itemId);
double itIndex = vector.getValue(itemIndex);
// Missing value => ignore line:
if ( Category.isMissingValue(transId) || Category.isMissingValue(itId) || Category.isMissingValue(itIndex) )
continue;
int item = (int) itId;
Double value = new Double(transId);
Vector sequence = (Vector)transactions.get(value);
if(sequence == null)
{
sequence = new Vector();
transactions.put(value,sequence);
}
sequence.add(new Item(item,(int)itIndex));
}
Collection coll = transactions.values();
Iterator it = coll.iterator();
TransactionSetSeq tss = new TransactionSetSeq();
while(it.hasNext())
{
Vector sequence = (Vector)it.next();
ItemSetSeq iss = new ItemSetSeq();
Object[] seq = sequence.toArray();
int min, element;
for(int j=0;j<seq.length;j++)
{
min = ((Item)seq[j]).getIndex(); element = -1;
for(int k=j+1;k<seq.length;k++)
{
int index = ((Item)seq[k]).getIndex();
if(index < min)
{
min = index; element = k;
}
}
if(element!=-1)
{
Object swap = seq[j];
seq[j] = seq[element];
seq[element] = swap;
}
iss.addItem(((Item)seq[j]).getItem());
}
tss.addTransaction(iss);
}
return tss;
}
/**
* Sequential basket analysis algorithm of Michael Thess.
*
* @return result as list of sequential item sets
*/
public SequentialResult SequentialAlg() {
// Copy to transaction set:
TransactionSet ts;
ts = new TransactionSet(); // First ts is used for usual transactions
for (int i = 0; i < transSet.getSize(); i++) {
ItemSet is = new ItemSet();
ItemSetSeq iss = (ItemSetSeq) transSet.getItemSetAt(i);
for (int j = 0; j < iss.getSize(); j++)
is.addItem( iss.getItemAt(j) );
ts.addTransaction(is);
}
if (debug) {
transSet.print();
ts.print();
};
// Apply Apriori algorithm for large itemsets:
ItemSetList largeItemSetList = Apriori.AprioriAlgLITS((TransactionSet) ts, minimumSupportCount, minimumSupport,
1, maximumItemSize);
if (debug)
{
showLITS(largeItemSetList, ts.getSize());
}
// First find all items in large itemsets and write them into hash table:
IntHashtable largeItems = new IntHashtable();
int nLITS = largeItemSetList.getSize(); // number of large itemsets
int nLit = 0; // number of large items
for (int i = 0; i < nLITS; i++) {
ItemSet is = (ItemSet) largeItemSetList.getItemSetAt(i);
int itemSize = is.getSize();
for (int j = 0; j < itemSize; j++) {
int pN = is.getIntegerAt(j);
if (largeItems.get(pN) == null) {
largeItems.put(pN, nLit);
nLit = nLit + 1;
};
};
};
// Vector containing all transactions for all itemsets:
IntVector[] itemTrans = new IntVector[nLit];
for (int i = 0; i < nLit; i++)
itemTrans[i] = new IntVector();
// Show large items:
if (debug)
{
System.out.println("Large items:");
showLargeItemHash(largeItems, itemTrans);
}
// Now take all items from sequential transaction set which are
// large items and copy to transaction list 'ts':
TransactionSetSeq tss;
tss = new TransactionSetSeq(); // Now ts is used for sequential transactions
for (int i = 0; i < transSet.getSize(); i++) {
ItemSetSeq iss = (ItemSetSeq) transSet.getItemSetAt(i);
ItemSetSeq isnew = new ItemSetSeq();
// Copy iss => isnew:
int prev_item = -1;
for (int j = 0; j < iss.getSize(); j++) {
int item = iss.getItemAt(j);
Integer Item = largeItems.get(item); // num of large item
// Item is large item => remains part of transaction, if not doubled:
if (Item != null && item != prev_item) {
isnew.addItem(item);
// Add to transaction vector if still not contained:
int ind = Item.intValue();
boolean cont = false;
for (int k = 0; k < itemTrans[ind].size(); k++)
if (itemTrans[ind].IntegerAt(k) == i)
cont = true;
if (!cont)
itemTrans[ind].addElement(i);
};
prev_item = item;
};
// Add to transaction list:
tss.addTransaction(isnew);
};
// Show transactions, large items with transactions:
if (debug) {
System.out.println("TS w/o nonlarge items");
ts.print();
showLargeItemHash(largeItems, itemTrans);
};
// Calculate and get minimum support count:
if (minimumSupportCount <= 0)
tss.setMinSuppCount(minimumSupport);
else
tss.setMinSuppCount(minimumSupportCount);
int minSuppCount = tss.getMinSuppCount();
if (debug)
System.out.println("minSuppCount = " + minSuppCount);
// Create list of sequential itemsets:
ItemSetSeqList sequenceList = new ItemSetSeqList();
// Check sequential items and add to list:
for (int i = 0; i < nLITS; i++) {
// Large itemset:
ItemSet is = (ItemSet) largeItemSetList.getItemSetAt(i);
int itemSize = is.getSize();
// Filter itemsets of appropriate size:
if (itemSize < minimumItemSize)
continue;
// Items for testing:
int[] getItems = new int[itemSize];
for (int j = 0; j < itemSize; j++)
getItems[j] = is.getIntegerAt(j);
// Get all transactions including these numbers:
IntVector transNr = getTransactNumbers(getItems, largeItems, itemTrans);
if (debug) {
System.out.print("\n L I T S: ");
for (int j = 0; j < itemSize; j++)
System.out.println(getItems[j] + " ");
System.out.println();
System.out.println("Numbers of transactions including this LITS: ");
for (int j = 0; j < transNr.size(); j++)
System.out.print(transNr.IntegerAt(j) + " ");
System.out.println();
};
// Create all permutations:
ItemSetSeq[] testItems = getISSbyPermut(getItems);
// Get support count of testing itemset:
for (int j = 0; j < testItems.length; j++) {
// Show testing itemset:
if (debug) {
System.out.println("Candidate item order:");
testItems[j].print();
};
// Get support count:
int suppCount = getSupportCount(testItems[j], (TransactionSetSeq) tss, transNr);
if (debug)
System.out.println("suppCount = " + suppCount);
// Support constraint if satisfied:
if (suppCount >= minSuppCount) {
testItems[j].setSupportCount(suppCount);
sequenceList.addItemSetSeq(testItems[j]);
};
};
};
/*************************** END OF SEQUENCE PHASE *****************************/
/*******************************************************************************/
// Show result:
if (debug) {
System.out.println("\n R E S U L T:");
sequenceList.print();
};
SequentialResult seqRes = new SequentialResult(sequenceList);
return seqRes;
}
/**
* Get support count for sequential itemset.
*
* @param testIss sequential itemset
* @param transSet sequential transaction set
* @param tnr numbers of transactions to be tested
* @return support count
*/
private static int getSupportCount(ItemSetSeq testIss, TransactionSetSeq transSet, IntVector tnr) {
int nsupp = 0;
// Data of test itemset:
int t_size = testIss.getSize();
int[] titems = new int[t_size];
for (int i = 0; i < t_size; i++)
titems[i] = testIss.getItemAt(i);
// Loop over all transactions:
for (int i = 0; i < tnr.size(); i++) {
ItemSetSeq iss = transSet.getItemSetAt( tnr.IntegerAt(i) );
// Data of transaction itemset:
int i_size = iss.getSize();
int[] items = new int[i_size];
for (int j = 0; j < i_size; j++)
items[j] = iss.getItemAt(j);
// Searching:
int[] isp = new int[t_size];
for (int j = 0; j < t_size; j++)
isp[j] = -1;
boolean match = false;
while (true) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -