📄 itemset.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.association;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.io.*;
import java.util.*;
import org.agentacademy.modules.dataminer.core.*;
/**
* Class for storing a set of items. Item sets are stored in a lexicographic
* order, which is determined by the header information of the set of instances
* used for generating the set of items. All methods in this class assume that
* item sets are stored in lexicographic order.
*
*/
public class ItemSet implements Serializable {
/** The items stored as an array of of ints. */
protected int[] m_items;
/** Counter for how many transactions contain this item set. */
protected int m_counter;
/** The total number of transactions */
protected int m_totalTransactions;
/**
* Constructor
* @param totalTrans the total number of transactions in the data
*/
public ItemSet(int totalTrans) {
m_totalTransactions = totalTrans;
}
/**
* Outputs the confidence for a rule.
*
* @param premise the premise of the rule
* @param consequence the consequence of the rule
* @return the confidence on the training data
*/
public static double confidenceForRule(ItemSet premise,
ItemSet consequence) {
return (double)consequence.m_counter/(double)premise.m_counter;
}
/**
* Outputs the lift for a rule. Lift is defined as:<br>
* confidence / prob(consequence)
*
* @param premise the premise of the rule
* @param consequence the consequence of the rule
* @param consequenceCount how many times the consequence occurs independent
* of the premise
* @return the lift on the training data
*/
public double liftForRule(ItemSet premise,
ItemSet consequence,
int consequenceCount) {
double confidence = confidenceForRule(premise, consequence);
return confidence / ((double)consequenceCount /
(double)m_totalTransactions);
}
/**
* Outputs the leverage for a rule. Leverage is defined as: <br>
* prob(premise & consequence) - (prob(premise) * prob(consequence))
*
* @param premise the premise of the rule
* @param consequence the consequence of the rule
* @param premiseCount how many times the premise occurs independent
* of the consequent
* @param consequenceCount how many times the consequence occurs independent
* of the premise
* @return the leverage on the training data
*/
public double leverageForRule(ItemSet premise,
ItemSet consequence,
int premiseCount,
int consequenceCount) {
double coverageForItemSet = (double)consequence.m_counter /
(double)m_totalTransactions;
double expectedCoverageIfIndependent =
((double)premiseCount / (double)m_totalTransactions) *
((double)consequenceCount / (double)m_totalTransactions);
double lev = coverageForItemSet - expectedCoverageIfIndependent;
return lev;
}
/**
* Outputs the conviction for a rule. Conviction is defined as: <br>
* prob(premise) * prob(!consequence) / prob(premise & !consequence)
*
* @param premise the premise of the rule
* @param consequence the consequence of the rule
* @param premiseCount how many times the premise occurs independent
* of the consequent
* @param consequenceCount how many times the consequence occurs independent
* of the premise
* @return the conviction on the training data
*/
public double convictionForRule(ItemSet premise,
ItemSet consequence,
int premiseCount,
int consequenceCount) {
double num =
(double)premiseCount * (double)(m_totalTransactions - consequenceCount) *
(double)m_totalTransactions;
double denom =
((premiseCount - consequence.m_counter)+1);
if (num < 0 || denom < 0) {
System.err.println("*** "+num+" "+denom);
System.err.println("premis count: "+premiseCount+" consequence count "+consequenceCount+" total trans "+m_totalTransactions);
}
return num / denom;
}
/**
* Checks if an instance contains an item set.
*
* @param instance the instance to be tested
* @return true if the given instance contains this item set
*/
public final boolean containedBy(Instance instance) {
for (int i = 0; i < instance.numAttributes(); i++)
if (m_items[i] > -1) {
if (instance.isMissing(i))
return false;
if (m_items[i] != (int)instance.value(i))
return false;
}
return true;
}
/**
* Deletes all item sets that don't have minimum support.
*
* @param itemSets the set of item sets to be pruned
* @param minSupport the minimum number of transactions to be covered
* @return the reduced set of item sets
*/
public static FastVector deleteItemSets(FastVector itemSets,
int minSupport,
int maxSupport) {
FastVector newVector = new FastVector(itemSets.size());
for (int i = 0; i < itemSets.size(); i++) {
ItemSet current = (ItemSet)itemSets.elementAt(i);
if ((current.m_counter >= minSupport)
&& (current.m_counter <= maxSupport))
newVector.addElement(current);
}
return newVector;
}
/**
* Tests if two item sets are equal.
*
* @param itemSet another item set
* @return true if this item set contains the same items as the given one
*/
public final boolean equals(Object itemSet) {
if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
return false;
}
if (m_items.length != ((ItemSet)itemSet).m_items.length)
return false;
for (int i = 0; i < m_items.length; i++)
if (m_items[i] != ((ItemSet)itemSet).m_items[i])
return false;
return true;
}
/**
* Generates all rules for an item set.
*
* @param minConfidence the minimum confidence the rules have to have
* @param hashtables containing all(!) previously generated
* item sets
* @param numItemsInSet the size of the item set for which the rules
* are to be generated
* @return all the rules with minimum confidence for the given item set
*/
public final FastVector[] generateRules(double minConfidence,
FastVector hashtables,
int numItemsInSet) {
FastVector premises = new FastVector(),consequences = new FastVector(),
conf = new FastVector();
FastVector[] rules = new FastVector[3], moreResults;
ItemSet premise, consequence;
Hashtable hashtable = (Hashtable)hashtables.elementAt(numItemsInSet - 2);
// Generate all rules with one item in the consequence.
for (int i = 0; i < m_items.length; i++)
if (m_items[i] != -1) {
premise = new ItemSet(m_totalTransactions);
consequence = new ItemSet(m_totalTransactions);
premise.m_items = new int[m_items.length];
consequence.m_items = new int[m_items.length];
consequence.m_counter = m_counter;
for (int j = 0; j < m_items.length; j++)
consequence.m_items[j] = -1;
System.arraycopy(m_items, 0, premise.m_items, 0, m_items.length);
premise.m_items[i] = -1;
consequence.m_items[i] = m_items[i];
premise.m_counter = ((Integer)hashtable.get(premise)).intValue();
premises.addElement(premise);
consequences.addElement(consequence);
conf.addElement(new Double(confidenceForRule(premise, consequence)));
}
rules[0] = premises;
rules[1] = consequences;
rules[2] = conf;
pruneRules(rules, minConfidence);
// Generate all the other rules
moreResults = moreComplexRules(rules, numItemsInSet, 1, minConfidence,
hashtables);
if (moreResults != null)
for (int i = 0; i < moreResults[0].size(); i++) {
rules[0].addElement(moreResults[0].elementAt(i));
rules[1].addElement(moreResults[1].elementAt(i));
rules[2].addElement(moreResults[2].elementAt(i));
}
return rules;
}
/**
* Generates all significant rules for an item set.
*
* @param minMetric the minimum metric (confidence, lift, leverage,
* improvement) the rules have to have
* @param metricType (confidence=0, lift, leverage, improvement)
* @param hashtables containing all(!) previously generated
* item sets
* @param numItemsInSet the size of the item set for which the rules
* are to be generated
* @param the significance level for testing the rules
* @return all the rules with minimum metric for the given item set
* @exception Exception if something goes wrong
*/
public final FastVector[] generateRulesBruteForce(double minMetric,
int metricType,
FastVector hashtables,
int numItemsInSet,
int numTransactions,
double significanceLevel)
throws Exception {
FastVector premises = new FastVector(),consequences = new FastVector(),
conf = new FastVector(), lift = new FastVector(), lev = new FastVector(),
conv = new FastVector();
FastVector[] rules = new FastVector[6];
ItemSet premise, consequence;
Hashtable hashtableForPremise, hashtableForConsequence;
int numItemsInPremise, help, max, consequenceUnconditionedCounter;
double[][] contingencyTable = new double[2][2];
double metric, chiSquared;
// Generate all possible rules for this item set and test their
// significance.
max = (int)Math.pow(2, numItemsInSet);
for (int j = 1; j < max; j++) {
numItemsInPremise = 0;
help = j;
while (help > 0) {
if (help % 2 == 1)
numItemsInPremise++;
help /= 2;
}
if (numItemsInPremise < numItemsInSet) {
hashtableForPremise =
(Hashtable)hashtables.elementAt(numItemsInPremise-1);
hashtableForConsequence =
(Hashtable)hashtables.elementAt(numItemsInSet-numItemsInPremise-1);
premise = new ItemSet(m_totalTransactions);
consequence = new ItemSet(m_totalTransactions);
premise.m_items = new int[m_items.length];
consequence.m_items = new int[m_items.length];
consequence.m_counter = m_counter;
help = j;
for (int i = 0; i < m_items.length; i++)
if (m_items[i] != -1) {
if (help % 2 == 1) {
premise.m_items[i] = m_items[i];
consequence.m_items[i] = -1;
} else {
premise.m_items[i] = -1;
consequence.m_items[i] = m_items[i];
}
help /= 2;
} else {
premise.m_items[i] = -1;
consequence.m_items[i] = -1;
}
premise.m_counter = ((Integer)hashtableForPremise.get(premise)).intValue();
consequenceUnconditionedCounter =
((Integer)hashtableForConsequence.get(consequence)).intValue();
if (metricType == 0) {
contingencyTable[0][0] = (double)(consequence.m_counter);
contingencyTable[0][1] = (double)(premise.m_counter - consequence.m_counter);
contingencyTable[1][0] = (double)(consequenceUnconditionedCounter -
consequence.m_counter);
contingencyTable[1][1] = (double)(numTransactions - premise.m_counter -
consequenceUnconditionedCounter +
consequence.m_counter);
chiSquared = ContingencyTables.chiSquared(contingencyTable, false);
metric = confidenceForRule(premise, consequence);
if ((!(metric < minMetric)) &&
(!(chiSquared > significanceLevel))) {
premises.addElement(premise);
consequences.addElement(consequence);
conf.addElement(new Double(metric));
lift.addElement(new Double(liftForRule(premise, consequence,
consequenceUnconditionedCounter)));
lev.addElement(new Double(leverageForRule(premise, consequence,
premise.m_counter,
consequenceUnconditionedCounter)));
conv.addElement(new Double(convictionForRule(premise, consequence,
premise.m_counter,
consequenceUnconditionedCounter)));
}
} else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -