📄 priorestimation.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* PriorEstimation.java
* Copyright (C) 2004 Stefan Mutter
*
*/
package weka.associations;
import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Utils;
import weka.core.SpecialFunctions;
import java.util.Random;
import java.util.Hashtable;
import java.io.Serializable;
/**
* Class implementing the prior estimattion of the predictive apriori algorithm
* for mining association rules.
*
* Reference: T. Scheffer (2001). <i>Finding Association Rules That Trade Support
* Optimally against Confidence</i>. Proc of the 5th European Conf.
* on Principles and Practice of Knowledge Discovery in Databases (PKDD'01),
* pp. 424-435. Freiburg, Germany: Springer-Verlag. <p>
*
* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
* @version $Revision: 1.1 $ */
public class PriorEstimation implements Serializable{
/** The number of rnadom rules. */
protected int m_numRandRules;
/** The number of intervals. */
protected int m_numIntervals;
/** The random seed used for the random rule generation step. */
protected static final int SEED = 0;
/** The maximum number of attributes for which a prior can be estimated. */
protected static final int MAX_N = 1024;
/** The random number generator. */
protected Random m_randNum;
/** The instances for which association rules are mined. */
protected Instances m_instances;
/** Flag indicating whether standard association rules or class association rules are mined. */
protected boolean m_CARs;
/** Hashtable to store the confidence values of randomly generated rules. */
protected Hashtable m_distribution;
/** Hashtable containing the estimated prior probabilities. */
protected Hashtable m_priors;
/** Sums up the confidences of all rules with a certain length. */
protected double m_sum;
/** The mid points of the discrete intervals in which the interval [0,1] is divided. */
protected double[] m_midPoints;
/**
* Constructor
*
* @param instances the instances to be used for generating the associations
* @param numRules the number of random rules used for generating the prior
* @param numIntervals the number of intervals to discretise [0,1]
* @param car flag indicating whether standard or class association rules are mined
*/
public PriorEstimation(Instances instances,int numRules,int numIntervals,boolean car) {
m_instances = instances;
m_CARs = car;
m_numRandRules = numRules;
m_numIntervals = numIntervals;
m_randNum = m_instances.getRandomNumberGenerator(SEED);
}
/**
* Calculates the prior distribution.
*
* @exception Exception if prior can't be estimated successfully
*/
public final void generateDistribution() throws Exception{
boolean jump;
int i,maxLength = m_instances.numAttributes(), count =0,count1=0, ruleCounter;
int [] itemArray;
m_distribution = new Hashtable(maxLength*m_numIntervals);
RuleItem current;
ItemSet generate;
if(m_instances.numAttributes() == 0)
throw new Exception("Dataset has no attributes!");
if(m_instances.numAttributes() >= MAX_N)
throw new Exception("Dataset has to many attributes for prior estimation!");
if(m_instances.numInstances() == 0)
throw new Exception("Dataset has no instances!");
for (int h = 0; h < maxLength; h++) {
if (m_instances.attribute(h).isNumeric())
throw new Exception("Can't handle numeric attributes!");
}
if(m_numIntervals == 0 || m_numRandRules == 0)
throw new Exception("Prior initialisation impossible");
//calculate mid points for the intervals
midPoints();
//create random rules of length i and measure their support and if support >0 their confidence
for(i = 1;i <= maxLength; i++){
m_sum = 0;
int j = 0;
count = 0;
count1 = 0;
while(j < m_numRandRules){
count++;
jump =false;
if(!m_CARs){
itemArray = randomRule(maxLength,i,m_randNum);
current = splitItemSet(m_randNum.nextInt(i), itemArray);
}
else{
itemArray = randomCARule(maxLength,i,m_randNum);
current = addCons(itemArray);
}
int [] ruleItem = new int[maxLength];
for(int k =0; k < itemArray.length;k++){
if(current.m_premise.m_items[k] != -1)
ruleItem[k] = current.m_premise.m_items[k];
else
if(current.m_consequence.m_items[k] != -1)
ruleItem[k] = current.m_consequence.m_items[k];
else
ruleItem[k] = -1;
}
ItemSet rule = new ItemSet(ruleItem);
updateCounters(rule);
ruleCounter = rule.m_counter;
if(ruleCounter > 0)
jump =true;
updateCounters(current.m_premise);
j++;
if(jump){
buildDistribution((double)ruleCounter/(double)current.m_premise.m_counter, (double)i);
}
}
//normalize
if(m_sum > 0){
for(int w = 0; w < m_midPoints.length;w++){
String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));
Double oldValue = (Double)m_distribution.remove(key);
if(oldValue == null){
m_distribution.put(key,new Double(1.0/m_numIntervals));
m_sum += 1.0/m_numIntervals;
}
else
m_distribution.put(key,oldValue);
}
for(int w = 0; w < m_midPoints.length;w++){
double conf =0;
String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));
Double oldValue = (Double)m_distribution.remove(key);
if(oldValue != null){
conf = oldValue.doubleValue() / m_sum;
m_distribution.put(key,new Double(conf));
}
}
}
else{
for(int w = 0; w < m_midPoints.length;w++){
String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));
m_distribution.put(key,new Double(1.0/m_numIntervals));
}
}
}
}
/**
* Constructs an item set of certain length randomly.
* This method is used for standard association rule mining.
* @param maxLength the number of attributes of the instances
* @param actualLength the number of attributes that should be present in the item set
* @param randNum the random number generator
* @return a randomly constructed item set in form of an int array
*/
public final int[] randomRule(int maxLength, int actualLength, Random randNum){
int[] itemArray = new int[maxLength];
for(int k =0;k < itemArray.length;k++)
itemArray[k] = -1;
int help =actualLength;
if(help == maxLength){
help = 0;
for(int h = 0; h < itemArray.length; h++){
itemArray[h] = m_randNum.nextInt((m_instances.attribute(h)).numValues());
}
}
while(help > 0){
int mark = randNum.nextInt(maxLength);
if(itemArray[mark] == -1){
help--;
itemArray[mark] = m_randNum.nextInt((m_instances.attribute(mark)).numValues());
}
}
return itemArray;
}
/**
* Constructs an item set of certain length randomly.
* This method is used for class association rule mining.
* @param maxLength the number of attributes of the instances
* @param actualLength the number of attributes that should be present in the item set
* @param randNum the random number generator
* @return a randomly constructed item set in form of an int array
*/
public final int[] randomCARule(int maxLength, int actualLength, Random randNum){
int[] itemArray = new int[maxLength];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -