📄 priorestimation.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * PriorEstimation.java * Copyright (C) 2004 Stefan Mutter * */package weka.associations;import weka.core.Instances;import weka.core.FastVector;import weka.core.Utils;import weka.core.SpecialFunctions;import java.util.Random;import java.util.Hashtable;import java.io.Serializable;/** * Class implementing the prior estimattion of the predictive apriori algorithm * for mining association rules. * * Reference: T. Scheffer (2001). <i>Finding Association Rules That Trade Support * Optimally against Confidence</i>. Proc of the 5th European Conf. * on Principles and Practice of Knowledge Discovery in Databases (PKDD'01), * pp. 424-435. Freiburg, Germany: Springer-Verlag. <p> * * @author Stefan Mutter (mutter@cs.waikato.ac.nz) * @version $Revision: 1.4 $ */ public class PriorEstimation implements Serializable{ /** The number of rnadom rules. */ protected int m_numRandRules; /** The number of intervals. */ protected int m_numIntervals; /** The random seed used for the random rule generation step. */ protected static final int SEED = 0; /** The maximum number of attributes for which a prior can be estimated. */ protected static final int MAX_N = 1024; /** The random number generator. */ protected Random m_randNum; /** The instances for which association rules are mined. */ protected Instances m_instances; /** Flag indicating whether standard association rules or class association rules are mined. */ protected boolean m_CARs; /** Hashtable to store the confidence values of randomly generated rules. */ protected Hashtable m_distribution; /** Hashtable containing the estimated prior probabilities. */ protected Hashtable m_priors; /** Sums up the confidences of all rules with a certain length. */ protected double m_sum; /** The mid points of the discrete intervals in which the interval [0,1] is divided. */ protected double[] m_midPoints; /** * Constructor * * @param instances the instances to be used for generating the associations * @param numRules the number of random rules used for generating the prior * @param numIntervals the number of intervals to discretise [0,1] * @param car flag indicating whether standard or class association rules are mined */ public PriorEstimation(Instances instances,int numRules,int numIntervals,boolean car) { m_instances = instances; m_CARs = car; m_numRandRules = numRules; m_numIntervals = numIntervals; m_randNum = m_instances.getRandomNumberGenerator(SEED); } /** * Calculates the prior distribution. * * @exception Exception if prior can't be estimated successfully */ public final void generateDistribution() throws Exception{ boolean jump; int i,maxLength = m_instances.numAttributes(), count =0,count1=0, ruleCounter; int [] itemArray; m_distribution = new Hashtable(maxLength*m_numIntervals); RuleItem current; ItemSet generate; if(m_instances.numAttributes() == 0) throw new Exception("Dataset has no attributes!"); if(m_instances.numAttributes() >= MAX_N) throw new Exception("Dataset has to many attributes for prior estimation!"); if(m_instances.numInstances() == 0) throw new Exception("Dataset has no instances!"); for (int h = 0; h < maxLength; h++) { if (m_instances.attribute(h).isNumeric()) throw new Exception("Can't handle numeric attributes!"); } if(m_numIntervals == 0 || m_numRandRules == 0) throw new Exception("Prior initialisation impossible"); //calculate mid points for the intervals midPoints(); //create random rules of length i and measure their support and if support >0 their confidence for(i = 1;i <= maxLength; i++){ m_sum = 0; int j = 0; count = 0; count1 = 0; while(j < m_numRandRules){ count++; jump =false; if(!m_CARs){ itemArray = randomRule(maxLength,i,m_randNum); current = splitItemSet(m_randNum.nextInt(i), itemArray); } else{ itemArray = randomCARule(maxLength,i,m_randNum); current = addCons(itemArray); } int [] ruleItem = new int[maxLength]; for(int k =0; k < itemArray.length;k++){ if(current.m_premise.m_items[k] != -1) ruleItem[k] = current.m_premise.m_items[k]; else if(current.m_consequence.m_items[k] != -1) ruleItem[k] = current.m_consequence.m_items[k]; else ruleItem[k] = -1; } ItemSet rule = new ItemSet(ruleItem); updateCounters(rule); ruleCounter = rule.m_counter; if(ruleCounter > 0) jump =true; updateCounters(current.m_premise); j++; if(jump){ buildDistribution((double)ruleCounter/(double)current.m_premise.m_counter, (double)i); } } //normalize if(m_sum > 0){ for(int w = 0; w < m_midPoints.length;w++){ String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i)); Double oldValue = (Double)m_distribution.remove(key); if(oldValue == null){ m_distribution.put(key,new Double(1.0/m_numIntervals)); m_sum += 1.0/m_numIntervals; } else m_distribution.put(key,oldValue); } for(int w = 0; w < m_midPoints.length;w++){ double conf =0; String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i)); Double oldValue = (Double)m_distribution.remove(key); if(oldValue != null){ conf = oldValue.doubleValue() / m_sum; m_distribution.put(key,new Double(conf)); } } } else{ for(int w = 0; w < m_midPoints.length;w++){ String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i)); m_distribution.put(key,new Double(1.0/m_numIntervals)); } } } } /** * Constructs an item set of certain length randomly. * This method is used for standard association rule mining. * @param maxLength the number of attributes of the instances * @param actualLength the number of attributes that should be present in the item set * @param randNum the random number generator * @return a randomly constructed item set in form of an int array */ public final int[] randomRule(int maxLength, int actualLength, Random randNum){ int[] itemArray = new int[maxLength]; for(int k =0;k < itemArray.length;k++) itemArray[k] = -1; int help =actualLength; if(help == maxLength){ help = 0; for(int h = 0; h < itemArray.length; h++){ itemArray[h] = m_randNum.nextInt((m_instances.attribute(h)).numValues()); } } while(help > 0){ int mark = randNum.nextInt(maxLength); if(itemArray[mark] == -1){ help--; itemArray[mark] = m_randNum.nextInt((m_instances.attribute(mark)).numValues()); } } return itemArray; } /** * Constructs an item set of certain length randomly. * This method is used for class association rule mining. * @param maxLength the number of attributes of the instances * @param actualLength the number of attributes that should be present in the item set * @param randNum the random number generator * @return a randomly constructed item set in form of an int array */ public final int[] randomCARule(int maxLength, int actualLength, Random randNum){ int[] itemArray = new int[maxLength];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -