📄 priorestimation.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * PriorEstimation.java
 * Copyright (C) 2004 Stefan Mutter
 *
 */

package weka.associations;

import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Utils;
import weka.core.SpecialFunctions;
import java.util.Random;
import java.util.Hashtable;
import java.io.Serializable;

/**
 * Class implementing the prior estimattion of the predictive apriori algorithm 
 * for mining association rules. 
 *
 * Reference: T. Scheffer (2001). <i>Finding Association Rules That Trade Support 
 * Optimally against Confidence</i>. Proc of the 5th European Conf.
 * on Principles and Practice of Knowledge Discovery in Databases (PKDD'01),
 * pp. 424-435. Freiburg, Germany: Springer-Verlag. <p>
 *
 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $ */

 public class PriorEstimation implements Serializable{
    
    /** The number of rnadom rules. */
    protected int m_numRandRules;
    
    /** The number of intervals. */
    protected int m_numIntervals;
    
    /** The random seed used for the random rule generation step. */
    protected static final int SEED = 0;
    
    /** The maximum number of attributes for which a prior can be estimated. */
    protected static final int MAX_N = 1024;
    
    /** The random number generator. */
    protected Random m_randNum;
    
    /** The instances for which association rules are mined. */
    protected Instances m_instances;
    
    /** Flag indicating whether standard association rules or class association rules are mined. */
    protected boolean m_CARs;
    
    /** Hashtable to store the confidence values of randomly generated rules. */    
    protected Hashtable m_distribution;
    
    /** Hashtable containing the estimated prior probabilities. */
    protected  Hashtable m_priors;
    
    /** Sums up the confidences of all rules with a certain length. */
    protected double m_sum;
    
    /** The mid points of the discrete intervals in which the interval [0,1] is divided. */
    protected double[] m_midPoints;
    
    
    
   /**
   * Constructor 
   *
   * @param instances the instances to be used for generating the associations
   * @param numRules the number of random rules used for generating the prior
   * @param numIntervals the number of intervals to discretise [0,1]
   * @param car flag indicating whether standard or class association rules are mined
   */
    public PriorEstimation(Instances instances,int numRules,int numIntervals,boolean car) {
        
       m_instances = instances;
       m_CARs = car;
       m_numRandRules = numRules;
       m_numIntervals = numIntervals;
       m_randNum = m_instances.getRandomNumberGenerator(SEED);
    }
    /**
   * Calculates the prior distribution.
   *
   * @exception Exception if prior can't be estimated successfully
   */
    public final void generateDistribution() throws Exception{
        
        boolean jump;
        int i,maxLength = m_instances.numAttributes(), count =0,count1=0, ruleCounter;
        int [] itemArray;
        m_distribution = new Hashtable(maxLength*m_numIntervals);
        RuleItem current;
        ItemSet generate;
        
        if(m_instances.numAttributes() == 0)
            throw new Exception("Dataset has no attributes!");
        if(m_instances.numAttributes() >= MAX_N)
            throw new Exception("Dataset has to many attributes for prior estimation!");
        if(m_instances.numInstances() == 0)
            throw new Exception("Dataset has no instances!");
        for (int h = 0; h < maxLength; h++) {
            if (m_instances.attribute(h).isNumeric())
                throw new Exception("Can't handle numeric attributes!");
        } 
        if(m_numIntervals  == 0 || m_numRandRules == 0)
            throw new Exception("Prior initialisation impossible");
       
        //calculate mid points for the intervals
        midPoints();
        
        //create random rules of length i and measure their support and if support >0 their confidence
        for(i = 1;i <= maxLength; i++){
            m_sum = 0;
            int j = 0;
            count = 0;
            count1 = 0;
            while(j < m_numRandRules){
                count++;
                jump =false;
                if(!m_CARs){
                    itemArray = randomRule(maxLength,i,m_randNum);
                    current = splitItemSet(m_randNum.nextInt(i), itemArray);
                }
                else{
                    itemArray = randomCARule(maxLength,i,m_randNum);
                    current = addCons(itemArray);
                }
                int [] ruleItem = new int[maxLength];
                for(int k =0; k < itemArray.length;k++){
                    if(current.m_premise.m_items[k] != -1)
                        ruleItem[k] = current.m_premise.m_items[k];
                    else
                        if(current.m_consequence.m_items[k] != -1)
                            ruleItem[k] = current.m_consequence.m_items[k];
                        else
                            ruleItem[k] = -1;
                }
                ItemSet rule = new ItemSet(ruleItem);
                updateCounters(rule);
                ruleCounter = rule.m_counter;
                if(ruleCounter > 0)
                    jump =true;
                updateCounters(current.m_premise);
                j++;
                if(jump){
                    buildDistribution((double)ruleCounter/(double)current.m_premise.m_counter, (double)i);
                }
             }
            
            //normalize
            if(m_sum > 0){
                for(int w = 0; w < m_midPoints.length;w++){
                    String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));
                    Double oldValue = (Double)m_distribution.remove(key);
                    if(oldValue == null){
                        m_distribution.put(key,new Double(1.0/m_numIntervals));
                        m_sum += 1.0/m_numIntervals;
                    }
                    else
                        m_distribution.put(key,oldValue);
                }
                for(int w = 0; w < m_midPoints.length;w++){
                    double conf =0;
                    String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));
                    Double oldValue = (Double)m_distribution.remove(key);
                    if(oldValue != null){
                        conf = oldValue.doubleValue() / m_sum;
                        m_distribution.put(key,new Double(conf));
                    }
                }
            }
            else{
                for(int w = 0; w < m_midPoints.length;w++){
                    String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));
                    m_distribution.put(key,new Double(1.0/m_numIntervals));
                }
            }
        }
        
    }
    
    /**
     * Constructs an item set of certain length randomly.
     * This method is used for standard association rule mining.
     * @param maxLength the number of attributes of the instances
     * @param actualLength the number of attributes that should be present in the item set
     * @param randNum the random number generator
     * @return a randomly constructed item set in form of an int array
     */
    public final int[] randomRule(int maxLength, int actualLength, Random randNum){
     
        int[] itemArray = new int[maxLength];
        for(int k =0;k < itemArray.length;k++)
            itemArray[k] = -1;
        int help =actualLength;
        if(help == maxLength){
            help = 0;
            for(int h = 0; h < itemArray.length; h++){
                itemArray[h] = m_randNum.nextInt((m_instances.attribute(h)).numValues());
            }
        }
        while(help > 0){
            int mark = randNum.nextInt(maxLength);
            if(itemArray[mark] == -1){
                help--;
                itemArray[mark] = m_randNum.nextInt((m_instances.attribute(mark)).numValues());
            }
       }
        return itemArray;
    }
    
    
    /**
     * Constructs an item set of certain length randomly.
     * This method is used for class association rule mining.
     * @param maxLength the number of attributes of the instances
     * @param actualLength the number of attributes that should be present in the item set
     * @param randNum the random number generator
     * @return a randomly constructed item set in form of an int array
     */
     public final int[] randomCARule(int maxLength, int actualLength, Random randNum){
     
        int[] itemArray = new int[maxLength];
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -