priorestimation.java

来自「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」· Java 代码 · 共 479 行 · 第 1/2 页
JAVA
479 行
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * PriorEstimation.java
 * Copyright (C) 2004 Stefan Mutter
 *
 */

package weka.associations;

import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Utils;
import weka.core.SpecialFunctions;
import java.util.Random;
import java.util.Hashtable;
import java.io.Serializable;

/**
 * Class implementing the prior estimation of the predictive apriori algorithm
 * for mining association rules.
 *
 * Reference: T. Scheffer (2001). <i>Finding Association Rules That Trade Support
 * Optimally against Confidence</i>. Proc of the 5th European Conf.
 * on Principles and Practice of Knowledge Discovery in Databases (PKDD'01),
 * pp. 424-435. Freiburg, Germany: Springer-Verlag. <p>
 *
 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
 * @version $Revision: 1.4 $
 */
 public class PriorEstimation implements Serializable{

    /** The number of random rules generated per rule length. */
    protected int m_numRandRules;

    /** The number of intervals. */
    protected int m_numIntervals;

    /** The random seed used for the random rule generation step. */
    protected static final int SEED = 0;

    /**
     * The maximum number of attributes for which a prior can be estimated.
     * generateDistribution() rejects datasets with MAX_N or more attributes.
     */
    protected static final int MAX_N = 1024;

    /** The random number generator. */
    protected Random m_randNum;

    /** The instances for which association rules are mined. */
    protected Instances m_instances;

    /** Flag indicating whether standard association rules or class association rules are mined. */
    protected boolean m_CARs;

    /**
     * Hashtable to store the confidence values of randomly generated rules.
     * generateDistribution() addresses entries by a String key made of an
     * interval mid point followed by the rule length (as a double).
     */
    protected Hashtable m_distribution;

    /** Hashtable containing the estimated prior probabilities. */
    protected  Hashtable m_priors;

    /**
     * Sums up the confidences of all rules with a certain length.
     * Reset to zero for every rule length and used as the divisor when the
     * distribution for that length is normalised.
     */
    protected double m_sum;

    /** The mid points of the discrete intervals in which the interval [0,1] is divided. */
    protected double[] m_midPoints;

  /**
   * Constructor. Stores the arguments and obtains the random number generator
   * from the given instances using the fixed seed {@link #SEED}.
   *
   * @param instances the instances to be used for generating the associations
   * @param numRules the number of random rules used for generating the prior
   * @param numIntervals the number of intervals to discretise [0,1]
   * @param car flag indicating whether standard or class association rules are mined
   */
    public PriorEstimation(Instances instances,int numRules,int numIntervals,boolean car) {

       m_instances = instances;
       m_CARs = car;
       m_numRandRules = numRules;
       m_numIntervals = numIntervals;
       // the generator is requested from the stored instances, so a null dataset fails here
       m_randNum = m_instances.getRandomNumberGenerator(SEED);
    }

    /**
   * Calculates the prior distribution.
*   * @exception Exception if prior can't be estimated successfully   */    public final void generateDistribution() throws Exception{                boolean jump;        int i,maxLength = m_instances.numAttributes(), count =0,count1=0, ruleCounter;        int [] itemArray;        m_distribution = new Hashtable(maxLength*m_numIntervals);        RuleItem current;        ItemSet generate;                if(m_instances.numAttributes() == 0)            throw new Exception("Dataset has no attributes!");        if(m_instances.numAttributes() >= MAX_N)            throw new Exception("Dataset has to many attributes for prior estimation!");        if(m_instances.numInstances() == 0)            throw new Exception("Dataset has no instances!");        for (int h = 0; h < maxLength; h++) {            if (m_instances.attribute(h).isNumeric())                throw new Exception("Can't handle numeric attributes!");        }         if(m_numIntervals  == 0 || m_numRandRules == 0)            throw new Exception("Prior initialisation impossible");               //calculate mid points for the intervals        midPoints();                //create random rules of length i and measure their support and if support >0 their confidence        for(i = 1;i <= maxLength; i++){            m_sum = 0;            int j = 0;            count = 0;            count1 = 0;            while(j < m_numRandRules){                count++;                jump =false;                if(!m_CARs){                    itemArray = randomRule(maxLength,i,m_randNum);                    current = splitItemSet(m_randNum.nextInt(i), itemArray);                }                else{                    itemArray = randomCARule(maxLength,i,m_randNum);                    current = addCons(itemArray);                }                int [] ruleItem = new int[maxLength];                for(int k =0; k < itemArray.length;k++){                    if(current.m_premise.m_items[k] != -1)                        ruleItem[k] = 
current.m_premise.m_items[k];                    else                        if(current.m_consequence.m_items[k] != -1)                            ruleItem[k] = current.m_consequence.m_items[k];                        else                            ruleItem[k] = -1;                }                ItemSet rule = new ItemSet(ruleItem);                updateCounters(rule);                ruleCounter = rule.m_counter;                if(ruleCounter > 0)                    jump =true;                updateCounters(current.m_premise);                j++;                if(jump){                    buildDistribution((double)ruleCounter/(double)current.m_premise.m_counter, (double)i);                }             }                        //normalize            if(m_sum > 0){                for(int w = 0; w < m_midPoints.length;w++){                    String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));                    Double oldValue = (Double)m_distribution.remove(key);                    if(oldValue == null){                        m_distribution.put(key,new Double(1.0/m_numIntervals));                        m_sum += 1.0/m_numIntervals;                    }                    else                        m_distribution.put(key,oldValue);                }                for(int w = 0; w < m_midPoints.length;w++){                    double conf =0;                    String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));                    Double oldValue = (Double)m_distribution.remove(key);                    if(oldValue != null){                        conf = oldValue.doubleValue() / m_sum;                        m_distribution.put(key,new Double(conf));                    }                }            }            else{                for(int w = 0; w < m_midPoints.length;w++){                    String key = (String.valueOf(m_midPoints[w])).concat(String.valueOf((double)i));                    
m_distribution.put(key,new Double(1.0/m_numIntervals));                }            }        }            }        /**     * Constructs an item set of certain length randomly.     * This method is used for standard association rule mining.     * @param maxLength the number of attributes of the instances     * @param actualLength the number of attributes that should be present in the item set     * @param randNum the random number generator     * @return a randomly constructed item set in form of an int array     */    public final int[] randomRule(int maxLength, int actualLength, Random randNum){             int[] itemArray = new int[maxLength];        for(int k =0;k < itemArray.length;k++)            itemArray[k] = -1;        int help =actualLength;        if(help == maxLength){            help = 0;            for(int h = 0; h < itemArray.length; h++){                itemArray[h] = m_randNum.nextInt((m_instances.attribute(h)).numValues());            }        }        while(help > 0){            int mark = randNum.nextInt(maxLength);            if(itemArray[mark] == -1){                help--;                itemArray[mark] = m_randNum.nextInt((m_instances.attribute(mark)).numValues());            }       }        return itemArray;    }            /**     * Constructs an item set of certain length randomly.     * This method is used for class association rule mining.     * @param maxLength the number of attributes of the instances     * @param actualLength the number of attributes that should be present in the item set     * @param randNum the random number generator     * @return a randomly constructed item set in form of an int array     */     public final int[] randomCARule(int maxLength, int actualLength, Random randNum){             int[] itemArray = new int[maxLength];
priorestimation.java - 源码说明

本页面展示了「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」中的 priorestimation.java 源码文件，采用 Java 编程语言编写，共 479 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?