predictiveapriori.java

来自「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」· Java 代码 · 共 800 行 · 第 1/2 页
JAVA
800 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    PredictiveApriori.java *    Copyright (C) 2004 Stefan Mutter * */package weka.associations;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import java.io.BufferedReader;import java.io.FileReader;import java.io.Reader;import java.util.Enumeration;import java.util.Hashtable;import java.util.TreeSet;/** <!-- globalinfo-start --> * Class implementing the predictive apriori algorithm to mine association rules.<br/> * It searches with an increasing support threshold for the best 'n' rules concerning a support-based corrected confidence value.<br/> * <br/> * For more information see:<br/> * <br/> * Tobias Scheffer: Finding Association Rules That Trade Support Optimally against Confidence. In: 5th European Conference on Principles of Data Mining and Knowledge Discovery, 424-435, 2001.<br/> * <br/> * The implementation follows the paper expect for adding a rule to the output of the 'n' best rules. A rule is added if:<br/> * the expected predictive accuracy of this rule is among the 'n' best and it is not subsumed by a rule with at least the same expected predictive accuracy (out of an unpublished manuscript from T. Scheffer). * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Scheffer2001, *    author = {Tobias Scheffer}, *    booktitle = {5th European Conference on Principles of Data Mining and Knowledge Discovery}, *    pages = {424-435}, *    publisher = {Springer}, *    title = {Finding Association Rules That Trade Support Optimally against Confidence}, *    year = {2001} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -N &lt;required number of rules output&gt; *  The required number of rules. (default = 100)</pre> *  * <pre> -A *  If set class association rules are mined. (default = no)</pre> *  * <pre> -c &lt;the class index&gt; *  The class index. (default = last)</pre> *  <!-- options-end --> * * @author Stefan Mutter (mutter@cs.waikato.ac.nz) * @version $Revision: 1.7 $ */public class PredictiveApriori   extends Associator   implements OptionHandler, CARuleMiner, TechnicalInformationHandler {    /** for serialization */  static final long serialVersionUID = 8109088846865075341L;    /** The minimum support. */  protected int m_premiseCount;   /** The maximum number of rules that are output. */  protected int m_numRules;  /** The number of rules created for the prior estimation. */  protected static final int m_numRandRules = 1000;   /** The number of intervals used for the prior estimation. */  protected static final int m_numIntervals = 100;    /** The set of all sets of itemsets. */  protected FastVector m_Ls;  /** The same information stored in hash tables. */  protected FastVector m_hashtables;  /** The list of all generated rules. */  protected FastVector[] m_allTheRules;  /** The instances (transactions) to be used for generating       the association rules. */  protected Instances m_instances;  /** The hashtable containing the prior probabilities. */  protected Hashtable m_priors;    /** The mid points of the intervals used for the prior estimation. */  protected double[] m_midPoints;  /** The expected predictive accuracy a rule needs to be a candidate for the output. */  protected double m_expectation;    /** The n best rules. */  protected TreeSet m_best;    /** Flag keeping track if the list of the n best rules has changed. */  protected boolean m_bestChanged;    /** Counter for the time of generation for an association rule. */  protected int m_count;    /** The prior estimator. */  protected PriorEstimation m_priorEstimator;     /** The class index. */    protected int m_classIndex;    /** Flag indicating whether class association rules are mined. */  protected boolean m_car;  /**   * Returns a string describing this associator   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "Class implementing the predictive apriori algorithm to mine "      + "association rules.\n"      + "It searches with an increasing support threshold for the best 'n' "      + "rules concerning a support-based corrected confidence value.\n\n"      + "For more information see:\n\n"      + getTechnicalInformation().toString() + "\n\n"      + "The implementation follows the paper expect for adding a rule to the "      + "output of the 'n' best rules. A rule is added if:\n"      + "the expected predictive accuracy of this rule is among the 'n' best "      + "and it is not subsumed by a rule with at least the same expected "      + "predictive accuracy (out of an unpublished manuscript from T. "      + "Scheffer).";  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "Tobias Scheffer");    result.setValue(Field.TITLE, "Finding Association Rules That Trade Support Optimally against Confidence");    result.setValue(Field.BOOKTITLE, "5th European Conference on Principles of Data Mining and Knowledge Discovery");    result.setValue(Field.YEAR, "2001");    result.setValue(Field.PAGES, "424-435");    result.setValue(Field.PUBLISHER, "Springer");        return result;  }  /**   * Constructor that allows to sets default values for the    * minimum confidence and the maximum number of rules   * the minimum confidence.   */  public PredictiveApriori() {    resetOptions();  }  /**   * Resets the options to the default values.   */  public void resetOptions() {        m_numRules = 105;    m_premiseCount = 1;    m_best = new TreeSet();    m_bestChanged = false;    m_expectation = 0;    m_count = 1;    m_car = false;    m_classIndex = -1;    m_priors = new Hashtable();             }  /**   * Returns default capabilities of the classifier.   *   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);        return result;  }    /**   * Method that generates all large itemsets with a minimum support, and from   * these all association rules.   *   * @param instances the instances to be used for generating the associations   * @throws Exception if rules can't be built successfully   */  public void buildAssociations(Instances instances) throws Exception {          int temp = m_premiseCount, exactNumber = m_numRules-5;     m_premiseCount = 1;    m_best = new TreeSet();    m_bestChanged = false;    m_expectation = 0;    m_count = 1;    m_instances = instances;    if (m_classIndex == -1)      m_instances.setClassIndex(m_instances.numAttributes()-1);         else if (m_classIndex < m_instances.numAttributes() && m_classIndex >= 0)      m_instances.setClassIndex(m_classIndex);    else      throw new Exception("Invalid class index.");        // can associator handle the data?    getCapabilities().testWithFail(m_instances);        //prior estimation    m_priorEstimator = new PriorEstimation(m_instances,m_numRandRules,m_numIntervals,m_car);    m_priors = m_priorEstimator.estimatePrior();    m_midPoints = m_priorEstimator.getMidPoints();        m_Ls = new FastVector();    m_hashtables = new FastVector();        for(int i =1; i < m_instances.numAttributes();i++){      m_bestChanged = false;      if(!m_car){        // find large item sets        findLargeItemSets(i);              //find association rules (rule generation procedure)        findRulesQuickly();      }      else{        findLargeCarItemSets(i);        findCaRulesQuickly();      }            if(m_bestChanged){        temp =m_premiseCount;        while(RuleGeneration.expectation(m_premiseCount, m_premiseCount,m_midPoints,m_priors) <= m_expectation){            m_premiseCount++;             if(m_premiseCount > m_instances.numInstances())                break;        }      }      if(m_premiseCount > m_instances.numInstances()){                   // Reserve space for variables        m_allTheRules = new FastVector[3];        m_allTheRules[0] = new FastVector();        m_allTheRules[1] = new FastVector();        m_allTheRules[2] = new FastVector();              int k = 0;        while(m_best.size()>0 && exactNumber > 0){            m_allTheRules[0].insertElementAt((ItemSet)((RuleItem)m_best.last()).premise(),k);            m_allTheRules[1].insertElementAt((ItemSet)((RuleItem)m_best.last()).consequence(),k);            m_allTheRules[2].insertElementAt(new Double(((RuleItem)m_best.last()).accuracy()),k);            k++;            exactNumber--;        }        return;          }            if(temp != m_premiseCount && m_Ls.size() > 0){        FastVector kSets = (FastVector)m_Ls.lastElement();        m_Ls.removeElementAt(m_Ls.size()-1);        kSets = ItemSet.deleteItemSets(kSets, m_premiseCount,Integer.MAX_VALUE);        m_Ls.addElement(kSets);      }    }        // Reserve space for variables    m_allTheRules = new FastVector[3];    m_allTheRules[0] = new FastVector();    m_allTheRules[1] = new FastVector();    m_allTheRules[2] = new FastVector();          int k = 0;    while(m_best.size()>0 && exactNumber > 0){        m_allTheRules[0].insertElementAt((ItemSet)((RuleItem)m_best.last()).premise(),k);        m_allTheRules[1].insertElementAt((ItemSet)((RuleItem)m_best.last()).consequence(),k);        m_allTheRules[2].insertElementAt(new Double(((RuleItem)m_best.last()).accuracy()),k);        k++;        exactNumber--;    }  }    /**     * Method that mines the n best class association rules.     * @return an sorted array of FastVector (depending on the expected predictive accuracy) containing the rules and metric information     * @param data the instances for which class association rules should be mined     * @throws Exception if rules can't be built successfully     */    public FastVector[] mineCARs(Instances data) throws Exception{	         m_car = true;        m_best = new TreeSet();        m_premiseCount = 1;        m_bestChanged = false;        m_expectation = 0;        m_count = 1;	buildAssociations(data);        FastVector[] allCARRules = new FastVector[3];        allCARRules[0] = new FastVector();        allCARRules[1] = new FastVector();        allCARRules[2] = new FastVector();        for(int k =0; k < m_allTheRules[0].size();k++){            int[] newPremiseArray = new int[m_instances.numAttributes()-1];            int help = 0;            for(int j = 0;j < m_instances.numAttributes();j++){                if(j != m_instances.classIndex()){                    newPremiseArray[help] = ((ItemSet)m_allTheRules[0].elementAt(k)).itemAt(j);                    help++;                }            }            ItemSet newPremise = new ItemSet(m_instances.numInstances(), newPremiseArray);            newPremise.setCounter (((ItemSet)m_allTheRules[0].elementAt(k)).counter());            allCARRules[0].addElement(newPremise);            int[] newConsArray = new int[1];            newConsArray[0] =((ItemSet)m_allTheRules[1].elementAt(k)).itemAt(m_instances.classIndex());            ItemSet newCons = new ItemSet(m_instances.numInstances(), newConsArray);            newCons.setCounter(((ItemSet)m_allTheRules[1].elementAt(k)).counter());            allCARRules[1].addElement(newCons);            allCARRules[2].addElement(m_allTheRules[2].elementAt(k));        }        	return allCARRules;    }        /**     * Gets the instances without the class attribute     * @return instances without class attribute     */        public Instances getInstancesNoClass() {            Instances noClass = null;      try{        noClass = LabeledItemSet.divide(m_instances,false);      }       catch(Exception e){        e.printStackTrace();        System.out.println("\n"+e.getMessage());      }      //System.out.println(noClass);      return noClass;  }        /**
predictiveapriori.java - 源码说明

本页面展示了「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」中的 predictiveapriori.java 源码文件，采用 Java 编程语言编写，共 800 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?