📄 apriori.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Apriori.java *    Copyright (C) 1999 Eibe Frank,Mark Hall * */package weka.associations;import java.io.*;import java.util.*;import weka.core.*;import weka.filters.Filter;import weka.filters.unsupervised.attribute.Remove;/** * Class implementing an Apriori-type algorithm. Iteratively reduces the minimum * support until it finds the required number of rules with the given minimum  * confidence. <p> * * Reference: R. Agrawal, R. Srikant (1994). <i>Fast algorithms for * mining association rules in large databases </i>. Proc * International Conference on Very Large Databases, * pp. 478-499. Santiage, Chile: Morgan Kaufmann, Los Altos, CA. <p> * * Valid options are:<p> *    * -N required number of rules <br> * The required number of rules (default: 10). <p> * * -T type of metric by which to sort rules <br> * 0 = confidence | 1 = lift | 2 = leverage | 3 = Conviction. <p> * * -C minimum confidence of a rule <br> * The minimum confidence of a rule (default: 0.9). <p> * * -D delta for minimum support <br> * The delta by which the minimum support is decreased in * each iteration (default: 0.05). <p> * * -U upper bound for minimum support <br> * The upper bound for minimum support. Don't explicitly look for  * rules with more than this level of support. <p> * * -M lower bound for minimum support <br> * The lower bound for the minimum support (default = 0.1). <p> * * -S significance level <br> * If used, rules are tested for significance at * the given level. Slower (default = no significance testing). <p> * * -R <br> * If set then columns that contain all missing values are removed from * the data. * * -I <br> * If set the itemsets found are also output (default = no). <p> * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class Apriori extends Associator implements OptionHandler {    /** The minimum support. */  protected double m_minSupport;  /** The upper bound on the support */  protected double m_upperBoundMinSupport;  /** The lower bound for the minimum support. */  protected double m_lowerBoundMinSupport;  /** Metric types. */  protected static final int CONFIDENCE = 0;  protected static final int LIFT = 1;  protected static final int LEVERAGE = 2;  protected static final int CONVICTION = 3;  public static final Tag [] TAGS_SELECTION = {    new Tag(CONFIDENCE, "Confidence"),    new Tag(LIFT, "Lift"),    new Tag(LEVERAGE, "Leverage"),    new Tag(CONVICTION, "Conviction")      };  /** The selected metric type. */  protected int m_metricType = CONFIDENCE;  /** The minimum metric score. */  protected double m_minMetric;  /** The maximum number of rules that are output. */  protected int m_numRules;  /** Delta by which m_minSupport is decreased in each iteration. */  protected double m_delta;  /** Significance level for optional significance test. */  protected double m_significanceLevel;  /** Number of cycles used before required number of rules was one. */  protected int m_cycles;  /** The set of all sets of itemsets L. */  protected FastVector m_Ls;  /** The same information stored in hash tables. */  protected FastVector m_hashtables;  /** The list of all generated rules. */  protected FastVector[] m_allTheRules;  /** The instances (transactions) to be used for generating       the association rules. */  protected Instances m_instances;  /** Output itemsets found? */  protected boolean m_outputItemSets;  protected boolean m_removeMissingCols;  /** Report progress iteratively */  protected boolean m_verbose;  /**   * Returns a string describing this associator   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Finds association rules.";  }  /**   * Constructor that allows to sets default values for the    * minimum confidence and the maximum number of rules   * the minimum confidence.   */  public Apriori() {    resetOptions();  }  /**   * Resets the options to the default values.   */  public void resetOptions() {        m_removeMissingCols = false;    m_verbose = false;    m_delta = 0.05;    m_minMetric = 0.90;    m_numRules = 10;    m_lowerBoundMinSupport = 0.1;    m_upperBoundMinSupport = 1.0;    m_significanceLevel = -1;    m_outputItemSets = false;  }  /**   * Removes columns that are all missing from the data   * @param instances the instances   * @return a new set of instances with all missing columns removed   */  private Instances removeMissingColumns(Instances instances)     throws Exception {    int numInstances = instances.numInstances();    StringBuffer deleteString = new StringBuffer();    int removeCount = 0;    boolean first = true;    int maxCount = 0;        for (int i=0;i<instances.numAttributes();i++) {      AttributeStats as = instances.attributeStats(i);      if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {	// see if we can decrease this by looking for the most frequent value	int [] counts = as.nominalCounts;	if (counts[Utils.maxIndex(counts)] > maxCount) {	  maxCount = counts[Utils.maxIndex(counts)];	}      }      if (as.missingCount == numInstances) {	if (first) {	  deleteString.append((i+1));	  first = false;	} else {	  deleteString.append(","+(i+1));	}	removeCount++;      }    }    if (m_verbose) {      System.err.println("Removed : "+removeCount+" columns with all missing "			 +"values.");    }    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {      m_upperBoundMinSupport = (double)maxCount / (double)numInstances;      if (m_verbose) {	System.err.println("Setting upper bound min support to : "			   +m_upperBoundMinSupport);      }    }    if (deleteString.toString().length() > 0) {      Remove af = new Remove();      af.setAttributeIndices(deleteString.toString());      af.setInvertSelection(false);      af.setInputFormat(instances);      Instances newInst = Filter.useFilter(instances, af);      return newInst;    }    return instances;  }  /**   * Method that generates all large itemsets with a minimum support, and from   * these all association rules with a minimum confidence.   *   * @param instances the instances to be used for generating the associations   * @exception Exception if rules can't be built successfully   */  public void buildAssociations(Instances instances) throws Exception {    double[] confidences, supports;    int[] indices;    FastVector[] sortedRuleSet;    int necSupport=0;    if (instances.checkForStringAttributes()) {      throw new Exception("Can't handle string attributes!");    }    if (m_removeMissingCols) {      instances = removeMissingColumns(instances);    }    // Decrease minimum support until desired number of rules found.    m_cycles = 0;    m_minSupport = m_upperBoundMinSupport - m_delta;    m_minSupport = (m_minSupport < m_lowerBoundMinSupport)       ? m_lowerBoundMinSupport       : m_minSupport;    do {      // Reserve space for variables      m_Ls = new FastVector();      m_hashtables = new FastVector();      m_allTheRules = new FastVector[6];      m_allTheRules[0] = new FastVector();      m_allTheRules[1] = new FastVector();      m_allTheRules[2] = new FastVector();      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {	m_allTheRules[3] = new FastVector();	m_allTheRules[4] = new FastVector();	m_allTheRules[5] = new FastVector();      }      sortedRuleSet = new FastVector[6];      sortedRuleSet[0] = new FastVector();      sortedRuleSet[1] = new FastVector();      sortedRuleSet[2] = new FastVector();      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {	sortedRuleSet[3] = new FastVector();	sortedRuleSet[4] = new FastVector();	sortedRuleSet[5] = new FastVector();      }      // Find large itemsets and rules      findLargeItemSets(instances);      if (m_significanceLevel != -1 || m_metricType != CONFIDENCE) 	findRulesBruteForce();      else	findRulesQuickly();            // Sort rules according to their support      supports = new double[m_allTheRules[2].size()];      for (int i = 0; i < m_allTheRules[2].size(); i++) 	supports[i] = (double)((ItemSet)m_allTheRules[1].elementAt(i)).support();      indices = Utils.stableSort(supports);      for (int i = 0; i < m_allTheRules[2].size(); i++) {	sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));	sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));	sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));	if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {	  sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));	  sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));	  sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));	}      }      // Sort rules according to their confidence      m_allTheRules[0].removeAllElements();      m_allTheRules[1].removeAllElements();      m_allTheRules[2].removeAllElements();      if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {	m_allTheRules[3].removeAllElements();	m_allTheRules[4].removeAllElements();	m_allTheRules[5].removeAllElements();      }      confidences = new double[sortedRuleSet[2].size()];      int sortType = 2 + m_metricType;      for (int i = 0; i < sortedRuleSet[2].size(); i++) 	confidences[i] = 	  ((Double)sortedRuleSet[sortType].elementAt(i)).doubleValue();      indices = Utils.stableSort(confidences);      for (int i = sortedRuleSet[0].size() - 1; 	   (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {	m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));	m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));	m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));	if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {	  m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));	  m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));	  m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
12 3 下一页
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -