📄 apriorirules.java

📁 数据挖掘的工具代码（包含fp-tree,appriory
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*ARMiner - Association Rules MinerCopyright (C) 2000  UMass/Boston - Computer Science DepartmentThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or (atyour option) any later version.This program is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307USAThe ARMiner Server was written by Dana Cristofor and LaurentiuCristofor.The ARMiner Client was written by Abdelmajid Karatihy, Xiaoyong Kuang,and Lung-Tsung Li.The ARMiner package is currently maintained by Laurentiu Cristofor(laur@cs.umb.edu).*/import java.util.*;import java.io.IOException;import java.io.EOFException;/*  Maintenance log started on November 30th, 2000  Nov. 30th, 2000   - fixed rule generation procedure to use                      all frequent itemsets, not only the                      maximal ones*//**   AprioriRules.java<P>   This class implements the Apriori algorithm    for finding association rules.   (see "Fast Algorithms for Mining Association Rules"   by Rakesh Agrawal and Ramakrishnan Srikant   from IBM Almaden Research Center 1994)   *//*     This file is a part of the ARMiner project.      (P)1999-2000 by ARMiner Server Team:   Dana Cristofor   Laurentiu Cristofor*/public class AprioriRules implements AssociationsFinder{  private SET supports;  private Vector rules;  private float min_support;  private float min_confidence;  private Itemset is_in_antecedent;  private Itemset is_in_consequent;  private Itemset is_ignored;  private int max_antecedent;  private int min_consequent;  // this method stores all frequent itemsets that have support  // greater than the minimum support in a SET for more efficient  // access times.  private void initializeSupports(DBCacheReader cacheReader)  {    // create new SET    supports = new SET();    try      {	Itemset is;	while (true)	  {	    // get item from cache	    is = cacheReader.getNextItemset();	    // if item has support greater than the minimum support	    // required then we add it to the SET	    if (is.getSupport() >= min_support)	      {		supports.insert(is);	      }	  }      }    catch (EOFException e)      {	// do nothing, we just reached the EOF      }    catch (IOException e)      {	System.err.println("Error scanning cache!!!\n" + e);      }    catch (ClassNotFoundException e)      {	System.err.println("Error scanning cache!!!\n" + e);      }  }  /**   * Find association rules in a database, given the set of   * frequent itemsets.   *   * @param cacheReader   the object used to read from the cache   * @param minSupport   the minimum support   * @param minConfidence   the minimum confidence   * @return   a Vector containing all association rules found   */  public Vector findAssociations(DBCacheReader cacheReader,				 float minSupport,				 float minConfidence)  {    min_support = minSupport;    min_confidence = minConfidence;    // create the vector where we'll put the rules    rules = new Vector();        // read from cache supports of frequent itemsets    initializeSupports(cacheReader);    // get the frequent itemsets    Vector frequent = supports.getItemsets();    // generate rules from each frequent itemset    for (int i = 0; i < frequent.size(); i++)      {	// get a frequent itemset	Itemset is_frequent = (Itemset)frequent.get(i);	// skip it if it's too small	if (is_frequent.size() <= 1)	  continue;	// get all possible 1 item consequents	Vector consequents = new Vector(is_frequent.size());	for (int k = 0; k < is_frequent.size(); k++)	  {	    int item = is_frequent.getItem(k);	    Itemset is_consequent = new Itemset(1);	    is_consequent.addItem(item);	    // is_consequent now contains a possible consequent	    // verify now that the rule having this consequent	    // satisfies our requirements	    Itemset is_antecedent = is_frequent.subtract(is_consequent);	    float antecedent_support = (float)0.00001;	    try	      {		antecedent_support = supports.getSupport(is_antecedent);	      }	    catch (SETException e)	      {		System.err.println("Error geting support from SET!!!\n" + e);	      }	    float confidence = is_frequent.getSupport() / antecedent_support;	    if (confidence >= min_confidence)	      {		consequents.add(is_consequent);		// we add the rule to our collection if it satisfies		// our conditions		rules.add(new AssociationRule(is_antecedent, is_consequent,					      is_frequent.getSupport(), 					      confidence));	      }	  }	// call the ap_genrules procedure for generating all rules	// out of this frequent itemset	ap_genrules(is_frequent, consequents);      }    return rules;  }  // this is the ap-genrules procedure that generates rules out  // of a frequent itemset.  private void ap_genrules(Itemset is_frequent, Vector consequents)  {    if (consequents.size() == 0)      return;    // the size of frequent must be bigger than the size of the itemsets    // in consequents by at least 2, in order to be able to generate    // a rule in this call    if (is_frequent.size() > ((Itemset)(consequents.get(0))).size() + 1)      {	Vector new_consequents = apriori_gen(consequents);	AssociationRule ar;	for (int i = 0; i < new_consequents.size(); i++)	  {	    Itemset is_consequent = (Itemset)new_consequents.get(i);	    Itemset is_antecedent = is_frequent.subtract(is_consequent);	    float antecedent_support = (float)0.00001;	    try	      {		antecedent_support = supports.getSupport(is_antecedent);	      }	    catch (SETException e)	      {		System.err.println("Error geting support from SET!!!\n" + e);	      }	    float confidence = is_frequent.getSupport() / antecedent_support;	    	    // if the rule satisfies our requirements we add it	    // to our collection	    if (confidence >= min_confidence)	      rules.add(new AssociationRule(is_antecedent, is_consequent,					    is_frequent.getSupport(), 					    confidence));	    // otherwise we remove the consequent from the collection	    // and we update the index such that we don't skip a consequent	    else	      new_consequents.remove(i--);	  }	ap_genrules(is_frequent, new_consequents);      }  }  // this is the apriori_gen procedure that generates starting from  // a k-itemset collection a new collection of (k+1)-itemsets.  private Vector apriori_gen(Vector itemsets)  {    if (itemsets.size() == 0)      return new Vector(0);    // create a hashtree so that we can check more efficiently the    // number of subsets    // this may not really be necessary when generating rules since    // itemsets will probably be a small collection, but just in case    HashTree ht_itemsets = new HashTree(itemsets);    for (int i = 0; i < itemsets.size(); i++)      ht_itemsets.add(i);    ht_itemsets.prepareForDescent();    Vector result = new Vector();    Itemset is_i, is_j;    for (int i = 0; i < itemsets.size() - 1; i++)      for (int j = i + 1; j < itemsets.size(); j++)	{	  is_i = (Itemset)itemsets.get(i);	  is_j = (Itemset)itemsets.get(j);
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -