📄 apriorirules.java
字号:
/*ARMiner - Association Rules MinerCopyright (C) 2000 UMass/Boston - Computer Science DepartmentThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or (atyour option) any later version.This program is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307USAThe ARMiner Server was written by Dana Cristofor and LaurentiuCristofor.The ARMiner Client was written by Abdelmajid Karatihy, Xiaoyong Kuang,and Lung-Tsung Li.The ARMiner package is currently maintained by Laurentiu Cristofor(laur@cs.umb.edu).*/import java.util.*;import java.io.IOException;import java.io.EOFException;/* Maintenance log started on November 30th, 2000 Nov. 30th, 2000 - fixed rule generation procedure to use all frequent itemsets, not only the maximal ones*//** AprioriRules.java<P> This class implements the Apriori algorithm for finding association rules. (see "Fast Algorithms for Mining Association Rules" by Rakesh Agrawal and Ramakrishnan Srikant from IBM Almaden Research Center 1994) *//* This file is a part of the ARMiner project. (P)1999-2000 by ARMiner Server Team: Dana Cristofor Laurentiu Cristofor*/public class AprioriRules implements AssociationsFinder{ private SET supports; private Vector rules; private float min_support; private float min_confidence; private Itemset is_in_antecedent; private Itemset is_in_consequent; private Itemset is_ignored; private int max_antecedent; private int min_consequent; // this method stores all frequent itemsets that have support // greater than the minimum support in a SET for more efficient // access times. private void initializeSupports(DBCacheReader cacheReader) { // create new SET supports = new SET(); try { Itemset is; while (true) { // get item from cache is = cacheReader.getNextItemset(); // if item has support greater than the minimum support // required then we add it to the SET if (is.getSupport() >= min_support) { supports.insert(is); } } } catch (EOFException e) { // do nothing, we just reached the EOF } catch (IOException e) { System.err.println("Error scanning cache!!!\n" + e); } catch (ClassNotFoundException e) { System.err.println("Error scanning cache!!!\n" + e); } } /** * Find association rules in a database, given the set of * frequent itemsets. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @return a Vector containing all association rules found */ public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) { min_support = minSupport; min_confidence = minConfidence; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset)frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float)0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // we add the rule to our collection if it satisfies // our conditions rules.add(new AssociationRule(is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap_genrules procedure for generating all rules // out of this frequent itemset ap_genrules(is_frequent, consequents); } return rules; } // this is the ap-genrules procedure that generates rules out // of a frequent itemset. private void ap_genrules(Itemset is_frequent, Vector consequents) { if (consequents.size() == 0) return; // the size of frequent must be bigger than the size of the itemsets // in consequents by at least 2, in order to be able to generate // a rule in this call if (is_frequent.size() > ((Itemset)(consequents.get(0))).size() + 1) { Vector new_consequents = apriori_gen(consequents); AssociationRule ar; for (int i = 0; i < new_consequents.size(); i++) { Itemset is_consequent = (Itemset)new_consequents.get(i); Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float)0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; // if the rule satisfies our requirements we add it // to our collection if (confidence >= min_confidence) rules.add(new AssociationRule(is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); // otherwise we remove the consequent from the collection // and we update the index such that we don't skip a consequent else new_consequents.remove(i--); } ap_genrules(is_frequent, new_consequents); } } // this is the apriori_gen procedure that generates starting from // a k-itemset collection a new collection of (k+1)-itemsets. private Vector apriori_gen(Vector itemsets) { if (itemsets.size() == 0) return new Vector(0); // create a hashtree so that we can check more efficiently the // number of subsets // this may not really be necessary when generating rules since // itemsets will probably be a small collection, but just in case HashTree ht_itemsets = new HashTree(itemsets); for (int i = 0; i < itemsets.size(); i++) ht_itemsets.add(i); ht_itemsets.prepareForDescent(); Vector result = new Vector(); Itemset is_i, is_j; for (int i = 0; i < itemsets.size() - 1; i++) for (int j = i + 1; j < itemsets.size(); j++) { is_i = (Itemset)itemsets.get(i); is_j = (Itemset)itemsets.get(j);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -