📄 apriorirules.java
字号:
// if we cannot combine element i with j then we shouldn't
// waste time for bigger j's. This is because we keep the
// collections ordered, an important detail in this implementation
if (!is_i.canCombineWith(is_j))
    break;
else {
    Itemset is = is_i.combineWith(is_j);

    // a real k-itemset has k (k-1)-subsets
    // so we test that this holds before adding to result
    if (ht_itemsets.countSubsets(is) == is.size())
        result.add(is);
}
}

return result;
}

/**
 * Find association rules in a database, given the set of
 * frequent itemsets and a set of restrictions.
 *
 * <p>The arguments are copied into the instance fields
 * (<code>min_support</code>, <code>min_confidence</code>,
 * <code>is_in_antecedent</code>, <code>is_in_consequent</code>,
 * <code>is_ignored</code>, <code>max_antecedent</code>,
 * <code>min_consequent</code>) and the <code>rules</code> field is
 * replaced by a fresh Vector, which is also the returned object.
 *
 * <p>If the support of a candidate antecedent cannot be read
 * (a <code>SETException</code> is raised), the problem is reported on
 * <code>System.err</code> and that candidate is skipped: no rule is
 * produced for it and it is not used to build larger consequents.
 *
 * @param cacheReader the object used to read from the cache
 * @param minSupport the minimum support (stored, not otherwise
 * used by this method)
 * @param minConfidence the minimum confidence a rule must reach
 * @param inAntecedent the items that must appear in the antecedent
 * of each rule, if null then this constraint is ignored
 * @param inConsequent the items that must appear in the consequent
 * of each rule, if null then this constraint is ignored
 * @param ignored the items that should be ignored,
 * if null then this constraint is ignored
 * @param maxAntecedent the maximum number of items that can appear
 * in the antecedent of each rule, if 0 then this constraint is ignored
 * @param minConsequent the minimum number of items that should appear
 * in the consequent of each rule, if 0 then this constraint is ignored
 * @return a Vector containing all association rules found
 */
public Vector findAssociations(DBCacheReader cacheReader,
                               float minSupport,
                               float minConfidence,
                               Itemset inAntecedent,
                               Itemset inConsequent,
                               Itemset ignored,
                               int maxAntecedent,
                               int minConsequent) {
    min_support = minSupport;
    min_confidence = minConfidence;
    is_in_antecedent = inAntecedent;
    is_in_consequent = inConsequent;
    is_ignored = ignored;
    max_antecedent = maxAntecedent;
    min_consequent = minConsequent;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read from cache supports of frequent itemsets
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();
    if (frequent.size() == 0)
        return rules;

    // if we need to ignore some items, drop every frequent itemset that
    // contains one of them; their subsets that do not contain those
    // items remain in the collection
    if (ignored != null) {
        int i = 0;
        while (i < frequent.size()) {
            Itemset is = (Itemset) frequent.get(i);
            if (is.doesIntersect(ignored)) {
                // the order of 'frequent' is not relied upon below, so
                // overwrite slot i with the last element, drop the last
                // slot, and re-examine slot i on the next pass
                int last = frequent.size() - 1;
                frequent.set(i, frequent.get(last));
                frequent.remove(last);
            } else {
                i++;
            }
        }

        if (frequent.size() == 0)
            return rules;
    }

    // if some items are required in the antecedent or consequent, an
    // itemset that does not contain all of them cannot yield a rule
    if (inAntecedent != null || inConsequent != null) {
        int i = 0;
        while (i < frequent.size()) {
            Itemset is = (Itemset) frequent.get(i);
            boolean lacksRequired =
                (inAntecedent != null && !inAntecedent.isIncludedIn(is))
                || (inConsequent != null && !inConsequent.isIncludedIn(is));
            if (lacksRequired) {
                // same swap-with-last removal as above
                int last = frequent.size() - 1;
                frequent.set(i, frequent.get(last));
                frequent.remove(last);
            } else {
                i++;
            }
        }

        if (frequent.size() == 0)
            return rules;
    }

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
        Itemset is_frequent = (Itemset) frequent.get(i);

        // skip it if it's too small to be split into a non-empty
        // antecedent and a consequent of the requested minimum size
        if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent)
            continue;

        // collect all 1-item consequents that reach the confidence
        // threshold; they seed the ap-genrules procedure
        Vector consequents = new Vector(is_frequent.size());
        for (int k = 0; k < is_frequent.size(); k++) {
            Itemset is_consequent = new Itemset(1);
            is_consequent.addItem(is_frequent.getItem(k));

            Itemset is_antecedent = is_frequent.subtract(is_consequent);

            float antecedent_support;
            try {
                antecedent_support = supports.getSupport(is_antecedent);
            } catch (SETException e) {
                System.err.println("Error geting support from SET!!!\n" + e);
                // without the antecedent's support the confidence cannot
                // be computed; dividing by a placeholder would emit a
                // rule with a meaningless confidence, so skip it
                continue;
            }

            float confidence = is_frequent.getSupport() / antecedent_support;
            if (confidence >= min_confidence) {
                // confident consequents are kept for ap-genrules even
                // when the rule itself is rejected by the constraints
                consequents.add(is_consequent);

                boolean rejected =
                    (is_in_antecedent != null
                        && !is_in_antecedent.isIncludedIn(is_antecedent))
                    || (is_in_consequent != null
                        && !is_in_consequent.isIncludedIn(is_consequent))
                    || (max_antecedent > 0
                        && is_antecedent.size() > max_antecedent)
                    || (min_consequent > 0
                        && is_consequent.size() < min_consequent);

                // if the rule satisfies all requirements then
                // we add it to the rules collection
                if (!rejected)
                    rules.add(new AssociationRule(is_antecedent,
                                                  is_consequent,
                                                  is_frequent.getSupport(),
                                                  confidence));
            }
        }

        // call the ap-genrules procedure for generating all rules
        // out of this frequent itemset
        ap_genrules_constraint(is_frequent, consequents);
    }

    return rules;
}

// this is the ap-genrules procedure that generates rules out
// of a frequent itemset.
private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) { if (consequents.size() == 0) return; // the size of frequent must be bigger than the size of the itemsets // in consequents by at least 2, in order to be able to generate // a rule in this call if (is_frequent.size() > ((Itemset)(consequents.get(0))).size() + 1) { Vector new_consequents = apriori_gen(consequents); AssociationRule ar; for (int i = 0; i < new_consequents.size(); i++) { Itemset is_consequent = (Itemset)new_consequents.get(i); Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float)0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; // if the rule satisfies our confidence requirements if (confidence >= min_confidence) { // check whether it also satisfies our constraints boolean approved = true; if (approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false; if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent)) approved = false; if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent) approved = false; if (approved && min_consequent > 0 && is_consequent.size() < min_consequent) approved = false; // if the rule satisifes all requirements then // we add it to the rules collection if (approved) rules.add(new AssociationRule(is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } // otherwise we remove the consequent from the collection // and we update the index such that we don't skip a consequent else new_consequents.remove(i--); } ap_genrules_constraint(is_frequent, new_consequents); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -