📄 ndi.java
字号:
//package datamining;
import java.util.*;
/**
* Class for implementing the non-derivable itemset algorithm.
*
* In this version support for the constructor using a DataHandler.
*
* @author Michael Holler
* @version 0.3, 18.02.2004
*/
public class NDI extends Apriori {
/**
* Default constructur for creating a NDI object.
*/
public NDI() {
super();
}
/**
* Constructor for creating a NDI object with parameters.
* Calls the constructor of Apriori.
*
* @param filename the name of database file
* @param support the minimal support threshold
* @param outfile the name of the output file
*/
public NDI(String filename, int support, String outfile) {
super(filename, support, outfile);
}
/**
* Constructur for creating a Apriori object with parameters.
* This one is used with other mining algorithms.
*
* @param minsup the minimal support threshold
* @param datahandler the handler for the database
*/
public NDI(int minsup, DataHandler datahandler) {
super(minsup, datahandler);
}
/**
* Method for copying the siblings of an Item to its children.
*
* @param item the item to which the siblings are copied
* @param siblings the siblings to be copied
* @param current the current itemset to be generated
* @return the number of siblings copied
*/
public int copySiblings(Item item, Vector siblings, Vector current) {
Enumeration e = siblings.elements();
Item parent = item;
Item sibling = new Item();
int copied = 0;
while (sibling.getLabel() < parent.getLabel() && e.hasMoreElements()) {
sibling = (Item)e.nextElement();
}
while (e.hasMoreElements()) {
sibling = (Item)e.nextElement();
current.add(sibling);
Item rule = new Item(0);
rule.incSupport(-this.root.getSupport());
if (generateBounds(rule, current, this.root, new Vector(), 0)) {
Interval bounds = new Interval(0, this.root.getSupport());
bounds = getBounds(rule, current.size(), 0);
if (bounds.upperBound() >= this.minsup
&& bounds.upperBound() != bounds.lowerBound()) {
parent.addChild(new Item(sibling.getLabel()));
copied++;
}
}
current.remove(sibling);
}
return copied;
}
/**
* Checks if the subsets of the itemset to be generated are all frequent.
*
* @param rule the root of the rule trie
* @param current the itemset for which the bounds are generated
* @param item the item from the candidate trie to be found in
* the current itemset
* @param subset the subset generated from the current itemset
* @param mark the mark in the current itemset
* @return true if the bounds can be generated, else false
*/
public boolean generateBounds(Item rule, Vector current, Item item, Vector subset, int mark) {
boolean ok = true;
Vector children = item.getChildren();
Item child;
int index;
int i = current.size() - 1;
while (ok && (mark <= i)) {
index = children.indexOf(current.elementAt(i));
if (index >= 0) {
child = (Item)children.elementAt(index);
subset.add(child);
int support = child.getSupport();
if (subset.size()%2 == 0) {
support *= -1;
}
rule.incSupport(support);
addToRules(rule, subset, support, 0);
if (subset.size() < current.size()-1) {
ok = generateBounds(rule, current, child, subset, i+1);
}
subset.remove(child);
} else {
ok = false;
}
i--;
}
return ok;
}
/**
* Adds rules into the rule trie. If the rule already exists, its support
* will be increased by the support given as parameter.
*
* @param rule the rule of which the children are checked for the
* appropriate rule
* @param subset the itemset containing the rule to be added/updated
* @param support the support to be added to the correct rules
* @param mark the mark in the subset
*/
public void addToRules(Item rule, Vector subset, int support, int mark) {
Vector rules = rule.getChildren();
Item child;
int index;
int i = subset.size() - 1;
Interval in = new Interval();
while (mark <= i) {
index = rules.indexOf(subset.elementAt(i));
if (index >= 0) {
child = (Item)rules.elementAt(index);
} else {
child = new Item(((Item)subset.lastElement()).getLabel());
rule.addChild(child, 0);
}
child.incSupport(support);
addToRules(child, subset, support, i+1);
i--;
}
}
/**
* Computes the bounds in a rule trie.
*
* @param rule the item the children of which are computed
* @param size the length of the path in the trie
* @param depth the depth of recursion
* @return the bounds in an Interval
*/
public Interval getBounds(Item rule, int size, int depth) {
Vector rules = rule.getChildren();
int support = rule.getSupport();
Interval in = new Interval(0, this.root.getSupport());
Item child;
for (Enumeration e = rules.elements(); e.hasMoreElements(); ) {
child = (Item)e.nextElement();
Interval childin = getBounds(child, size, depth+1);
in.setLower(childin.lowerBound());
in.setUpper(childin.upperBound());
}
if (size%2 != 0) {
support *= -1;
}
if ((size - depth)%2 == 0) {
in.setLower(support);
} else {
in.setUpper(support);
}
return in;
}
/**
* Main method for testing the algorithm.
*
* @param args the arguments can contain the filename
* of the testfile and the minimal support
* threshold and a filename for output
*/
public static void main(String args[]) {
String testfile = "test.dat";
String outfile = "";
int support = 5;
try {
testfile = args[0];
} catch (Exception e) {
System.out.println("Didn't get filename. Using '" + testfile + "'.");
}
try {
support = new Integer(args[1]).intValue();
} catch (Exception e) {
System.out.println("Didn't get support threshold. Using '" + support + "'.");
}
try {
outfile = args[2];
} catch (Exception e) {
System.out.println("Didn't get output filename. Not printing.");
}
NDI ndi = new NDI(testfile, support, outfile);
ndi.findFrequentSets();
ndi.printFrequentSets();
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -