📄 partition.java
字号:
package datamining;
import java.io.*;
import java.util.*;
/**
*Partition
*Implementation of the partition algorithm that uses apriori for finding *frequent itemsets in each part.
*/
/**
* Class for implementing partition algorithm for
* finding frequent itemsets.
*
* @author Michael Holler
* @version 0.2, 17.03.2004
*/
public class Partition {
Item globalroot;
Item localroot;
int globalsupport;
int localsupport;
int partsize;
int partcount;
String filename;
DataHandler dh;
int total;
/**
* Default contructor for creating a Partition object.
*/
public Partition() {
}
/**
* Constructur for creating a Partition object with parameters.
*
* @param filename the name of the database file
* @param minsup the minimal support threshold
* @param partsize rows read from the database in one scan
*/
public Partition(String filename, int minsup, int partsize, int partcount) {
this.partsize = partsize;
this.globalsupport = minsup;
this.localsupport = minsup/partcount+1;
this.partcount = partcount;
this.filename = filename;
this.globalroot = new Item(0);
this.dh = new DataHandler(filename);
}
/**
* The workhorse method that implements the partition algorithm.
*/
public void findFrequentSets() {
int fine, merged, generated;
Apriori apriori;
dh.setPartsize(this.partsize);
int counter = partcount;
while (counter > 0) {
apriori = new Apriori(localsupport, this.dh);
apriori.findFrequentSets();
this.localroot = apriori.getTrie();
generated = Tools.zeroSupport(localroot); // only needed for counting
merged = mergeTrie(globalroot, localroot);
total += merged;
System.out.println("total: " + this.total +
", generated: " + generated +
", merged: " + merged);
counter--;
dh.nextPart();
}
dh = new DataHandler(filename);
Tools.countSupport(globalroot, dh);
fine = pruneCandidates(globalroot, globalsupport);
this.total = fine;
System.out.println("\nFrequent sets found: " + this.total + ".");
}
/**
* Method for pruning the candidates. Removes items that are
* not frequent from the Trie.
*
* @param item the item the children of which will be pruned
* @return the number of items not removed from the candidates
*/
public int pruneCandidates(Item item, int support) {
Vector v = item.getChildren();
Item child = item;
int fine = 0;
for (Enumeration e = new Vector(v).elements(); e.hasMoreElements(); ) {
child = (Item)e.nextElement();
if (child.getSupport() < support) {
v.remove(child);
} else {
fine++;
fine += pruneCandidates(child, support);
}
}
return fine;
}
/**
* Merges a Trie into the other one.
*
* @param global the item in the Trie to which the other will be merged
* @param local the item in the Trie which will be merged
* @return the number of item merged
*/
public int mergeTrie(Item global, Item local) {
Vector gv = global.getChildren();
Vector lv = local.getChildren();
Item item;
Enumeration e = lv.elements();
int merged = 0;
int tmp;
int index = 0;
while (e.hasMoreElements()) {
item = (Item)e.nextElement();
tmp = gv.indexOf(item, index);
if (tmp >= 0) {
index = tmp + 1;
merged += mergeTrie((Item)gv.elementAt(tmp), item);
} else {
while (index < gv.size() && ((Item)gv.elementAt(index)).getLabel() < item.getLabel()) index++;
Item tmpitem = new Item(item.getLabel());
global.addChild(tmpitem, index);
merged++;
merged += mergeTrie(tmpitem, item);
}
}
return merged;
}
/**
* Main method for testing the algorithm.
*
* @param args the arguments can contain the filename
* of the testfile and the minimal support
* threshold and a filename for output
*/
public static void main(String args[]) {
String testfile = "test.dat";
String outfile = "";
int support = 4;
int size = 4;
int count = 4;
try {
testfile = args[0];
} catch (Exception e) {
System.out.println("Didn't get filename. Using '" + testfile + "'.");
}
try {
support = new Integer(args[1]).intValue();
} catch (Exception e) {
System.out.println("Didn't get support threshold. Using '" + support + "'.");
}
try {
size = new Integer(args[2]).intValue();
} catch (Exception e) {
System.out.println("Didn't get partsize. Using '" + size + "'.");
}
try {
count = new Integer(args[3]).intValue();
} catch (Exception e) {
System.out.println("Didn't get partcount. Using '" + count + "'.");
}
StopWatch sw = new StopWatch();
sw.start();
Partition part = new Partition(testfile, support, size, count);
part.findFrequentSets();
sw.stop();
sw.print();
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -