📄 sampling.java
字号:
package datamining;
import java.io.*;
import java.util.*;
/**
 * Finds frequent itemsets by sampling: Apriori is first run on a sample of
 * the database with a proportionally lowered support threshold, and the
 * itemsets it produces are then re-counted against the whole database and
 * pruned with the real threshold.
 *
 * @author Michael Holler
 * @version 0.1, 16.03.2004
 */
public class Sampling {
    String filename;    // name of the database file
    int minsup;         // the minimal support threshold (whole database)
    int samplesize;     // sampling interval: every samplesize-th row is sampled
    Item root;          // trie obtained from the Apriori run
    DataHandler dh;     // access to the database rows

    /**
     * Default constructor for creating a Sampling object.
     * No data handler is attached, so the fields must be set before
     * {@link #findFrequentSets()} is called.
     */
    public Sampling() {
    }

    /**
     * Constructor for creating a Sampling object with parameters.
     *
     * @param filename   the name of the database file
     * @param support    the minimal support threshold
     * @param samplesize the sampling interval; every samplesize-th row
     *                   is taken into the sample
     */
    public Sampling(String filename, int support, int samplesize) {
        this.filename = filename;   // was never stored before, leaving the field null
        this.dh = new DataHandler(filename);
        this.minsup = support;
        this.samplesize = samplesize;
    }

    /**
     * The workhorse method which includes the basic idea of the
     * sampling algorithm: mine the sample, then verify the result
     * against the whole database. Prints a one-line summary of the
     * candidate, pruned and found counts to standard output.
     *
     * @throws IllegalStateException if the sample size is smaller than 1
     */
    public void findFrequentSets() {
        if (samplesize < 1) {
            // Fail before touching the data handler instead of dying later
            // with an unexplained division by zero.
            throw new IllegalStateException(
                    "Sample size must be at least 1, got " + samplesize);
        }

        // mark a sample and run it with apriori
        dh.setSample(samplesize);
        // Scale the threshold down to the sample. Integer division truncates
        // to 0 when minsup < samplesize; a zero threshold would presumably
        // make every candidate frequent in the sample, so never go below 1.
        int sampleSupport = Math.max(1, minsup / samplesize);
        Apriori apriori = new Apriori(sampleSupport, dh);
        apriori.findFrequentSets();

        // get the results and count the true support over the whole database
        this.root = apriori.getTrie();
        int candidates = Tools.zeroSupport(this.root);
        dh.setSample(1);
        Tools.countSupport(this.root, dh);

        // remove infrequent
        int pruned = Tools.pruneCandidates(this.root, this.minsup);
        int found = Tools.zeroSupport(this.root);

        System.out.println("candidates: " + candidates +
                ", pruned: " + pruned +
                ", found: " + found);
    }

    /**
     * Reads an integer command line argument, falling back to a default
     * (and saying so on standard output) when the argument is missing or
     * is not a valid integer.
     *
     * @param args     the command line arguments
     * @param index    position of the wanted argument
     * @param fallback value to use when the argument is unusable
     * @param label    human readable name of the argument for the message
     * @return the parsed argument or the fallback value
     */
    private static int parseIntArg(String[] args, int index, int fallback, String label) {
        if (args != null && index < args.length) {
            try {
                return Integer.parseInt(args[index]);
            } catch (NumberFormatException e) {
                // not a number: report it below and use the fallback
            }
        }
        System.out.println("Didn't get " + label + ". Using '" + fallback + "'.");
        return fallback;
    }

    /**
     * Main method for testing the algorithm.
     *
     * @param args optional arguments, in this order: the filename of the
     *             testfile, the minimal support threshold and the
     *             sample size; defaults are used for missing ones
     */
    public static void main(String[] args) {
        String testfile = "test.dat";

        if (args != null && args.length > 0) {
            testfile = args[0];
        } else {
            System.out.println("Didn't get filename. Using '" + testfile + "'.");
        }
        int support = parseIntArg(args, 1, 5, "support threshold");
        int samplesize = parseIntArg(args, 2, 10, "sample size");

        StopWatch sw = new StopWatch();
        sw.start();
        Sampling aws = new Sampling(testfile, support, samplesize);
        aws.findFrequentSets();
        sw.stop();
        sw.print();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -