⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 partition.java

📁 数据挖掘中的Aprior算法java实现
💻 JAVA
字号:
package datamining;

import java.io.*;
import java.util.*;

/**
 *Partition
 *Implementation of the partition algorithm that uses apriori for finding  *frequent itemsets in each part.
 */

/**
 * Class for implementing partition algorithm for
 * finding frequent itemsets.
 *
 * @author Michael Holler
 * @version 0.2, 17.03.2004
 */
public class Partition {

  Item globalroot;
  Item localroot;	
 
  int globalsupport;
  int localsupport;

  int partsize;
  int partcount;

  String filename;
  DataHandler dh;

  int total;
  
  /**
   * Default contructor for creating a Partition object.
   */
  public Partition() {

  }

  /**
   * Constructur for creating a Partition object with parameters.
   *
   * @param	filename	the name of the database file
   * @param	minsup		the minimal support threshold
   * @param	partsize	rows read from the database in one scan	
   */ 
  public Partition(String filename, int minsup, int partsize, int partcount) {
    this.partsize = partsize;
    this.globalsupport = minsup;
    this.localsupport = minsup/partcount+1;
    this.partcount = partcount;
    this.filename = filename;
    this.globalroot = new Item(0);
    this.dh = new DataHandler(filename);
  }

  /**
   * The workhorse method that implements the partition algorithm.
   */  
  public void findFrequentSets() { 
    int fine, merged, generated;
    Apriori apriori;

    dh.setPartsize(this.partsize);
    int counter = partcount;

    while (counter > 0) {	    
      apriori = new Apriori(localsupport, this.dh);
      apriori.findFrequentSets(); 
      this.localroot = apriori.getTrie();
      generated = Tools.zeroSupport(localroot);	// only needed for counting 
      merged = mergeTrie(globalroot, localroot);
      total += merged;
      System.out.println("total: " + this.total + 
		         ", generated: " + generated +
			 ", merged: " + merged); 
      counter--;
      dh.nextPart();
    }
    dh = new DataHandler(filename);
    Tools.countSupport(globalroot, dh);
    fine = pruneCandidates(globalroot, globalsupport);
    
    this.total = fine;
    System.out.println("\nFrequent sets found: " + this.total + ".");
  }

  /**
   * Method for pruning the candidates. Removes items that are
   * not frequent from the Trie.
   *
   * @param	item	the item the children of which will be pruned
   * @return		the number of items not removed from the candidates
   */
  public int pruneCandidates(Item item, int support) {
    Vector v = item.getChildren(); 
    Item child = item;
    int fine = 0;
    
    for (Enumeration e = new Vector(v).elements(); e.hasMoreElements(); ) { 
      child = (Item)e.nextElement(); 

      if (child.getSupport() < support) {
	v.remove(child);
      } else {
	fine++;
        fine += pruneCandidates(child, support);
      }
    }
    return fine;
  }

  /**
   * Merges a Trie into the other one.
   *
   * @param	global	the item in the Trie to which the other will be merged
   * @param	local	the item in the Trie which will be merged
   * @return		the number of item merged
   */
  public int mergeTrie(Item global, Item local) {
    Vector gv = global.getChildren();
    Vector lv = local.getChildren();
    Item item;
    Enumeration e = lv.elements();

    int merged = 0;
    int tmp;
    int index = 0;
    
    while (e.hasMoreElements()) {
      item = (Item)e.nextElement();
      tmp = gv.indexOf(item, index);
      if (tmp >= 0) {
	index = tmp + 1;
        merged += mergeTrie((Item)gv.elementAt(tmp), item);
      } else {
	while (index < gv.size() && ((Item)gv.elementAt(index)).getLabel() < item.getLabel()) index++;
	Item tmpitem = new Item(item.getLabel());
        global.addChild(tmpitem, index);
	merged++;
        merged += mergeTrie(tmpitem, item);
      } 
    }
	  
    return merged;
  }

  /**
   * Main method for testing the algorithm.
   *
   * @param 	args	the arguments can contain the filename
   * 			of the testfile and the minimal support
   * 			threshold and a filename for output
   */
  public static void main(String args[]) {
    String testfile = "test.dat";
    String outfile = "";
    int support = 4;
    int size = 4;
    int count = 4;
    try {
      testfile = args[0];
    } catch (Exception e) {
      System.out.println("Didn't get filename. Using '" + testfile + "'.");
    }
    try {
      support = new Integer(args[1]).intValue();
    } catch (Exception e) {
      System.out.println("Didn't get support threshold. Using '" + support + "'.");
    }	    

    try {
      size = new Integer(args[2]).intValue();
    } catch (Exception e) {
      System.out.println("Didn't get partsize. Using '" + size + "'.");
    }

    try {
      count = new Integer(args[3]).intValue();
    } catch (Exception e) {
      System.out.println("Didn't get partcount. Using '" + count + "'.");
    }
    
    StopWatch sw = new StopWatch();
    sw.start();
    Partition part = new Partition(testfile, support, size, count);
    part.findFrequentSets();
    sw.stop();
    sw.print();
  }

}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -