⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aprioriall.java

📁 为了下东西 随便发了个 datamining 的源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/**
 * Title:        XELOPES Data Mining Library
 * Description:  Java Data Mining API. Supported standarts: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>;  <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.
 * Copyright:    Copyright (c) 2002 Prudential Systems Software GmbH
 * Company:      <a href="mailto:valentine.stepanenko@zsoft.ru">ZSoft, Spb, Russia</a>
 * @author Victor Borichev
 * @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
 * @version 1.0
 */

package com.prudsys.pdm.Models.CustomerSeq.Algorithms;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.Event.Status.CreationModelBeginMessage;
import com.prudsys.pdm.Core.Event.Status.ReadingBeginMessage;
import com.prudsys.pdm.Core.Event.Status.ReadingEndMessage;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
import com.prudsys.pdm.Models.CustomerSeq.CustomSequence;
import com.prudsys.pdm.Models.CustomerSeq.CustomerSequentialAlgorithm;
import com.prudsys.pdm.Models.Sequential.IntVec;
import com.prudsys.pdm.Models.Sequential.Algorithms.Seq.TransactionSet;
import com.prudsys.pdm.Utils.IntHashtable;
import com.prudsys.pdm.Utils.IntVector;

public class AprioriAll extends CustomerSequentialAlgorithm {

  Vector rules = new Vector();

  /** Customer transaction set */
  private CustomTransSet customerTransSet = null;

  /**
   * for litemsets phase of algorithm
   */
  /** Used for Lk on all steps of algorithm */
  private int L[][];
  /** Used for association table: Transaction ID -> large itemsets list */
  private int TIDTable[][];
  /** Used for customer ID of each transaction */
  private int CustomerID[];

  /** All Lk */
  private Vector allLargeItemsets;
  /** Supports for all Lk */
  private Vector allLargeItemsetsSupp;
  /** TID's */
  private Vector allTIDs;
  /** all litemsets */
  private ItemSet[] allLits;


  /** Minimum number of supporting transactions, only used if >0. */
  private int minimumSupportCount = 0;
  /** Minimum support count */
  private int m_minSuppInt;
  /** Minimum item site. */
  private int    m_minItemSize = 1;
  /** Maximum item size. */
  private int    m_maxItemSize = -1;

  /**
   * for sequential phases of algorithm
   */
  /** all large sequences */
  private Vector allLargeSequences;
  /** debug level */
  private int debug = 1;
  /** maximal length of row of tidtable */
  private int maxTidSize = 0;
  /** B1[i] = false if L1[i] doesn't make any child */
  private boolean[] B1;
  /** vector of all Bk for all Lk except for last*/
  private Vector allBk;
  /** all litemset from apriori phase as an arrays */
  private int[][] allLitsArr;
  /** all litemsets supports */
  private int[] allLitsSupp;
  /** d pruning phase */
  private boolean doPruning = true;


  public boolean getDoPruning() { return doPruning; }

  public void setDoPruning( boolean p ) { doPruning = p; }

  /**
   * Returns minimum number of transactions containing item pairs.
   * The relative minimum support value is defined by the minimumSupport
   * parameter.
   *
   * @return minimum support count
   */
  public int getMinimumSupportCount()
  {
    return minimumSupportCount;
  }

  /**
   * Sets minimum number of transactions containing item pairs.
   * The relative minimum support value is defined by the minimumSupport
   * parameter.
   *
   * @param minimumSupportCount new minimum support count
   */
  public void setMinimumSupportCount(int minimumSupportCount)
  {
    this.minimumSupportCount = minimumSupportCount;
  }

  /**
   * The method getMinSupport returns the minimum support.
   *
   * @return minimum support
   */
  public double getMinSupport() {

    return minimumSupport;
  }

  /**
   * The method getMinSupport sets the minimum support.
   *
   * @param minimum support to set
   */
  public void setMinSupport(double minSupp) {

    minimumSupport = minSupp;
  }

  public int getMinItemSize() {
    return m_minItemSize;
  }

  public void setMinItemSize(int minItemSize) {
    m_minItemSize = minItemSize;
  }

  public int getMaxItemSize() {
    return m_maxItemSize;
  }

  public void setMaxItemSize(int maxItemSize) {
    m_maxItemSize = maxItemSize;
  }


  /**
   * Checks mining algorithm for completeness by calling verify method
   * of superclass. Adiitionally, it checks whether minimumItemSize and
   * maximumItemSize are admissible.
   *
   * @throws IllegalArgumentException if some algorithm attributes are incorrect
   */
  public void verify() throws IllegalArgumentException
  {
    super.verify();
    if (m_minItemSize < 0)
      throw new IllegalArgumentException("minimumItemSize can't be negative");
    if (m_maxItemSize > -1 && m_minItemSize > m_maxItemSize)
      throw new IllegalArgumentException("minimumItemSize can't be larger than maximumItemSize");

    if (generateRules && doPruning)
      throw new IllegalArgumentException("can't generate rules with pruning");
  }

  //               << By Alexey Grinyuk >>

  /**
   * Implements the Algorithm AprioriTID.
   *
   * @param transSet transaction set
   * @return result as list of association rules
   * @author Alexey Grinyuk
   */
  private void apriori(CustomTransSet customTransSet) {
    int L1Supp[];

    m_minSuppInt = minimumSupportCount;
    if (m_minSuppInt <= 0)
      m_minSuppInt = (int)Math.ceil( minimumSupport * customTransSet.getSize() );
    System.out.println("---------minSuppCount: " + m_minSuppInt + "--------");

    // build L1 and TIDTable
    L1Supp = createL1(customTransSet);
    sortArrays(L, L1Supp);
    createTIDTable(customTransSet);

    System.out.println("|L1| = " + L.length);

    // add L1 to general result
    allLargeItemsets.add(L);
    allLargeItemsetsSupp.add(L1Supp);
    allTIDs.add(TIDTable);

    createAllLk();  // recursive function for build next Lk
    System.out.println("createAllLk finished");

  }


  /**
   * Create L1 and store it in L[]
   *
   * @param transSet transaction set
   * @return supports for L1
   * @author Alexey Grinyuk
   */
  private int[] createL1(CustomTransSet customTransSet)
  {
    int i, j, item, supp;
    int customer;
    IntHashtable hashtable = new IntHashtable();
    IntHashtable hashtableTest = new IntHashtable();
    Integer suppCount;
    Integer test;
    int newItem;
    boolean addFlag;
    TransactionSet transSet;

    ItemSet transactionItemSet;
    int itemCounter;
    int tmpL[][];
    int tmpLSupp[];
    int result[];

    for(customer=0; customer<customTransSet.getSize(); customer++) {
      transSet = customTransSet.getCustomerTransSet(customer);
      // for all transactions
      for(i=0; i<transSet.getSize(); i++) {
        transactionItemSet = (ItemSet)transSet.getTransactionAt(i);
        // for all items of transaction
        for(j=0; j<transactionItemSet.getSize(); j++) {

          newItem = transactionItemSet.getItemAt(j);

          test = hashtableTest.get(newItem);
          // if item not found in hashtableTest
          if(test == null) {
            hashtableTest.put(newItem, customer); // add item to hashtableTest
            addFlag = true;
          } else if( test.equals(new Integer(customer)) ) {
            addFlag = false;
          } else {
            addFlag = true;
            hashtableTest.put(newItem, customer);
          }

          if(addFlag) {
            suppCount = hashtable.get(newItem);

            // if item not found in hashtable
            if(suppCount == null) {
              // add item to hashtable
              hashtable.put(newItem, 1);
            } else {
              // increment support counter
              hashtable.put(newItem, suppCount.intValue() + 1);
            }
          }

        }
      }
    }

    itemCounter = hashtable.size();
    tmpL = new int[itemCounter][1]; // allocate memory for L
    tmpLSupp = new int[itemCounter]; // allocate memory for supports

    Enumeration em = hashtable.keys();
    i = 0;

    // find large itemsets (build L1)
    while(em.hasMoreElements()) {
      item = ((Integer)em.nextElement()).intValue();
      supp = ((Integer)hashtable.get( item )).intValue();
      if(supp >= m_minSuppInt) {
        tmpL[i][0] = item;
        tmpLSupp[i] = supp;
        i++;
      }
    }

    L = new int[i][1];
    result = new int[i];

    // rebuild arrayas
    for(j=0; j<i; j++) {
      L[j] = tmpL[i - j - 1];
      result[j] = tmpLSupp[i - j - 1];
    }

    // return supports for L1
    return result;
  }


  /**
   * Create association table: Transaction ID -> large itemsets list
   * Store rezult in TIDTable[][]
   *
   * @param transSet transaction set
   * @author Alexey Grinyuk
   */
  private void createTIDTable(CustomTransSet customTransSet) {
    int i, j, customer, transactionSize;
    int counter = 0;
    int transactionCounter = 0;
    int customerTransCounter;
    TransactionSet transSet;
    ItemSet transactionItemSet;

    for(i=0; i<customTransSet.getSize(); i++) {
      transSet = customTransSet.getCustomerTransSet(i);
      transactionCounter += transSet.transactionList.size();
    }

    TIDTable = new int[transactionCounter][];
    CustomerID = new int[transactionCounter];

    for(customer=0; customer<customTransSet.getSize(); customer++) {
      transSet = customTransSet.getCustomerTransSet(customer);
      customerTransCounter = transSet.transactionList.size();
      // for all transactions
      for(i=0; i<customerTransCounter; i++, counter++) {
        transactionItemSet = (ItemSet)transSet.transactionList.get(i);
        transactionSize = transactionItemSet.getSize();
        TIDTable[counter] = new int[transactionSize];
        for(j=0; j<transactionSize; j++) {
          // TIDTable[counter][j] = reference to item L1
          TIDTable[counter][j] = findInArray( L, transactionItemSet.getItemAt(j) );
        }
        CustomerID[counter] = customer;
      }
    }
  }


  /**
   * Sort L1 (int[][1])
   *
   * @param arrayL - L1
   * @param arraySupp - supports for L1
   */
  private void sortArrays(int arrayL[][], int arraySupp[]) {
    int i, j, k, tmpVal, tmpSupp;
    int arraySize = arrayL.length;

    for(i=0; i < arraySize-1; i++) {
      for(j=0; j < arraySize-1-i; j++) {

        if(arrayL[j][0] > arrayL[j + 1][0]) {
          tmpVal = arrayL[j + 1][0];
          tmpSupp = arraySupp[j + 1];
          arrayL[j + 1][0] = arrayL[j][0];
          arraySupp[j + 1] = arraySupp[j];
          arrayL[j][0] = tmpVal;
          arraySupp[j] = tmpSupp;
        }
      }
    }
  }


  /**
   * Find in array int[][1]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -