⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sequential.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title:        prudsys basket analysis
 * Description:  Basket analysis algorithms
 * Copyright:    Copyright (c) 2001
 * Company:      PRUDENTIAL SYSTEMS SOFTWARE GmbH
 * @author Michael Thess
 * @version 1.0
 */

package com.prudsys.pdm.Models.Sequential.Algorithms.SeqSimple;

import java.util.Collection;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;

import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
import com.prudsys.pdm.Models.Sequential.ItemSetSeq;
import com.prudsys.pdm.Models.Sequential.SequentialAlgorithm;
import com.prudsys.pdm.Utils.IntHashtable;
import com.prudsys.pdm.Utils.IntVector;

/**
 * Simple sequential basket analysis algorithm based on Apriori.
 * Algorithm did not find cycles.
 *
 * @author Michael Thess
 * @version 1.1, 2001/05/07
 */
public class Sequential extends SequentialAlgorithm {

  /**
   * Debugging.
   */
  private static final boolean debug = false;

  /** Minimum number of supporting transactions, only used if >0. */
  private int minimumSupportCount = 0;

  /** Minimum item size. */
  private int    minimumItemSize = 1;

  /** Maximum item size. */
  private int    maximumItemSize = -1;

  /** Sequential rules. */
  private Vector sequentialRules;

  /** Sequential transaction set. */
  private TransactionSetSeq transSet = null;

  /**
   * Empty constructor.
   */
  public Sequential() {
  }

  /**
   * Returns vector of sequential rules.
   *
   * @return vector of sequential rules (ItemSetSeq)
   */
  public Vector getSequentialRules()
  {
     return sequentialRules;
  }

  /**
   * Sets minimum item size.
   *
   * @param minimumItemSize minimum item size
   */
  public void setMinimumItemSize(int minimumItemSize)
  {
    this.minimumItemSize = minimumItemSize;
  }

  /**
   * Sets maximum item size (-1: unbounded).
   *
   * @param maximumItemSize maximum item size
   */
  public void setMaximumItemSize(int maximumItemSize)
  {
    this.maximumItemSize = maximumItemSize;
  }

  /**
   * Returns minimum number of transactions containing item pairs.
   * The relative minimum support value is defined by the minimumSupport
   * parameter.
   *
   * @return minimum support count
   */
  public int getMinimumSupportCount()
  {
    return minimumSupportCount;
  }

  /**
   * Sets minimum number of transactions containing item pairs.
   * The relative minimum support value is defined by the minimumSupport
   * parameter.
   *
   * @param minimumSupportCount new minimum support count
   */
  public void setMinimumSupportCount(int minimumSupportCount)
  {
    this.minimumSupportCount = minimumSupportCount;
  }

  /**
   * Checks mining algorithm for completeness by calling verify method
   * of superclass. Adiitionally, it checks whether minimumItemSize and
   * maximumItemSize are admissible.
   *
   * @throws IllegalArgumentException if some algorithm attributes are incorrect
   */
  public void verify() throws IllegalArgumentException
  {
    super.verify();
    if (minimumItemSize < 0)
      throw new IllegalArgumentException("minimumItemSize can't be negative");
    if (maximumItemSize > -1 && minimumItemSize > maximumItemSize)
      throw new IllegalArgumentException("minimumItemSize can't be larger than maximumItemSize");
  }

  /**
   * Implements the Algorithm Sequential (without cycles). If a transactionId, itemId,
   * or itemIndex has a missing value, the (transactionId, itemId, itemIndex)-tuple is ignored.
   */
  public void runAlgorithm() throws MiningException, IllegalArgumentException
  {
    // Convert input stream to sequential transaction set when called for first time:
    if (transSet == null)
    {
       transSet = convertMiningInputStream( miningInputStream );
    };

    // Run algorithm:
    SequentialResult sqRes = SequentialAlg();

    // Transform result:
    sequentialRules = new Vector();
    ItemSetSeqList isl  = sqRes.getLargeSequences();
    for (int i = 0; i < isl.getSize(); i++)
      sequentialRules.add( isl.getItemSetSeqAt(i) );
  }

  /**
   * Create sequential transaction set from input stream.
   *
   * @param miningInputStream mining input stream to be converted
   * @return sequential transaction set of mining input stream
   */
  private TransactionSetSeq convertMiningInputStream( MiningInputStream miningInputStream ) throws MiningException
  {
        Hashtable transactions = new Hashtable();
        while(miningInputStream.next())
        {
            MiningVector vector = miningInputStream.read();

            double transId = vector.getValue(transactionId);
            double itId = vector.getValue(itemId);
            double itIndex = vector.getValue(itemIndex);

            // Missing value => ignore line:
            if ( Category.isMissingValue(transId) || Category.isMissingValue(itId) || Category.isMissingValue(itIndex) )
              continue;

            int item = (int) itId;
            Double value = new Double(transId);
            Vector sequence = (Vector)transactions.get(value);
            if(sequence == null)
            {
                sequence = new Vector();
                transactions.put(value,sequence);
            }
            sequence.add(new Item(item,(int)itIndex));
        }
        Collection coll = transactions.values();
        Iterator it = coll.iterator();
        TransactionSetSeq tss = new TransactionSetSeq();
        while(it.hasNext())
        {
            Vector sequence = (Vector)it.next();
            ItemSetSeq iss = new ItemSetSeq();
            Object[] seq = sequence.toArray();
            int min, element;
            for(int j=0;j<seq.length;j++)
            {
                min = ((Item)seq[j]).getIndex(); element = -1;
                for(int k=j+1;k<seq.length;k++)
                {
                    int index = ((Item)seq[k]).getIndex();
                    if(index < min)
                    {
                        min = index; element = k;
                    }
                }
                if(element!=-1)
                {
                    Object swap = seq[j];
                    seq[j] = seq[element];
                    seq[element] = swap;
                }
                iss.addItem(((Item)seq[j]).getItem());
            }
            tss.addTransaction(iss);
        }
        return tss;
  }

  /**
   * Sequential basket analysis algorithm of Michael Thess.
   *
   * @return result as list of sequential item sets
   */
  public SequentialResult SequentialAlg() {


    // Copy to transaction set:
    TransactionSet ts;
    ts = new TransactionSet();   // First ts is used for usual transactions
    for (int i = 0; i < transSet.getSize(); i++) {
      ItemSet is     = new ItemSet();
      ItemSetSeq iss = (ItemSetSeq) transSet.getItemSetAt(i);
      for (int j = 0; j < iss.getSize(); j++)
        is.addItem( iss.getItemAt(j) );
      ts.addTransaction(is);
    }
    if (debug) {
      transSet.print();
      ts.print();
    };

    // Apply Apriori algorithm for large itemsets:
    ItemSetList largeItemSetList = Apriori.AprioriAlgLITS((TransactionSet) ts, minimumSupportCount, minimumSupport,
                                                          1, maximumItemSize);
    if (debug)
    {
      showLITS(largeItemSetList, ts.getSize());
    }

    // First find all items in large itemsets and write them into hash table:
    IntHashtable largeItems = new IntHashtable();
    int nLITS = largeItemSetList.getSize();  // number of large itemsets
    int nLit  = 0;                           // number of large items
    for (int i = 0; i < nLITS; i++) {
      ItemSet is   = (ItemSet) largeItemSetList.getItemSetAt(i);
      int itemSize = is.getSize();

      for (int j = 0; j < itemSize; j++) {
        int pN = is.getIntegerAt(j);
        if (largeItems.get(pN) == null) {
          largeItems.put(pN, nLit);
          nLit = nLit + 1;
        };
      };
    };
    // Vector containing all transactions for all itemsets:
    IntVector[] itemTrans = new IntVector[nLit];
    for (int i = 0; i < nLit; i++)
      itemTrans[i] = new IntVector();

    // Show large items:
    if (debug)
    {
      System.out.println("Large items:");
      showLargeItemHash(largeItems, itemTrans);
    }

    // Now take all items from sequential transaction set which are
    // large items and copy to transaction list 'ts':
    TransactionSetSeq tss;
    tss = new TransactionSetSeq();   // Now ts is used for sequential transactions
    for (int i = 0; i < transSet.getSize(); i++) {

      ItemSetSeq iss   = (ItemSetSeq) transSet.getItemSetAt(i);
      ItemSetSeq isnew = new ItemSetSeq();

      // Copy iss => isnew:
      int prev_item = -1;
      for (int j = 0; j < iss.getSize(); j++) {
        int item      = iss.getItemAt(j);
        Integer Item  = largeItems.get(item);   // num of large item

        // Item is large item => remains part of transaction, if not doubled:
        if (Item != null && item != prev_item) {
          isnew.addItem(item);

          // Add to transaction vector if still not contained:
          int ind = Item.intValue();
          boolean cont = false;
          for (int k = 0; k < itemTrans[ind].size(); k++)
            if (itemTrans[ind].IntegerAt(k) == i)
              cont = true;
          if (!cont)
            itemTrans[ind].addElement(i);
        };

        prev_item = item;
      };
      // Add to transaction list:
      tss.addTransaction(isnew);
    };

    // Show transactions, large items with transactions:
    if (debug) {
      System.out.println("TS w/o nonlarge items");
      ts.print();
      showLargeItemHash(largeItems, itemTrans);
    };

    // Calculate and get minimum support count:
    if (minimumSupportCount <= 0)
      tss.setMinSuppCount(minimumSupport);
    else
      tss.setMinSuppCount(minimumSupportCount);
    int minSuppCount = tss.getMinSuppCount();
    if (debug)
      System.out.println("minSuppCount = " + minSuppCount);

    // Create list of sequential itemsets:
    ItemSetSeqList sequenceList = new ItemSetSeqList();

    // Check sequential items and add to list:
    for (int i = 0; i < nLITS; i++) {
      // Large itemset:
      ItemSet is   = (ItemSet) largeItemSetList.getItemSetAt(i);
      int itemSize = is.getSize();

      // Filter itemsets of appropriate size:
      if (itemSize < minimumItemSize)
        continue;

      // Items for testing:
      int[] getItems = new int[itemSize];
      for (int j = 0; j < itemSize; j++)
        getItems[j] = is.getIntegerAt(j);

      // Get all transactions including these numbers:
      IntVector transNr = getTransactNumbers(getItems, largeItems, itemTrans);

      if (debug) {
        System.out.print("\n L I T S: ");
        for (int j = 0; j < itemSize; j++)
          System.out.println(getItems[j] + " ");
        System.out.println();
        System.out.println("Numbers of transactions including this LITS: ");
        for (int j = 0; j < transNr.size(); j++)
          System.out.print(transNr.IntegerAt(j) + " ");
        System.out.println();
      };

      // Create all permutations:
      ItemSetSeq[] testItems = getISSbyPermut(getItems);

      // Get support count of testing itemset:
      for (int j = 0; j < testItems.length; j++) {

        // Show testing itemset:
        if (debug) {
          System.out.println("Candidate item order:");
          testItems[j].print();
        };

        // Get support count:
        int suppCount = getSupportCount(testItems[j], (TransactionSetSeq) tss, transNr);
        if (debug)
          System.out.println("suppCount = " + suppCount);

        // Support constraint if satisfied:
        if (suppCount >= minSuppCount) {
          testItems[j].setSupportCount(suppCount);
          sequenceList.addItemSetSeq(testItems[j]);
        };

      };
    };
/*************************** END OF SEQUENCE PHASE *****************************/
/*******************************************************************************/

    // Show result:
    if (debug) {
      System.out.println("\n R E S U L T:");
      sequenceList.print();
    };

    SequentialResult seqRes = new SequentialResult(sequenceList);

    return seqRes;
  }

  /**
   * Get support count for sequential itemset.
   *
   * @param testIss sequential itemset
   * @param transSet sequential transaction set
   * @param tnr numbers of transactions to be tested
   * @return support count
   */
  private static int getSupportCount(ItemSetSeq testIss, TransactionSetSeq transSet, IntVector tnr) {

    int nsupp = 0;

    // Data of test itemset:
    int t_size   = testIss.getSize();
    int[] titems = new int[t_size];
    for (int i = 0; i < t_size; i++)
      titems[i] = testIss.getItemAt(i);

    // Loop over all transactions:
    for (int i = 0; i < tnr.size(); i++) {

      ItemSetSeq iss = transSet.getItemSetAt( tnr.IntegerAt(i) );

      // Data of transaction itemset:
      int i_size     = iss.getSize();
      int[] items    = new int[i_size];
      for (int j = 0; j < i_size; j++)
        items[j] = iss.getItemAt(j);

      // Searching:
      int[] isp = new int[t_size];
      for (int j = 0; j < t_size; j++)
        isp[j] = -1;

      boolean match = false;
      while (true) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -