📄 associationrulesdecompalgorithm.java
字号:
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Models.AssociationRules;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningAlgorithmParameter;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningEvent;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningListener;
import com.prudsys.pdm.Core.Event.Status.CreationModelBeginMessage;
import com.prudsys.pdm.Input.MiningStoredData;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.AssociationRules.Event.CreationModelEndMessageAssociationRules;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.IntHashtable;
import com.prudsys.pdm.Utils.IntVector;
/**
* Wrapper for association rule algorithms based on a simple
* horizontal decomposition method of the MiningInputStream
* into stripes. This allows to apply association rules
* algorithms also on data which does not fit completely
* into memory.
*
* All stripes except the last one have the same length given by
* the parameter 'decompSize' specifying the number of transactions
* (not items!) contained in the stripes. The last stripe contains the remaining
* transactions and its transaction length <= decompSize. In the first run
* through the data, the association rules algorithm, which is specified
* via the parameter 'algorithmName', is locally applied to all stripes
* and the resulting rules are stored globally. In the second run,
* the rules are pruned via calculating their global support / confidence
* and removing all rules violating the global support / confidence condition.
* No matter how the parameter 'decompSize' is chosen, the result will
* be always the same.
*
* Three important remarks:
* 1. Unlike most memory-based association rules algorithms in XELOPES,
* this class requires that the MiningInputStream is already sorted
* by the transaction ID attribute.
* 2. An advantage over most memory-based association rules algorithms is
* that this class can also handle transaction ID attributes of the
* type 'unstoredCategories'. This means that categories (i.e. the transaction
* identifiers) are not stored in the transaction ID attribute and hence
* this memory is saved. This is especially useful when the algorithm
* deals with very large datasets with millions of transactions.
* 3. When choosing 'decompSize', make sure that the last stripe is not
* much smaller than the other ones. A too small last stripe would
* result in a huge number of locally calculated rules and extremely
* slow down the work of the whole decomposition algorithm (especially
* in the second - pruning - stage). You can also simply exclude the
* last domain from local rule generation (see option 'skipLastDomain')
* in the first stage. However, this may possibly reduce the set of all
* globally found rules leading to an approximate solution.
*
* @author Michael Thess
* @version 1.1
* @see AssociationRulesAlgorithm
*/
public class AssociationRulesDecompAlgorithm extends AssociationRulesAlgorithm implements MiningListener
{
/** Maximum number of transactions in decomposition. */
private int decompSize = 10000;
/** Name of local association rules algorithm from 'algorithms.xml'. */
private String algorithmName = "AprioriOptimized";
/** Approximate solution (exclude last domain from local rule generation). */
private boolean skipLastDomain = false;
/** Minimum item size. */
private int minimumItemSize = 1;
/** Maximum item size. */
private int maximumItemSize = -1;
/** Local transaction ID attribute. */
private CategoricalAttribute transactIdLoc;
/** Global number of all transactions. */
private int numberOfTransactions = -1;
/** Hashtable of large itemsets. */
private Hashtable lits = new Hashtable();
/** Hashtable of association rules. */
private Hashtable rules = new Hashtable();
/** Hashtable of large items. Points to itemTrans. */
private IntHashtable largeItems = new IntHashtable();
/** Vector of all transactions containing a large item. */
private IntVector[] itemTrans;
/** Debug level. */
private int debug = 1;
public AssociationRulesDecompAlgorithm() {
this.addMiningListener(this);
}
public void processMiningEvent(MiningEvent e) {
}
/**
* Runs association rules algorithm.
*
* @exception MiningException connot run algorithm
*/
protected void runAlgorithm() throws MiningException {
// Init:
lits.clear();
rules.clear();
largeItems.clear();
// Construct local metadata:
MiningDataSpecification locMetaData = new MiningDataSpecification();
locMetaData.addMiningAttribute( categoryItemId );
transactIdLoc = categoryTransactId;
if ( categoryTransactId.isUnstoredCategories() ) // use local transact attribute
transactIdLoc = new CategoricalAttribute( categoryTransactId.getName() );
locMetaData.addMiningAttribute( transactIdLoc );
locMetaData.setRelationName("local meta data");
/************************************************************************/
/* First scan over data to construct local rules... */
/************************************************************************/
// Reset mining input stream:
miningInputStream.reset();
// MAIN LOOP:
int nDecomp = 0; // number of decompositions
int nLocTrans = 0; // counter of local transactions
boolean firstItem = true; // first line
Category prevTransId = new Category(""); // previous transaction ID
ArrayList transact = new ArrayList(); // current transaction (= itemset)
ArrayList locMinVec = new ArrayList(); // current decomposition set
fireMiningEvent(new CreationModelBeginMessage(this.getAlgorithmLevel()));
while (miningInputStream.next()) {
// Read new mining vector:
MiningVector vector = miningInputStream.read();
// Empty line => ignore line:
if (vector == null)
continue;
// Get item and transaction IDs:
double itemId = vector.getValue( categoryItemId );
double transId = vector.getValue( categoryTransactId );
Category currTransId = categoryTransactId.getCategory(transId);
// Missing value => ignore line:
if ( Category.isMissingValue(itemId) || currTransId == null )
continue;
// First item:
if (firstItem) {
prevTransId = currTransId;
firstItem = false;
};
// New transaction:
if ( !currTransId.equals(prevTransId) ) {
// Add to local transaction set:
locMinVec.addAll(transact);
nLocTrans = nLocTrans + 1;
// New decomposition domain => run algorithm:
if (nLocTrans == decompSize) {
MiningStoredData msd = new MiningStoredData(locMinVec);
AssociationRulesMiningModel model = runAlgorithmLocal(msd);
addLargeItemSets( model.getLargeItemSets() );
addAssociationRules( model.getAssociationRules() );
nDecomp = nDecomp + 1;
nLocTrans = 0;
locMinVec.clear();
if ( categoryTransactId.isUnstoredCategories() )
transactIdLoc.removeValues();
if (debug > 0) System.out.println("iDecomp = " + nDecomp + " =========>");
};
prevTransId = currTransId;
transact.clear();
};
// Add item to transaction:
if ( categoryTransactId.isUnstoredCategories() ) {
transId = transactIdLoc.getKey(currTransId);
if (Category.isMissingValue(transId) )
transId = transactIdLoc.addCategory(currTransId);
};
double[] vec = {itemId, transId};
MiningVector mv = new MiningVector(vec);
mv.setMetaData(locMetaData);
transact.add(mv);
};
// Last transaction and last algorithm run:
if (transact.size() > 0 && (!skipLastDomain || nDecomp == 0) ) {
locMinVec.addAll(transact);
nLocTrans = nLocTrans + 1;
MiningStoredData msd = new MiningStoredData(locMinVec);
AssociationRulesMiningModel model = runAlgorithmLocal(msd);
addLargeItemSets( model.getLargeItemSets() );
addAssociationRules( model.getAssociationRules() );
nDecomp = nDecomp + 1;
};
if ( categoryTransactId.isUnstoredCategories() )
transactIdLoc.removeValues();
if (debug > 0)
System.out.println("nDecomp = " + nDecomp + " --------------------");
/************************************************************************/
/* ... first scan over data to construct local rules. */
/************************************************************************/
// Fill hashtable of all one-element large itemsets:
int nLit = 0;
Enumeration em = lits.keys();
while (em.hasMoreElements() ) {
ItemSet is = (ItemSet) em.nextElement();
int itemSize = is.getSize();
is.setSupportCount(0); // clear support count which was local
for (int j = 0; j < itemSize; j++) {
int pN = is.getIntegerAt(j);
if (largeItems.get(pN) == null) {
largeItems.put(pN, nLit);
nLit = nLit + 1;
};
};
};
// Initialize transaction vector:
itemTrans = new IntVector[nLit];
/************************************************************************/
/* Second scan over data to get global rules by pruning... */
/************************************************************************/
// Reset mining input stream:
miningInputStream.reset();
// MAIN LOOP:
int nTrans = 0; // number of all transactions
nDecomp = 0; // number of decompositions
nLocTrans = 0; // counter of local transactions
firstItem = true; // first line
prevTransId = new Category(""); // previous transaction ID
transact = new ArrayList(); // current transaction (= itemset)
locMinVec = new ArrayList(); // current decomposition set
while (miningInputStream.next()) {
// Read new mining vector:
MiningVector vector = miningInputStream.read();
// Empty line => ignore line:
if (vector == null)
continue;
// Get item and transaction IDs:
double itemId = vector.getValue( categoryItemId );
double transId = vector.getValue( categoryTransactId );
Category currTransId = categoryTransactId.getCategory(transId);
// Missing value => ignore line:
if ( Category.isMissingValue(itemId) || currTransId == null )
continue;
// First item:
if (firstItem) {
prevTransId = currTransId;
firstItem = false;
};
// New transaction:
if ( !currTransId.equals(prevTransId) ) {
// Add to local transaction set:
locMinVec.addAll(transact);
nLocTrans = nLocTrans + 1;
// New decomposition domain => update support and confidence numbers:
if (nLocTrans == decompSize) {
MiningStoredData msd = new MiningStoredData(locMinVec);
updateSuppConf(msd);
nTrans = nTrans + nLocTrans;
nDecomp = nDecomp + 1;
nLocTrans = 0;
locMinVec.clear();
if ( categoryTransactId.isUnstoredCategories() )
transactIdLoc.removeValues();
if (debug > 0) System.out.println("iDecomp = " + nDecomp + " <========");
};
prevTransId = currTransId;
transact.clear();
};
// Add item to transaction:
if ( categoryTransactId.isUnstoredCategories() ) {
transId = transactIdLoc.getKey(currTransId);
if (Category.isMissingValue(transId) )
transId = transactIdLoc.addCategory(currTransId);
};
double[] vec = {itemId, transId};
MiningVector mv = new MiningVector(vec);
mv.setMetaData(locMetaData);
transact.add(mv);
};
// Last transaction and last update support and confidence numbers:
if (transact.size() > 0) {
locMinVec.addAll(transact);
nLocTrans = nLocTrans + 1;
nTrans = nTrans + nLocTrans;
nDecomp = nDecomp + 1;
MiningStoredData msd = new MiningStoredData(locMinVec);
updateSuppConf(msd);
};
numberOfTransactions = nTrans;
/************************************************************************/
/* ...second scan over data to get global rules by pruning. */
/************************************************************************/
// Remove all large itemsets with too small support:
int minSupp = (int) (minimumSupport*nTrans + 0.9999999);
em = lits.keys();
while (em.hasMoreElements() ) {
ItemSet is = (ItemSet) em.nextElement();
int suppCt = is.getSupportCount();
if (suppCt < minSupp)
lits.remove(is);
else
lits.put(is, new Integer(suppCt));
};
// Remove all rules with too small support or confidence:
em = rules.keys();
while (em.hasMoreElements()) {
// Get rule:
RuleSet rs = (RuleSet) em.nextElement();
// Get premise and conclusion if large itemsets:
ItemSet prem = rs.getPremise();
ItemSet conc = rs.getConclusion();
Integer SuppA = (Integer) lits.get(prem);
Integer SuppB = (Integer) lits.get(conc);
if (SuppA == null || SuppB == null) {
rules.remove(rs);
continue;
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -