⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 customersequentialalgorithm.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title:        XELOPES Data Mining Library
 * Description:  Java Data Mining API. Supported standards: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>;  <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.
 * Copyright:    Copyright (c) 2002 Prudential Systems Software GmbH
 * Company:      <a href="mailto:valentine.stepanenko@zsoft.ru">ZSoft, Spb, Russia</a>
 * @author Victor Borichev
 * @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
 * @version 1.0
 */

package com.prudsys.pdm.Models.CustomerSeq;

import java.util.Date;
import java.util.Hashtable;
import java.util.Vector;

import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.MiningSettings;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Models.CustomerSeq.Event.CreationModelEndMessageCustomerSequential;
import com.prudsys.pdm.Models.Sequential.SequentialMiningModel;

/**
 * Base class for customer sequential algorithms (i.e. sequential
 * basket analysis algorithms).
 *
 * <p>Subclasses implement {@link #runAlgorithm()} and
 * {@link #getSequentialRules()}. {@link #buildModel()} runs the algorithm
 * and packs its results into a {@link CustomerSequentialMiningModel}.
 */
public abstract class CustomerSequentialAlgorithm extends MiningAlgorithm {

  // -----------------------------------------------------------------------
  //  Variables declarations
  // -----------------------------------------------------------------------
  /** Item ID attribute. */
  protected CategoricalAttribute itemId;

  /** Customer ID attribute. */
  protected CategoricalAttribute customerId;

  /** Transaction position attribute. */
  protected NumericAttribute transactionPosition;

  /** Minimum support. */
  protected double minimumSupport;

  /** Minimum confidence. */
  protected double minimumConfidence;

  /** Generate rules from large customer sequences. */
  protected boolean generateRules;

  /** Export all transaction IDs into PMML. */
  protected boolean exportTransactIds = true;

  /** Export names of item ID, customer ID, and transaction position into PMML. */
  protected int exportTransactItemNames = SequentialMiningModel.EXPORT_PMML_NAME_TYPE_XELOPES;

  // -----------------------------------------------------------------------
  //  Constructor
  // -----------------------------------------------------------------------
  /**
   * Empty constructor.
   */
  public CustomerSequentialAlgorithm() {
  }

  // -----------------------------------------------------------------------
  //  Getter and setter methods
  // -----------------------------------------------------------------------
  /**
   * Tells whether all transaction IDs are written into PMML (default: true).
   *
   * @return true if all transaction IDs are written into PMML, otherwise false
   */
  public boolean isExportTransactIds()
  {
    return exportTransactIds;
  }

  /**
   * Sets whether all transaction IDs are exported into PMML (default: true).
   *
   * @param exportTransactIds true if export, otherwise false
   */
  public void setExportTransactIds(boolean exportTransactIds)
  {
    this.exportTransactIds = exportTransactIds;
  }

  /**
   * Returns the type of how item, customer, and transaction position IDs
   * are handled in PMML.
   *
   * @return PMML export type of item, customer, and transaction position IDs
   */
  public int getExportTransactItemNames()
  {
    return exportTransactItemNames;
  }

  /**
   * Sets the type of how item, customer, and transaction position IDs
   * are handled in PMML. This is needed because of an incompleteness in
   * PMML 2.0: item, customer, and transaction position ID are not specially
   * denoted in the mining schema.
   * This makes PMML 2.0 sequence models not really applicable
   * to new data (unless agreed names are used for the IDs). <p>
   *
   * There are two ways to handle this problem:
   * 1. Do nothing: conform with PMML 2.0 but lose functionality,
   * 2. Use the XELOPES PMML extension: three new attributes
   * 'itemIdName' (itemId), 'transactIdName' (customerId),
   * and 'positionIdName' (transactionPosition) are added to SequenceModel.
   *
   * @param exportTransactItemNames PMML export type of item, customer,
   * and transaction position IDs
   */
  public void setExportTransactItemNames(int exportTransactItemNames)
  {
    this.exportTransactItemNames = exportTransactItemNames;
  }

  /**
   * Creates an instance of the customer sequential settings class that is required
   * to run the algorithm. The mining settings are assigned through the
   * setMiningSettings method.
   *
   * @return new instance of the customer sequential settings class of the algorithm
   */
  public MiningSettings createMiningSettings() {

    return new CustomerSequentialSettings();
  }

  /**
   * Sets customer sequential settings and copies the customer ID, item ID,
   * transaction position, minimum support, minimum confidence, and the
   * generate-rules flag from them into this algorithm.
   *
   * @param miningSettings new customer sequential settings
   * @exception IllegalArgumentException mining settings not customer sequential settings
   */
  public void setMiningSettings( MiningSettings miningSettings ) throws IllegalArgumentException
  {
    if( miningSettings instanceof CustomerSequentialSettings )
    {
      super.setMiningSettings( miningSettings );
      CustomerSequentialSettings sequentialSettings = (CustomerSequentialSettings)miningSettings;
      this.customerId = (CategoricalAttribute)sequentialSettings.getCustomerId();
      this.itemId = (CategoricalAttribute)sequentialSettings.getItemId();
      this.transactionPosition = (NumericAttribute)sequentialSettings.getTransactionPosition();
      this.minimumSupport = sequentialSettings.getMinimumSupport();
      this.minimumConfidence = sequentialSettings.getMinimumConfidence();
      this.generateRules = sequentialSettings.isGenerateRules();
    }
    else
    {
      throw new IllegalArgumentException( "MiningSettings have to be CustomerSequentialSettings." );
    }
  }

  /**
   * Returns number of transactions, i.e. customers. Standard method uses
   * number of categories of customer ID attribute. However, for algorithms
   * that can also handle transaction ID attributes which do not store
   * all categories, this method should be overwritten.
   *
   * @return number of transactions (customers), -1 if unknown
   */
  public int getNumberOfTransactions()
  {
    int nTransact = -1;
    if ( customerId != null && !customerId.isUnstoredCategories() )
       nTransact = customerId.getCategoriesNumber();

    return nTransact;
  }

  /**
   * Returns the large customer sequences found by the algorithm. The
   * elements are cast to CustomSequence by {@link #getSequenceRules()}.
   *
   * @return large sequences
   */
  protected abstract Vector getSequentialRules();

  /**
   * Returns customer sequence rules. Only for algorithms returning
   * unpruned result. (All customer subsequences of customer sequences
   * are required.)
   *
   * <p>Every large sequence of more than one itemset is split at each inner
   * position into a premise (leading itemsets) and a conclusion (remaining
   * itemsets). A rule is kept if the premise is itself a large sequence with
   * non-zero support and support(sequence) / support(premise) is at least
   * the minimum confidence.
   *
   * @return customer sequence rules (CustomRuleSetSeq elements); empty if
   * there are no large sequences
   * @exception MiningException rule generation is switched off, or the
   * number of transactions is unknown or zero so that no support can be
   * computed
   */
  protected Vector getSequenceRules() throws MiningException {

    if (!generateRules)
      throw new MiningException("there should be no rules generated");

    // Fetch the large sequences once: the getter is abstract and may be
    // expensive, and indexing must stay consistent with the size read here.
    Vector sequences     = getSequentialRules();
    Vector sequenceRules = new Vector();
    if (sequences == null || sequences.isEmpty())
      return sequenceRules;

    // Support = support count / number of transactions. A count of -1
    // (unknown) or 0 would yield negative or infinite/NaN supports.
    int nTransact = getNumberOfTransactions();
    if (nTransact <= 0)
      throw new MiningException("cannot compute support of customer sequences: number of transactions is unknown or zero");

    // Construct hashtable of all large customer sequences (sequence -> support);
    // the first occurrence of a sequence determines its support:
    int num        = sequences.size();
    Hashtable seqs = new Hashtable();
    for (int i = 0; i < num; i++)
    {
      CustomSequence css = (CustomSequence) sequences.elementAt(i);
      if (!seqs.containsKey(css))
      {
        double supp = (double) css.getSupportCount() / (double) nTransact;
        seqs.put(css, new Double(supp) );
      }
    }

    // Find all rules satisfying minimum confidence condition:
    for (int i = 0; i < num; i++) {
      CustomSequence css = (CustomSequence) sequences.elementAt(i);
      if (css.getSize() == 1) continue;

      // Support of the whole sequence is the same for every split of it:
      Double suppSeq = (Double) seqs.get(css);
      if (suppSeq == null) continue;

      // Get all rules for customer itemset:
      for (int j = 1; j < css.getSize(); j++) {
        // New rule: itemsets [0, j) form the premise, [j, size) the conclusion.
        CustomSequence prem = new CustomSequence();
        CustomSequence conc = new CustomSequence();
        for (int k = 0; k < j; k++) prem.addItemSet( css.getItemSet(k) );
        for (int k = j; k < css.getSize(); k++) conc.addItemSet( css.getItemSet(k) );

        // Check confidence condition of new rule; the premise must itself
        // be a large sequence with non-zero support:
        Double suppPrem = (Double) seqs.get(prem);
        if (suppPrem == null || suppPrem.doubleValue() == 0) continue;

        double conf = suppSeq.doubleValue() / suppPrem.doubleValue();
        if (conf < minimumConfidence) continue;

        // Add new rule to list:
        CustomRuleSetSeq crss = new CustomRuleSetSeq(prem, conc, suppSeq.doubleValue(), conf);
        sequenceRules.addElement(crss);
      }
    }

    return sequenceRules;
  }

  // -----------------------------------------------------------------------
  //  Run customer sequential algorithm and build mining model
  // -----------------------------------------------------------------------
  /**
   * Runs sequential algorithm.
   *
   * @exception MiningException could not run algorithm
   */
  protected abstract void runAlgorithm() throws MiningException;

  /**
   * Builds mining model by running the customer sequential algorithm internally.
   * The model receives the mining settings, the input specification, the
   * large sequences, the sequence rules (only if rule generation is
   * switched on), the PMML export options, and the number of transactions
   * (only if known). The build time in seconds is stored and a
   * model-creation-end event is fired.
   *
   * @return customer sequential mining model generated by the algorithm
   * @exception MiningException could not build model
   */
  public MiningModel buildModel() throws MiningException
  {
      long start = System.currentTimeMillis();

      runAlgorithm();

      CustomerSequentialMiningModel model = new CustomerSequentialMiningModel();
      model.setMiningSettings( miningSettings );
      model.setInputSpec( applicationInputSpecification );
      model.setSequentialRules( getSequentialRules() );
      if (generateRules) model.setSequenceRules( getSequenceRules() );
      model.setExportTransactIds(exportTransactIds);
      model.setExportTransactItemNames(exportTransactItemNames);

      // Query once so the checked value and the stored value cannot differ:
      int nTransact = getNumberOfTransactions();
      if (nTransact >= 0) model.setNumberOfTransactions( nTransact );
      this.miningModel = model;

      long end = System.currentTimeMillis();
      timeSpentToBuildModel = ( end - start ) / 1000.0;

      // Either list may be absent on the model; report 0 in that case:
      int nRules = model.getSequenceRules() != null ? model.getSequenceRules().size() : 0;
      int nSequences = model.getSequentialRules() != null ? model.getSequentialRules().size() : 0;
      fireMiningEvent(new CreationModelEndMessageCustomerSequential(nRules, nSequences, getAlgorithmLevel()));

      return model;
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -