// File: customersequentialalgorithm.java
// (Code-viewer residue removed: "Font size" control label.)
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: Java Data Mining API. Supported standards: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>; <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: <a href="mailto:valentine.stepanenko@zsoft.ru">ZSoft, Spb, Russia</a>
* @author Victor Borichev
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @version 1.0
*/
package com.prudsys.pdm.Models.CustomerSeq;
import java.util.Date;
import java.util.Hashtable;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.MiningSettings;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Models.CustomerSeq.Event.CreationModelEndMessageCustomerSequential;
import com.prudsys.pdm.Models.Sequential.SequentialMiningModel;
/**
* Base class for customer sequential algorithms (i.e. sequential
* basket analysis algorithms).
*/
public abstract class CustomerSequentialAlgorithm extends MiningAlgorithm {
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/** Item ID attribute. */
protected CategoricalAttribute itemId;
/** Customer ID attribute. */
protected CategoricalAttribute customerId;
/** Transaction position attribute. */
protected NumericAttribute transactionPosition;
/** Minimum support. */
protected double minimumSupport;
/** Minimum confidence. */
protected double minimumConfidence;
/** Generate rules from large customer sequences. */
protected boolean generateRules;
/** Export all transaction IDs into PMML. */
protected boolean exportTransactIds = true;
/** Export names of item ID, customer ID, and transaction position into PMML. */
protected int exportTransactItemNames = SequentialMiningModel.EXPORT_PMML_NAME_TYPE_XELOPES;
// -----------------------------------------------------------------------
// Constructor
// -----------------------------------------------------------------------
/**
* Empty constructor.
*/
public CustomerSequentialAlgorithm() {
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
* Write all transaction IDs into PMML (default: true)?
*
* @return true if write all transaction IDs into PMML, otherwise not
*/
public boolean isExportTransactIds()
{
return exportTransactIds;
}
/**
* Set export all transaction IDs into PMML (default: true).
*
* @param exportTransactIds true if export, otherwise false
*/
public void setExportTransactIds(boolean exportTransactIds)
{
this.exportTransactIds = exportTransactIds;
}
/**
* Returns type how item, customer, and transaction position IDs
* are handled in PMML.
*
* @return PMML export type of item, customer, and transaction position IDs
*/
public int getExportTransactItemNames()
{
return exportTransactItemNames;
}
/**
* Sets type how item, customer, and transaction position IDs
* are handled in PMML. This is because of an incompleteness in PMML 20:
* item, customer, and transaction position ID are not specially
* denoted in the mining schema.
* This makes PMML20 sequence models not really applicable
* to new data (except you use agreed names for the IDs). <p>
*
* There are two ways to handle this problem:
* 1. Do nothing: conform with PMML 2.0 but lose of functionality,
* 2. Use XELOPES PMML Extension: to SequenceModel three new
* attributes 'itemIdName' (itemId), 'transactIdName' (customerId),
* and 'positionIdName' (transactionPosition) are added.
*
* @param exportTransactItemNames PMML export type of item, customer,
* and transaction position IDs
*/
public void setExportTransactItemNames(int exportTransactItemNames)
{
this.exportTransactItemNames = exportTransactItemNames;
}
/**
* Creates an instance of the customer sequential settings class that is required
* to run the algorithm. The mining settings are assigned through the
* setMiningSettings method.
*
* @return new instance of the customer sequential settings class of the algorithm
*/
public MiningSettings createMiningSettings() {
return new CustomerSequentialSettings();
}
/**
* Sets customer sequential settings.
*
* @param miningSettings new customer sequential settings
* @exception IllegalArgumentException mining settings not customer sequential settings
*/
public void setMiningSettings( MiningSettings miningSettings ) throws IllegalArgumentException
{
if( miningSettings instanceof CustomerSequentialSettings )
{
super.setMiningSettings( miningSettings );
CustomerSequentialSettings sequentialSettings = (CustomerSequentialSettings)miningSettings;
this.customerId = (CategoricalAttribute)sequentialSettings.getCustomerId();
this.itemId = (CategoricalAttribute)sequentialSettings.getItemId();
this.transactionPosition = (NumericAttribute)sequentialSettings.getTransactionPosition();
this.minimumSupport = sequentialSettings.getMinimumSupport();
this.minimumConfidence = sequentialSettings.getMinimumConfidence();
this.generateRules = sequentialSettings.isGenerateRules();
}
else
{
throw new IllegalArgumentException( "MiningSettings have to be CustomerSequentialSettings." );
}
}
/**
* Returns number of transactions, i.e. customers. Standard method uses
* number of categories of customer ID attribute. However, for algorithms
* that can also handle transaction ID attributes which do not store
* all categories, this method should be overwritten.
*
* @return number of transactions (customers), -1 if unknown
*/
public int getNumberOfTransactions()
{
int nTransact = -1;
if ( customerId != null && !customerId.isUnstoredCategories() )
nTransact = customerId.getCategoriesNumber();
return nTransact;
}
/**
* Returns sequential rules.
*
* @return large sequences
*/
protected abstract Vector getSequentialRules();
/**
* Returns customer sequence rules. Only for algorithms returning
* unpruned result. (All customer subsequences of customer sequences
* are required.)
*
* @return customer sequence rules
* @exception MiningException cannot generate rules
*/
protected Vector getSequenceRules() throws MiningException {
if (!generateRules)
throw new MiningException("there should be no rules generated");
// Construct hashtable of all large customer sequences:
Hashtable seqs = new Hashtable();
int num = getSequentialRules().size();
int nTransact = getNumberOfTransactions();
for (int i = 0 ; i < num; i++)
{
CustomSequence css = (CustomSequence) getSequentialRules().elementAt(i);
Double Supp = (Double) seqs.get(css);
if (Supp == null)
{
double supp = (double) css.getSupportCount() / (double) nTransact;
seqs.put(css, new Double(supp) );
};
Supp = (Double) seqs.get(css);
};
// Find all rules satisfying minimum confidence condition:
Vector sequenceRules = new Vector();
for (int i = 0; i < num; i++) {
CustomSequence css = (CustomSequence) getSequentialRules().elementAt(i);
if (css.getSize() == 1) continue;
// Get all rules for customer itemset:
for (int j = 1; j < css.getSize(); j++) {
// New rule:
CustomSequence prem = new CustomSequence();
CustomSequence conc = new CustomSequence();
for (int k = 0; k < j; k++) prem.addItemSet( css.getItemSet(k) );
for (int k = j; k < css.getSize(); k++) conc.addItemSet( css.getItemSet(k) );
// Check confidence condition of new rule:
Double SuppAUB = (Double) seqs.get(css);
Double SuppA = (Double) seqs.get(prem);
if (SuppAUB == null || SuppA == null || SuppA.doubleValue() == 0) continue;
double conf = SuppAUB.doubleValue() / SuppA.doubleValue();
if (conf < minimumConfidence)
continue;
else {
// Add new rule to list:
CustomRuleSetSeq crss = new CustomRuleSetSeq(prem, conc, SuppAUB.doubleValue(), conf);
sequenceRules.addElement(crss);
};
};
};
return sequenceRules;
}
// -----------------------------------------------------------------------
// Run customer sequential algorithm and build mining model
// -----------------------------------------------------------------------
/**
* Runs sequential algorithm.
*
* @exception MiningException could not run algorithm
*/
protected abstract void runAlgorithm() throws MiningException;
/**
* Builds mining model by running the customer sequential algorithm internally.
*
* @return customer sequential mining model generated by the algorithm
* @exception MiningException could not build model
*/
public MiningModel buildModel() throws MiningException
{
long start = ( new Date() ).getTime();
runAlgorithm();
CustomerSequentialMiningModel model = new CustomerSequentialMiningModel();
model.setMiningSettings( miningSettings );
model.setInputSpec( applicationInputSpecification );
model.setSequentialRules( getSequentialRules() );
if (generateRules) model.setSequenceRules( getSequenceRules() );
model.setExportTransactIds(exportTransactIds);
model.setExportTransactItemNames(exportTransactItemNames);
if (getNumberOfTransactions() >= 0) model.setNumberOfTransactions( getNumberOfTransactions() );
this.miningModel = model;
long end = ( new Date() ).getTime();
timeSpentToBuildModel = ( end - start ) / 1000.0;
int nRules = model.getSequenceRules()!=null ? model.getSequenceRules().size() : 0;
fireMiningEvent(new CreationModelEndMessageCustomerSequential(nRules, model.getSequentialRules().size(), getAlgorithmLevel()));
return model;
}
}
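/*
 * Code-viewer residue (not part of the source file), translated:
 * Keyboard shortcuts
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Toggle theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */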