📄 customersequentialminingmodel.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: Java Data Mining API. Supported standards: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>; <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: <a href="mailto:valentine.stepanenko@zsoft.ru">ZSoft, Spb, Russia</a>
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @version 1.0
*/
package com.prudsys.pdm.Models.CustomerSeq;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import com.prudsys.pdm.Adapters.PmmlVersion20.AntecedentSequence;
import com.prudsys.pdm.Adapters.PmmlVersion20.ConsequentSequence;
import com.prudsys.pdm.Adapters.PmmlVersion20.DataDictionary;
import com.prudsys.pdm.Adapters.PmmlVersion20.Delimiter;
import com.prudsys.pdm.Adapters.PmmlVersion20.Header;
import com.prudsys.pdm.Adapters.PmmlVersion20.Item;
import com.prudsys.pdm.Adapters.PmmlVersion20.ItemRef;
import com.prudsys.pdm.Adapters.PmmlVersion20.Itemset;
import com.prudsys.pdm.Adapters.PmmlVersion20.MiningSchema;
import com.prudsys.pdm.Adapters.PmmlVersion20.PMML;
import com.prudsys.pdm.Adapters.PmmlVersion20.Sequence;
import com.prudsys.pdm.Adapters.PmmlVersion20.SequenceModel;
import com.prudsys.pdm.Adapters.PmmlVersion20.SequenceReference;
import com.prudsys.pdm.Adapters.PmmlVersion20.SequenceRule;
import com.prudsys.pdm.Adapters.PmmlVersion20.SetReference;
import com.prudsys.pdm.Adapters.PmmlVersion20.TransformationDictionary;
import com.prudsys.pdm.Core.ApplicationInputSpecification;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningMatrixElement;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Core.OrdinalAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningStoredData;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
import com.prudsys.pdm.Models.Sequential.SequentialMiningModel;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Description of data produced by a customer sequential mining function. <p>
*
* From PDM CWM extension. <p>
*
* Superclasses:
* <ul>
* <li> SequentialMiningModel
* </ul>
*
* In addition, functionality from PMML was added.
* It corresponds to the PMML element SequenceModel.
*
* @see SequentialMiningModel
* @see com.prudsys.pdm.Adapters.PmmlVersion20.SequenceModel
* @see CustomRuleSetSeq
* @see CustomSequence
* @see ItemSet
*/
public class CustomerSequentialMiningModel extends SequentialMiningModel {
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/** Hashtable of all large customer sequences. */
private Hashtable clits = null;
/** Hashtable of all recommendations. */
private Hashtable recs = null;
// -----------------------------------------------------------------------
// Constructor
// -----------------------------------------------------------------------
/**
* Constructor sets function and algorithm parameters.
*/
public CustomerSequentialMiningModel() {
    // Tag the model with the algorithm and the mining function it represents;
    // both fields are declared outside this class body.
    this.algorithm = MiningModel.SEQUENTIAL_BASKET_ANALYSIS_ALGORITHM;
    this.function = MiningModel.CUSTOMER_SEQUENTIAL_FUNCTION;
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
* Returns number of transactions, i.e. customers. If not specified explicitly,
* it is calculated using the number of categories of the customer IDs
* attribute (where the parameter unstoredCategories must be false).
*
* @return number of transactions (customers), -1 if unknown
*/
public int getNumberOfTransactions() {
    // Snapshot the explicitly stored count; a non-negative value is returned as is.
    final int stored = numberOfTransactions;
    if (stored >= 0) {
        return stored;
    }

    // No explicit count: fall back to the number of categories of the
    // customer-ID attribute taken from the mining settings.
    final CustomerSequentialSettings settings = (CustomerSequentialSettings) miningSettings;
    final CategoricalAttribute customerId = (CategoricalAttribute) settings.getCustomerId();

    // Without an attribute, or when its categories are flagged as unstored,
    // the (negative) stored value is handed back unchanged.
    if (customerId == null || customerId.isUnstoredCategories()) {
        return stored;
    }
    return customerId.getCategoriesNumber();
}
// -----------------------------------------------------------------------
// Application of model to new data
// -----------------------------------------------------------------------
/**
* Builds hashtable for fast access to large customer sequences.
*
* @exception MiningException cannot build hashtable for large sequences
*/
public void buildLargeSequences() throws MiningException {
    // Builds the lookup table "customer sequence -> relative support", where
    // relative support = support count / number of transactions.
    int num = getSequentialRules().size();
    int nTransact = getNumberOfTransactions();

    // getNumberOfTransactions() may return a negative value (count unknown) or 0.
    // Dividing by it would store negative, infinite or NaN supports, which
    // lift() would then silently use. Fail loudly instead.
    if (num > 0 && nTransact <= 0) {
        throw new MiningException(
            "cannot build large sequences: number of transactions is unknown or zero ("
            + nTransact + ")");
    }

    // Fill a local table first so that 'clits' is only replaced once the
    // table is complete; a failure above leaves the previous state untouched.
    Hashtable supports = new Hashtable();
    for (int i = 0; i < num; i++) {
        CustomSequence cs = (CustomSequence) getSequentialRules().elementAt(i);
        // The first occurrence of a sequence wins; later duplicates are ignored.
        if (!supports.containsKey(cs)) {
            double supp = (double) cs.getSupportCount() / (double) nTransact;
            supports.put(cs, new Double(supp));
        }
    }
    clits = supports;
}
/**
* Calculates coverage of custom sequence rule. Coverage is the
* proportion of transactions covered by the premise itemset. Equivalently,
* this is the quotient of support and confidence of the rule.
*
* @param crss custom sequence rule
* @return coverage number from interval [0,1]
* @exception MiningException cannot calculate coverage
*/
public double coverage(CustomRuleSetSeq crss) throws MiningException {
    final double support = crss.getSupport();
    final double confidence = crss.getConfidence();

    // A confidence this close to zero would blow up the quotient,
    // so the coverage is reported as 0 in that case.
    final boolean negligibleConfidence = Math.abs(confidence) <= 1.0e-20;
    return negligibleConfidence ? 0.0 : support / confidence;
}
/**
* Calculates lift of custom sequence rule. Lift is the confidence
* divided by the proportion of all examples that are covered by
* the conclusion. <p>
*
* If no valid lift value is contained in the rule, the method
* calculates it from the large customer sequences. In this case, the method
* 'buildLargeSequences' is used.
*
* @param crss customer sequence rule
* @return lift number
* @exception MiningException missing lift value in rule
* and empty hashtable of large sequences
*/
public double lift(CustomRuleSetSeq crss) throws MiningException {
    final double storedLift = crss.getLift();
    if (Category.isMissingValue(storedLift)) {
        // The rule carries no lift of its own: derive it as
        // confidence / support(conclusion), building the support table on first use.
        if (clits == null) {
            buildLargeSequences();
        }
        final double confidence = crss.getConfidence();
        final Double conclusionSupport = (Double) clits.get(crss.getConclusion());
        if (conclusionSupport == null) {
            // Conclusion is not among the large sequences.
            return 0.0;
        }
        final double denominator = conclusionSupport.doubleValue();
        // A (near-)zero support of the conclusion yields lift 0 instead of a division.
        return (Math.abs(denominator) > 1.0e-20) ? confidence / denominator : 0.0;
    }
    return storedLift;
}
/**
* Calculates cosine measure of customer sequence rule. Cosine is the square
* root of the product of support and lift values. Cosine is very similar to
* lift but in the range [0,1]. It comes from collaborative filtering
* methods. <p>
*
* @param rs customer sequence rule
* @return cosine value
* @exception MiningException cannot calculate lift
*/
public double cosine(CustomRuleSetSeq rs) throws MiningException {
    // cosine = sqrt(lift * support); lift(rs) is evaluated before the support is read.
    return Math.sqrt(lift(rs) * rs.getSupport());
}
/**
* Reorganize sequence rule model for being used as
* recommendation engine via the apply method.
*
* @exception MiningException cannot build recommendations
*/
public void buildRecommendations() throws MiningException {
    // Builds the table "premise -> single recommended rule" used by applyCustomer.
    //
    // Selection among rules sharing a premise:
    //   1. highest support,
    //   2. on equal support, highest confidence,
    //   3. on equal confidence as well, the rule that comes first in the rule list.
    //
    // This is done in one pass by keeping the best rule seen so far per premise.
    // The earlier two-stage max filter started from max = 0 and therefore ended
    // up with an empty candidate vector (and an out-of-bounds v.get(0)) when all
    // supports or confidences of a premise were negative or NaN.
    int num = getSequenceRules().size();
    Hashtable best = new Hashtable();
    for (int i = 0; i < num; i++) {
        CustomRuleSetSeq candidate = (CustomRuleSetSeq) getSequenceRules().get(i);
        CustomSequence premise = candidate.getPremise();
        CustomRuleSetSeq incumbent = (CustomRuleSetSeq) best.get(premise);

        if (incumbent == null) {
            // First rule seen for this premise.
            best.put(premise, candidate);
            continue;
        }

        double candidateSupport = candidate.getSupport();
        double incumbentSupport = incumbent.getSupport();

        // Strict comparisons: a tie never displaces the incumbent, so the
        // earliest rule wins. A NaN measure compares false both ways, which
        // likewise leaves the incumbent in place.
        boolean better = candidateSupport > incumbentSupport
            || (candidateSupport == incumbentSupport
                && candidate.getConfidence() > incumbent.getConfidence());
        if (better) {
            best.put(premise, candidate);
        }
    }
    // Publish the table only once it is complete.
    recs = best;
}
/**
* Applies customer sequence rules to new customer item sets. Returns
* customer rule set of recommended itemsets or null if no matching rule
* could be found.
* Before using this method, buildRecommendations must be called. <p>
*
* When some rules are found for a recommendation, one rule is
* selected according to the following strategy:
* <ol>
* <li> Rule with highest support.
* <li> If equal supports, rule with highest confidence.
* <li> If also equal confidences, the first such rule in the rule list.
* </ol>
*
* @param items vector of itemsets
* @return customer rule set of recommended categories, null if no proper rule found
* @exception MiningException empty hashtable for recommendations
*/
public CustomRuleSetSeq applyCustomer(Vector items) throws MiningException {
    // The recommendation table must have been prepared by buildRecommendations().
    if (recs == null) {
        throw new MiningException("recommendations not builded");
    }

    // Assemble the lookup key: a customer sequence holding the given item sets in order.
    CustomSequence query = new CustomSequence();
    for (Enumeration e = items.elements(); e.hasMoreElements(); ) {
        query.addItemSet((ItemSet) e.nextElement());
    }

    // NOTE(review): debug trace written to stdout on every call; kept as is.
    System.out.println("Finding recommendation for "+query+"("+query.hashCode()+")");

    // null when no rule has exactly this sequence as its premise.
    return (CustomRuleSetSeq) recs.get(query);
}
/**
* Applying the customer sequential rules mining model to a customer
* sequence; i.e. in a non-transactional representation. Returns
* CustomRuleSetSeq of recommended items or null if no customer sequence rules
* contain the antecedent.
*
* Before using this method, buildRecommendations must be called.
*
* @param miningData customer sequence
* @return customer sequential rule set of recommended items or null
* @throws MiningException thrown if miningData not instance of CustomSequence
*/
public MiningMatrixElement applyModel(MiningMatrixElement miningData)
throws MiningException {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -