📄 associationrulesminingmodel.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @version 1.0
*/
package com.prudsys.pdm.Models.AssociationRules;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import com.prudsys.pdm.Adapters.PmmlVersion20.AssociationModel;
import com.prudsys.pdm.Adapters.PmmlVersion20.AssociationRule;
import com.prudsys.pdm.Adapters.PmmlVersion20.DataDictionary;
import com.prudsys.pdm.Adapters.PmmlVersion20.Header;
import com.prudsys.pdm.Adapters.PmmlVersion20.Item;
import com.prudsys.pdm.Adapters.PmmlVersion20.ItemRef;
import com.prudsys.pdm.Adapters.PmmlVersion20.Itemset;
import com.prudsys.pdm.Adapters.PmmlVersion20.MiningSchema;
import com.prudsys.pdm.Adapters.PmmlVersion20.PMML;
import com.prudsys.pdm.Adapters.PmmlVersion20.TransformationDictionary;
import com.prudsys.pdm.Core.ApplicationInputSpecification;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.CategoryProperty;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningMatrixElement;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Core.OrdinalAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningStoredData;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Description of data produced by an association rule mining function. <p>
*
* From PDM CWM extension. <P>
*
* Superclasses:
* <ul>
* <li> MiningModel
* </ul>
* Attributes:
* <ul>
* <li> <i>itemIdName</i>: Name of the item id attribute. <br>
* - class: String <br>
* - multiplicity: exactly one <br>
* <li> <i>transactIdName</i>: Name of the transaction id attribute. <br>
* - class: String <br>
* - multiplicity: exactly one <br>
* <li> <i>associationRules</i>: List of all association rules. <br>
* - class: RuleSet <br>
* - multiplicity: one or more <br>
* <li> <i>largeItemSets</i>: List of all large itemsets. <br>
* - class: ItemSet <br>
* - multiplicity: one or more
* </ul>
*
* In addition, functionality from PMML was added.
* It corresponds to the PMML element AssociationModel.
*
* @see MiningModel
* @see com.prudsys.pdm.Adapters.PmmlVersion20.AssociationModel
* @see ItemSet
* @see RuleSet
*/
public class AssociationRulesMiningModel extends MiningModel
{
// -----------------------------------------------------------------------
// Constants of PMML and MiningInputStream export
// -----------------------------------------------------------------------
// --- PMML naming modes: how item and transaction IDs are written to PMML ---
/** Export as PMML 2.0 rule. */
public static final int EXPORT_PMML_NAME_TYPE_PMML20 = 0;
/** Export as XELOPES extension of PMML 2.0 for itemId and transactionId. */
public static final int EXPORT_PMML_NAME_TYPE_XELOPES = 1;
/** Export as PMML 2.1 rule (in MiningSchema 'group' for transactionId). */
public static final int EXPORT_PMML_NAME_TYPE_PMML21 = 2; // not supported yet
// --- Content selectors for the toMiningInputStream method ---
/** Export large itemsets in toMiningInputStream method. */
public static final int EXPORT_MIS_LARGE_ITEMSETS = 0;
/** Export association rules in toMiningInputStream method. */
public static final int EXPORT_MIS_ASSOCIATION_RULES = 1;
/** Export 2-item association rules in toMiningInputStream as pairs. */
public static final int EXPORT_MIS_ASSOCIATION_RULES_FLAT = 2;
// --- Characteristics selectors for the toMiningInputStream method ---
// NOTE(review): the values 1, 2, 4 and 8 below are distinct powers of two,
// which suggests they are meant to be combined into one int. SUPPORT is 0,
// so adding it to a combination does not change the value - confirm how the
// export code decides whether support is included.
/** Include support value into export in toMiningInputStream. */
public static final int EXPORT_MISCHAR_SUPPORT = 0;
/** Include confidence value into export in toMiningInputStream. */
public static final int EXPORT_MISCHAR_CONFIDENCE = 1;
/** Include coverage value into export in toMiningInputStream. */
public static final int EXPORT_MISCHAR_COVERAGE = 2;
/** Include lift value into export in toMiningInputStream. */
public static final int EXPORT_MISCHAR_LIFT = 4;
/** Include cosine value into export in toMiningInputStream. */
public static final int EXPORT_MISCHAR_COSINE = 8;
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/** Name of itemId attribute (default: "itemId"). */
private String itemIdName = "itemId";
/** Name of transactId attribute (default: "transactId"). */
private String transactIdName = "transactId";
/** Vector of large itemsets; null until set. */
private Vector largeItemSets = null;
/** Vector of association rules; null until set. */
private Vector associationRules = null;
/** Number of transactions of mining data; -1 means not set explicitly. */
private int numberOfTransactions = -1;
/** Export all transaction IDs into PMML? (default: true) */
private boolean exportTransactIds = true;
/** PMML export type of association rules model (default: XELOPES extension). */
private int exportTransactItemNames = EXPORT_PMML_NAME_TYPE_XELOPES;
/** Hashtable of all large itemsets. */
// NOTE(review): key/value layout is not visible in this part of the file - confirm at the usage site.
private Hashtable lits = null;
/** Hashtable of all recommendations. */
// NOTE(review): key/value layout is not visible in this part of the file - confirm at the usage site.
private Hashtable recs = null;
/** Export type of toMiningInputStream method (default: association rules). */
protected int misExportType = EXPORT_MIS_ASSOCIATION_RULES;
/**
 * Export subtype of toMiningInputStream for characteristics.
 * The default sum evaluates to 1, because EXPORT_MISCHAR_SUPPORT is 0 and
 * EXPORT_MISCHAR_CONFIDENCE is 1.
 */
protected int misExportCharType = EXPORT_MISCHAR_SUPPORT + EXPORT_MISCHAR_CONFIDENCE;
// -----------------------------------------------------------------------
// Constructor
// -----------------------------------------------------------------------
/**
 * Creates an empty association rules model and tags it with the
 * association rules function and algorithm identifiers of MiningModel.
 */
public AssociationRulesMiningModel() {
    this.algorithm = MiningModel.ASSOCIATION_RULES_ALGORITHM;
    this.function = MiningModel.ASSOCIATION_RULES_FUNCTION;
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
 * Returns the name of the attribute that holds the item IDs.
 *
 * @return name of the itemId attribute
 */
public String getItemIdName() {
    return this.itemIdName;
}
/**
 * Sets the name of the attribute that holds the item IDs.
 * The value is stored as given; no validation is performed.
 *
 * @param itemIdName new name of the itemId attribute
 */
public void setItemIdName(String itemIdName) {
    this.itemIdName = itemIdName;
}
/**
 * Returns the name of the attribute that holds the transaction IDs.
 *
 * @return name of the transactionId attribute
 */
public String getTransactIdName() {
    return this.transactIdName;
}
/**
 * Sets the name of the attribute that holds the transaction IDs.
 * The value is stored as given; no validation is performed.
 *
 * @param transactIdName new name of the transactionId attribute
 */
public void setTransactIdName(String transactIdName) {
    this.transactIdName = transactIdName;
}
/**
 * Returns the vector of large itemsets. The internal vector itself is
 * returned (no copy is made); it is null until one has been set.
 *
 * @return vector of large itemsets (ItemSet objects)
 */
public Vector getLargeItemSets() {
    return this.largeItemSets;
}
/**
 * Replaces the vector of large itemsets. The given vector is stored by
 * reference (no copy is made).
 *
 * @param largeItemSets vector of large itemsets (ItemSet objects)
 */
public void setLargeItemSets(Vector largeItemSets) {
    this.largeItemSets = largeItemSets;
}
/**
 * Returns the vector of association rules. The internal vector itself is
 * returned (no copy is made); it is null until one has been set.
 *
 * @return vector of association rules (RuleSet objects)
 */
public Vector getAssociationRules() {
    return this.associationRules;
}
/**
 * Replaces the vector of association rules. The given vector is stored by
 * reference (no copy is made).
 *
 * @param associationRules vector of association rules (RuleSet objects)
 */
public void setAssociationRules(Vector associationRules) {
    this.associationRules = associationRules;
}
/**
 * Returns the number of transactions. If it was not specified explicitly,
 * it is derived from the number of categories of the transaction ID
 * attribute, provided that attribute is categorical and stores its
 * categories (its parameter unstoredCategories must be false).
 * <p>
 * When the number cannot be derived (no mining settings, no data
 * specification, attribute missing or not categorical, or categories not
 * stored) the explicitly stored value is returned unchanged, which is -1
 * by default.
 *
 * @return number of transactions, -1 if unknown
 */
public int getNumberOfTransactions()
{
    // An explicitly set, non-negative value always wins.
    if (numberOfTransactions >= 0) {
        return numberOfTransactions;
    }
    // Without settings or meta data there is nothing to derive the count from.
    if (miningSettings == null) {
        return numberOfTransactions;
    }
    MiningDataSpecification dataSpecification = miningSettings.getDataSpecification();
    if (dataSpecification == null) {
        return numberOfTransactions;
    }
    // instanceof is false for null, so this covers both a missing attribute
    // and an attribute of another type (previously a ClassCastException).
    Object attribute = dataSpecification.getMiningAttribute(transactIdName);
    if (attribute instanceof CategoricalAttribute) {
        CategoricalAttribute transactId = (CategoricalAttribute) attribute;
        if (!transactId.isUnstoredCategories()) {
            return transactId.getCategoriesNumber();
        }
    }
    return numberOfTransactions;
}
/**
 * Sets the number of transactions explicitly.
 * The value is stored as given; no validation is performed.
 *
 * @param numberOfTransactions new number of transactions, -1 if unknown
 */
public void setNumberOfTransactions(int numberOfTransactions) {
    this.numberOfTransactions = numberOfTransactions;
}
/**
 * Tells whether all transaction IDs are written into PMML (default: true).
 *
 * @return true if all transaction IDs are written into PMML, otherwise false
 */
public boolean isExportTransactIds() {
    return this.exportTransactIds;
}
/**
 * Switches the export of all transaction IDs into PMML on or off
 * (default: true).
 *
 * @param exportTransactIds true to export the transaction IDs, otherwise false
 */
public void setExportTransactIds(boolean exportTransactIds) {
    this.exportTransactIds = exportTransactIds;
}
/**
 * Returns the mode that determines how item and transaction IDs are
 * handled in PMML.
 *
 * @return PMML export type of item and transaction IDs
 */
public int getExportTransactItemNames() {
    return this.exportTransactItemNames;
}
/**
 * Sets the mode that determines how item and transaction IDs are handled
 * in PMML.
 * <p>
 * Background: PMML 2.0 is incomplete here, because the item ID and the
 * transaction ID are not specially marked in the mining schema. As a
 * result, PMML 2.0 association models cannot really be applied to new
 * data unless agreed names are used for the IDs.
 * <p>
 * There are three ways to handle this problem:
 * <ol>
 * <li> Do nothing: conforms to PMML 2.0 but loses functionality.
 * <li> Use the XELOPES PMML extension: two new attributes, 'itemIdName'
 *      (itemId) and 'transactIdName' (transactionId), are added to
 *      AssociationModel.
 * <li> Use the PMML 2.1 solution: the transaction ID is marked as 'group'
 *      in the mining schema, hence the other active attribute is the
 *      item ID (currently not implemented in XELOPES).
 * </ol>
 * The value is stored as given; no validation is performed.
 *
 * @param exportTransactItemNames PMML export type of item and transaction IDs
 */
public void setExportTransactItemNames(int exportTransactItemNames) {
    this.exportTransactItemNames = exportTransactItemNames;
}
/**
* Returns export type to mining input stream (method toMiningInputStream).
*
* @return export type to mining input stream
*/
public int getMisExportType()
{
return misExportType;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -