📄 flatrulesselectionbuild.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Carsten Weisse
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Examples;
import java.io.FileWriter;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.CategoryHierarchy;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.Multidimensional.MultidimensionalStream;
import com.prudsys.pdm.Input.Multidimensional.OrderAttribute;
import com.prudsys.pdm.Input.Multidimensional.SelectAttribute;
import com.prudsys.pdm.Input.Records.Csv.MiningCsvStream;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesAlgorithm;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesMiningModel;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesSettings;
import com.prudsys.pdm.Models.AssociationRules.Algorithms.FlatRules.FlatRulesAlgorithm;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Builds flat rule model on taxonomy data and runs selection on rules.
*/
public class FlatRulesSelectionBuild extends BasisExample {

    /** Location the flat rules model is exported to as PMML. */
    private static final String PMML_OUTPUT_PATH = "data/pmml/FlatRulesSelectionModel.xml";

    /**
     * Empty constructor.
     */
    public FlatRulesSelectionBuild() {
    }

    /**
     * Run the example of this class.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>open {@code data/csv/sessions.txt} as CSV stream and wrap it into a
     *       multidimensional stream,</li>
     *   <li>attach the item taxonomy and select all items below the
     *       {@code 2.Produkte} category,</li>
     *   <li>build a flat rules model (minimum confidence 0.3, minimum support 0.01),</li>
     *   <li>show the rules, export them to PMML and run
     *       {@link #runRuleSelection(AssociationRulesMiningModel)} on the model.</li>
     * </ol>
     *
     * @throws Exception error while example is running
     */
    public void runExample() throws Exception {

        // ----------- Open data source as multidimensional stream --------------
        // Create metadata and open csv source stream:
        MiningDataSpecification mds = new MiningDataSpecification();
        mds.setRelationName("Sessions");
        CategoricalAttribute transactId = new CategoricalAttribute("transactionId");
        CategoricalAttribute itemId = new CategoricalAttribute("itemId");
        NumericAttribute itemIndex = new NumericAttribute("itemIndex");
        // Attribute order must follow the column order of the csv file:
        mds.addMiningAttribute(transactId);
        mds.addMiningAttribute(itemIndex);
        mds.addMiningAttribute(itemId);
        MiningInputStream inputData0 = new MiningCsvStream("data/csv/sessions.txt", mds);
        inputData0.open();

        // Open multidimensional stream on top of the csv stream and load its data.
        // NOTE(review): the original code also filled a Vector with the itemId
        // attribute name ("use only itemId attribute for selection") but never
        // handed it to the stream; the unused vector was removed. Confirm whether
        // a call restricting the selection attributes was intended here.
        MultidimensionalStream inputData = new MultidimensionalStream(inputData0);
        inputData.readMultidimensionalStreamData();

        // Get meta data:
        MiningDataSpecification metaData = inputData.getMetaData();
        CategoricalAttribute categoryItemId =
            (CategoricalAttribute) metaData.getMiningAttribute("itemId");
        CategoricalAttribute categoryTransactId =
            (CategoricalAttribute) metaData.getMiningAttribute("transactionId");
        categoryTransactId.setUnstoredCategories(true);

        // Add taxonomy of items to categoryItemId (for 'sessions.txt'):
        AssociationRulesTaxonomyBuild.addTaxonomy(categoryItemId);

        // -------------------- Apply selection to stream -----------------------
        // Apply selection of all product items to input stream:
        CategoryHierarchy cah = categoryItemId.getTaxonomy();
        Category productRoot = new Category("2.Produkte");
        Vector products = cah.getAllChildren(productRoot);
        SelectAttribute sa =
            new SelectAttribute(categoryItemId.getName(), productRoot.getDisplayValue());
        addCategories(sa, products);
        SelectAttribute[] selArr = {sa};
        inputData.runSelections(selArr);
        inputData.reset();

        // ------------------------- Build flat rules ---------------------------
        // Create MiningSettings object and assign metadata:
        AssociationRulesSettings miningSettings = new AssociationRulesSettings();
        miningSettings.setDataSpecification(metaData);

        // Assign settings:
        miningSettings.setItemId(categoryItemId);
        miningSettings.setTransactionId(categoryTransactId);
        miningSettings.setMinimumConfidence(0.3);
        miningSettings.setMinimumSupport(0.01);
        miningSettings.verifySettings();

        // Get default mining algorithm specification (MAS) from 'algorithms.xml':
        MiningAlgorithmSpecification miningAlgorithmSpecification =
            MiningAlgorithmSpecification.getMiningAlgorithmSpecification("FlatRules", null);
        if (miningAlgorithmSpecification == null) {
            throw new MiningException("Can't find application FlatRules.");
        }

        // Get class name from algorithms specification:
        String className = miningAlgorithmSpecification.getClassname();
        if (className == null) {
            throw new MiningException("classname attribute expected.");
        }

        // Set and display mining algorithm specification parameters:
        miningAlgorithmSpecification.setMAPValue("minimumItemSize", "1");
        miningAlgorithmSpecification.setMAPValue("maximumItemSize", "5");
        miningAlgorithmSpecification.setMAPValue(
            "rulesOrderType", String.valueOf(FlatRulesAlgorithm.ORDER_LIFT));
        miningAlgorithmSpecification.setMAPValue("createLargeItemSets", "true");
        GeneralUtils.displayMiningAlgSpecParameters(miningAlgorithmSpecification);

        // Create algorithm object with default values:
        AssociationRulesAlgorithm algorithm = (AssociationRulesAlgorithm)
            GeneralUtils.createMiningAlgorithmInstance(className);

        // Put it all together:
        algorithm.setMiningInputStream(inputData);
        algorithm.setMiningSettings(miningSettings);
        algorithm.setMiningAlgorithmSpecification(miningAlgorithmSpecification);

        // Parameter specific for AssociationRulesAlgorithm but not in MAS:
        algorithm.setExportTransactIds(false);
        algorithm.setExportTransactItemNames(
            AssociationRulesMiningModel.EXPORT_PMML_NAME_TYPE_XELOPES);
        algorithm.verify();

        // Build the mining model:
        MiningModel model = algorithm.buildModel();
        System.out.println("calculation time [s]: " + algorithm.getTimeSpentToBuildModel());

        // ------------------- Show and export flat rules ----------------------
        // Show results:
        FlatRulesBuild.showRules((AssociationRulesMiningModel) model);

        // Write to PMML. The writer is closed in any case so that the output is
        // flushed to disk before the file is displayed and no handle is leaked:
        FileWriter writer = new FileWriter(PMML_OUTPUT_PATH);
        try {
            model.writePmml(writer);
        } finally {
            writer.close();
        }

        // Show in browser:
        if (debug == 2) {
            PmmlUtils.openPmmlBrowser("FlatRulesSelectionModel.xml");
        }

        // --------------- Perform selection and ordering of rules --------------
        runRuleSelection((AssociationRulesMiningModel) model);
    }

    /**
     * Example of building an association rules model with selection.
     *
     * <p>Any exception raised by the example is printed to standard error.
     *
     * @param args arguments (ignored)
     */
    public static void main(String[] args) {
        try {
            new FlatRulesSelectionBuild().runExample();
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Performs selection and ordering rules.
     *
     * <p>The rules of the model are exported as flat mining input stream
     * (including support and lift), wrapped into a multidimensional stream,
     * restricted to rules with a software product ({@code 2.1.Software} and its
     * children) in the premise and {@code 2.3.Algorithmen} or
     * {@code 2.2.Komponenten} in the conclusion, and ordered by premise item,
     * support (increasing) and lift (decreasing). Both the unfiltered and the
     * selected stream are printed to standard output.
     *
     * <p>Note that the export type settings of {@code ruleModel} are modified
     * by this method.
     *
     * @param ruleModel flat rules model
     * @exception MiningException mining exception
     */
    public static void runRuleSelection(AssociationRulesMiningModel ruleModel)
        throws MiningException {

        // --------------------- Convert rules as stream ------------------------
        // Convert rules into mining input stream:
        ruleModel.setMisExportType(AssociationRulesMiningModel.EXPORT_MIS_ASSOCIATION_RULES_FLAT);
        ruleModel.setMisExportCharType(AssociationRulesMiningModel.EXPORT_MISCHAR_SUPPORT +
                                       AssociationRulesMiningModel.EXPORT_MISCHAR_LIFT);
        MiningInputStream mis = ruleModel.toMiningInputStream();
        System.out.println();
        System.out.println("Rules as mining input stream: " + mis);

        // ----------------- Create multidimensional stream ---------------------
        mis.reset();
        MultidimensionalStream multiRules = new MultidimensionalStream(mis);
        multiRules.readMultidimensionalStreamData();

        // ------------- Create selection and ordering conditions ---------------
        AssociationRulesSettings settings =
            (AssociationRulesSettings) ruleModel.getMiningSettings();
        CategoricalAttribute itemId = (CategoricalAttribute) settings.getItemId();
        CategoryHierarchy cah = itemId.getTaxonomy();

        // Select all software products in premise:
        Category softwareRoot = new Category("2.1.Software");
        Vector software = cah.getAllChildren(softwareRoot);
        SelectAttribute sa =
            new SelectAttribute("itemIdPremise", softwareRoot.getDisplayValue());
        addCategories(sa, software);

        // Select all algorithm and component products in conclusion:
        SelectAttribute sa2 = new SelectAttribute("itemIdConclusion", "2.3.Algorithmen");
        sa2.addCategory("2.2.Komponenten");
        SelectAttribute[] selArr = {sa, sa2};

        // Order by 1. itemIdPremise, 2. support (increasing), 3. lift (decreasing):
        OrderAttribute oa1 = new OrderAttribute("itemIdPremise", OrderAttribute.UP);
        OrderAttribute oa2 = new OrderAttribute("support", OrderAttribute.UP);
        OrderAttribute oa3 = new OrderAttribute("lift", OrderAttribute.DOWN);
        OrderAttribute[] ordArr = {oa1, oa2, oa3};

        // ------------- Apply selection and ordering to stream -----------------
        // Run selection and ordering:
        multiRules.setOrdering(ordArr);
        multiRules.runSelections(selArr);
        System.out.println("Selected rules as mining input stream: " + multiRules);
    }

    /**
     * Adds the display value of every category of the given vector to the
     * selection, preserving the order of the vector.
     *
     * @param selection selection attribute to extend
     * @param categories vector of {@link Category} objects
     * @exception MiningException error while adding a category
     */
    private static void addCategories(SelectAttribute selection, Vector categories)
        throws MiningException {
        for (Object element : categories) {
            Category category = (Category) element;
            selection.addCategory(category.getDisplayValue());
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -