📄 associationrulestaxonomybuild.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Carsten Weisse
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Examples;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Hashtable;
import com.prudsys.pdm.Automat.MiningAutomationAssignment;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.CategoryHierarchy;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.Records.Csv.MiningCsvStream;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesAlgorithm;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesMiningModel;
import com.prudsys.pdm.Models.AssociationRules.AssociationRulesSettings;
import com.prudsys.pdm.Models.AssociationRules.RulesNumberAssessment;
import com.prudsys.pdm.Models.AssociationRules.RulesNumberCallback;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Builds an association rule model with taxonomy and writes it to the
* PMML file 'AssociationRulesTaxonomyModel.xml'.
*/
public class AssociationRulesTaxonomyBuild extends BasisExample {

    /**
     * Empty constructor.
     */
    public AssociationRulesTaxonomyBuild() {
    }

    /**
     * Run the example of this class.
     *
     * The method reads the transactional data from 'data/csv/sessions.txt',
     * attaches an item taxonomy to the 'itemId' attribute (see
     * {@link #addTaxonomy(CategoricalAttribute)}), builds an association
     * rules model with the 'AprioriSimple' algorithm using parameter
     * automation, prints the rules and writes the model to
     * 'data/pmml/AssociationRulesTaxonomyModel.xml'.
     *
     * @throws Exception error while example is running
     */
    public void runExample() throws Exception {
        // Create metadata and open csv source stream:
        MiningDataSpecification mds = new MiningDataSpecification();
        mds.setRelationName("Sessions");
        CategoricalAttribute transactId = new CategoricalAttribute("transactionId");
        CategoricalAttribute itemId = new CategoricalAttribute("itemId");
        NumericAttribute itemIndex = new NumericAttribute("itemIndex");
        mds.addMiningAttribute(transactId);
        mds.addMiningAttribute(itemIndex);
        mds.addMiningAttribute(itemId);
        MiningInputStream inputData = new MiningCsvStream("data/csv/sessions.txt", mds);
        inputData.open();

        // Run through data to determine all categories of itemId (only for taxonomy):
        while (inputData.next()) {
            inputData.read();
        }
        inputData.reset();
        MiningDataSpecification metaData = inputData.getMetaData();

        // Get transactional attributes:
        CategoricalAttribute categoryItemId =
            (CategoricalAttribute) metaData.getMiningAttribute("itemId");
        CategoricalAttribute categoryTransactId =
            (CategoricalAttribute) metaData.getMiningAttribute("transactionId");

        // Add taxonomy of items to categoryItemId (for 'sessions.txt'):
        addTaxonomy(categoryItemId);

        // Create MiningSettings object and assign metadata:
        AssociationRulesSettings miningSettings = new AssociationRulesSettings();
        miningSettings.setDataSpecification(metaData);

        // Assign settings:
        miningSettings.setItemId(categoryItemId);
        miningSettings.setTransactionId(categoryTransactId);
        miningSettings.setMinimumConfidence(0.3);
        miningSettings.setMinimumSupport(0.5);
        miningSettings.verifySettings();

        // Get default mining algorithm specification (MAS) from 'algorithms.xml':
        MiningAlgorithmSpecification miningAlgorithmSpecification =
            MiningAlgorithmSpecification.getMiningAlgorithmSpecification("AprioriSimple", null);
        if (miningAlgorithmSpecification == null) {
            throw new MiningException("Can't find application AprioriSimple.");
        }

        // Get class name from algorithms specification:
        String className = miningAlgorithmSpecification.getClassname();
        if (className == null) {
            throw new MiningException("classname attribute expected.");
        }

        // Set and display mining algorithm specification parameters:
        miningAlgorithmSpecification.setMAPValue("minimumItemSize", "1");
        miningAlgorithmSpecification.setMAPValue("maximumItemSize", "-1");
        GeneralUtils.displayMiningAlgSpecParameters(miningAlgorithmSpecification);

        // Create automation parameter, if automation is required.
        // NOTE(review): presumably the automation tunes the settings until the
        // number of rules lies between 50 and 100, using at most 30
        // iterations - confirm against RulesNumberAssessment.
        MiningAutomationAssignment maa = new MiningAutomationAssignment();
        maa.setMiningModelAssessment(new RulesNumberAssessment());
        maa.setMiningAutomationCallback(new RulesNumberCallback());
        maa.setMinAssessment(50);
        maa.setMaxAssessment(100);
        maa.setMaxIterationNumber(30);

        // Create algorithm object with default values:
        AssociationRulesAlgorithm algorithm = (AssociationRulesAlgorithm)
            GeneralUtils.createMiningAlgorithmInstance(className);

        // Put it all together:
        algorithm.setMiningInputStream(inputData);
        algorithm.setMiningSettings(miningSettings);
        algorithm.setMiningAlgorithmSpecification(miningAlgorithmSpecification);
        algorithm.setMiningAutomationAssignment(maa);

        // Parameter specific for AssociationRulesAlgorithm but not in MAS:
        algorithm.setUseTaxonomy(true);
        algorithm.setPruneRuleTaxType(
            AssociationRulesAlgorithm.TAX_REMOVE_ANCEST_AND_PARENTS_FROM_RULE);
        algorithm.setExportTransactIds(false);
        algorithm.setExportTransactItemNames(
            AssociationRulesMiningModel.EXPORT_PMML_NAME_TYPE_XELOPES);
        algorithm.verify();

        // Build the mining model with automation:
        MiningModel model = algorithm.buildModelWithAutomation();
        System.out.println("calculation time [s]: " + algorithm.getTimeSpentToBuildModel());

        // Show results:
        AssociationRulesBuild.showRules((AssociationRulesMiningModel) model);

        // Write to PMML. The writer is always closed, even if writing fails,
        // so that the file handle is released and buffered output is flushed.
        // Closing an already closed writer has no effect, so this is safe
        // regardless of whether writePmml closes the writer itself.
        FileWriter writer = new FileWriter("data/pmml/AssociationRulesTaxonomyModel.xml");
        try {
            model.writePmml(writer);
        } finally {
            writer.close();
        }

        // Show in browser ('debug' is not declared in this class; presumably
        // it is inherited from BasisExample):
        if (debug == 2) {
            PmmlUtils.openPmmlBrowser("AssociationRulesTaxonomyModel.xml");
        }
    }

    /**
     * Example of building an association rules model with taxonomies
     * and parameter automation.
     *
     * Any exception raised by the example is caught and its stack trace
     * is printed.
     *
     * @param args arguments (ignored)
     */
    public static void main(String[] args) {
        try {
            new AssociationRulesTaxonomyBuild().runExample();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Add taxonomy to itemId attribute.
     *
     * This taxonomy operation works for simple hierarchical data
     * like it is often (implicitly) contained in web server
     * log files, namely the file 'sessions.txt'. It carries out the
     * following operation: To any category with string representation
     * {@code <n_1>.<n_2>. ... <n_k-1>.<n_k>.<category name 1>} the parent
     * category {@code <n_1>.<n_2>. ... <n_k-1>.<category name 2>} is
     * assigned. <p>
     *
     * Example:
     * The category '2.1.3.2.EC_Tech' has the parent '2.1.3.EC' which
     * in turn has the parent '2.1.Software', etc. <p>
     *
     * To all categories with single dot number like '3.Software' the
     * root category '0.Home' is assigned. (Of course, except to the
     * root category itself.) Categories without any dot are treated the
     * same way. <p>
     *
     * Each assigned relationship is printed to standard output as
     * {@code <category> => <parent>}. The resulting hierarchy is named
     * 'websiteHierarchy'.
     *
     * @param itemId categorical attribute to enrich with taxonomy
     * @throws MiningException cannot add taxonomy
     */
    public static void addTaxonomy(CategoricalAttribute itemId)
            throws MiningException {
        // Hierarchy for attribute itemId:
        CategoryHierarchy cah = new CategoryHierarchy();

        // Create hashtable number prefix => category. If several categories
        // share one number prefix, the one listed last wins.
        Hashtable<String, Category> numCat = new Hashtable<String, Category>();
        ArrayList categs = itemId.getValues();
        for (int i = 0; i < categs.size(); i++) {
            Category categ = (Category) categs.get(i);
            numCat.put(numberPrefix(categ.toString()), categ);
        }

        // Find parent in hashtable and add to category hierarchy:
        for (int i = 0; i < categs.size(); i++) {
            Category categ = (Category) categs.get(i);
            String fnum = numberPrefix(categ.toString());

            // The root category itself has no parent:
            if ("0.".equals(fnum)) {
                continue;
            }

            // Categories with at most one dot hang directly below the root:
            String pfnum = parentNumberPrefix(fnum);
            if (pfnum == null) {
                pfnum = "0.";
            }

            // NOTE(review): pcateg is null when the data contains no category
            // with the parent's number prefix; it is then passed on as null,
            // as before - confirm how CategoryHierarchy handles a null parent.
            Category pcateg = numCat.get(pfnum);
            System.out.println(categ + " => " + pcateg);

            // Add relationship to category hierarchy:
            cah.addRelationship(pcateg, categ);
        }

        // Set taxonomy name:
        cah.setName("websiteHierarchy");

        // Add taxonomy to item ID attribute:
        itemId.setTaxonomy(cah);
    }

    /**
     * Returns the number prefix of a category name, i.e. everything up to
     * and including the last dot ('2.1.3.EC' => '2.1.3.'), or the empty
     * string if the name contains no dot.
     *
     * @param name string representation of a category
     * @return number prefix of the category name
     */
    private static String numberPrefix(String name) {
        // lastIndexOf yields -1 without a dot, giving substring(0, 0) == "".
        return name.substring(0, name.lastIndexOf('.') + 1);
    }

    /**
     * Returns the number prefix of the parent for a given number prefix,
     * i.e. the prefix cut after its second-to-last dot
     * ('2.1.3.' => '2.1.').
     *
     * @param prefix number prefix as returned by {@link #numberPrefix(String)}
     * @return parent number prefix, or null if the prefix contains fewer
     *         than two dots
     */
    private static String parentNumberPrefix(String prefix) {
        // The prefix ends with its last dot, so searching backwards from the
        // character before it finds the second-to-last dot (a negative start
        // index simply yields -1).
        int secondLastDot = prefix.lastIndexOf('.', prefix.length() - 2);
        if (secondLastDot < 0) {
            return null;
        }
        return prefix.substring(0, secondLastDot + 1);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -