// File: statisticsbuild.java (code-viewer header; viewer control label: "Font size")
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Carsten Weisse
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Examples;
import java.io.FileWriter;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningAlgorithmParameter;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Statistics.CategoricalGroup;
import com.prudsys.pdm.Models.Statistics.ContingencyEntry;
import com.prudsys.pdm.Models.Statistics.Group;
import com.prudsys.pdm.Models.Statistics.GroupingParameter;
import com.prudsys.pdm.Models.Statistics.NumericGroup;
import com.prudsys.pdm.Models.Statistics.StatisticsMiningModel;
import com.prudsys.pdm.Models.Statistics.StatisticsSettings;
import com.prudsys.pdm.Models.Statistics.TimeGroup;
import com.prudsys.pdm.Utils.GeneralUtils;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
 * Builds a statistics model with multidimensional grouping, prints a textual
 * report of it to standard output and writes it to the plain-text file
 * 'StatisticsModel.txt' and to the PMML file 'StatisticsModel.xml'.
 */
public class StatisticsBuild extends BasisExample {

    /**
     * Empty constructor.
     */
    public StatisticsBuild() {
    }

    /**
     * Run the example of this class: read the ARFF data, configure the
     * statistics settings with two grouping attributes and one univariate
     * target, build the model, print it and write it to disk.
     *
     * @throws Exception error while example is running
     */
    public void runExample() throws Exception {
        // Open data source and get metadata:
        MiningInputStream inputData = new MiningArffStream( "data/arff/custom-transact.arff" );
        MiningDataSpecification metaData = inputData.getMetaData();

        // Get target and grouping attributes:
        MiningAttribute univariateTargetAttribute = metaData.getMiningAttribute( "customerId" );
        MiningAttribute groupingAttribute1 = metaData.getMiningAttribute( "transactionPosition" );
        MiningAttribute groupingAttribute2 = metaData.getMiningAttribute( "itemId" );

        // Create MiningSettings object and assign metadata:
        StatisticsSettings miningSettings = new StatisticsSettings();
        miningSettings.setDataSpecification( metaData );

        // Assign settings:
        int maxGroups = 10;
        GroupingParameter gp1 = new GroupingParameter(
            metaData.getAttributeIndex( groupingAttribute1 ), maxGroups, GroupingParameter.INCREASING );
        GroupingParameter gp2 = new GroupingParameter(
            metaData.getAttributeIndex( groupingAttribute2 ), maxGroups, GroupingParameter.INCREASING );
        Vector groups = new Vector();
        groups.addElement( gp1 );
        groups.addElement( gp2 );
        miningSettings.setGrouping( groups );
        miningSettings.setUnivariateTarget( univariateTargetAttribute );
        miningSettings.verifySettings();

        // Generate mining algorithm specification directly:
        MiningAlgorithmSpecification miningAlgorithmSpecification =
            createMiningAlgorithmSpecification();

        // Get class name from algorithms specification:
        String className = miningAlgorithmSpecification.getClassname();
        if( className == null ) {
            throw new MiningException( "classname attribute expected." );
        }

        // Set and display mining parameters:
        GeneralUtils.displayMiningAlgSpecParameters( miningAlgorithmSpecification );

        // Create algorithm object with default values:
        MiningAlgorithm algorithm = GeneralUtils.createMiningAlgorithmInstance( className );

        // Put it all together:
        algorithm.setMiningInputStream( inputData );
        algorithm.setMiningSettings( miningSettings );
        algorithm.setMiningAlgorithmSpecification( miningAlgorithmSpecification );
        algorithm.verify();

        // Build the mining model:
        MiningModel model = algorithm.buildModel();
        System.out.println( "calculation time [s]: " + algorithm.getTimeSpentToBuildModel() );

        // Show results:
        showStats( (StatisticsMiningModel) model );

        // Write to text. The writer is closed in a finally block so that
        // buffered output is flushed and the file handle is released even
        // when writing fails:
        FileWriter textWriter = new FileWriter( "data/pmml/StatisticsModel.txt" );
        try {
            model.writePlainText( textWriter );
        }
        finally {
            textWriter.close();
        }

        // Write to PMML (closed before the browser is opened below):
        FileWriter pmmlWriter = new FileWriter( "data/pmml/StatisticsModel.xml" );
        try {
            model.writePmml( pmmlWriter );
        }
        finally {
            pmmlWriter.close();
        }

        // Show in browser. 'debug' is not declared in this class; presumably
        // it is inherited from BasisExample.
        // NOTE(review): only the bare file name is passed here; presumably
        // openPmmlBrowser resolves it against the PMML directory - confirm.
        if( debug == 2 ) {
            PmmlUtils.openPmmlBrowser( "StatisticsModel.xml" );
        }
    }

    /**
     * Example of building a statistics model. Any exception raised by the
     * example is caught here and its stack trace is printed.
     *
     * @param args arguments (ignored)
     */
    public static void main( String[] args ) {
        try {
            new StatisticsBuild().runExample();
        }
        catch( Exception ex ) {
            ex.printStackTrace();
        }
    }

    /**
     * Create MiningAlgorithmSpecification directly instead
     * of reading from algorithms.xml file.
     *
     * @return new object of MiningAlgorithmSpecification
     */
    private static MiningAlgorithmSpecification createMiningAlgorithmSpecification() {
        MiningAlgorithmSpecification specification = new MiningAlgorithmSpecification();
        specification.setName( "Statistics" );
        specification.setFunction( "StatisticalAnalysis" );
        specification.setAlgorithm( "DescriptiveAnalysis" );
        specification.setClassname(
            "com.prudsys.pdm.Models.Statistics.Algorithms.Statistics" );
        specification.setVersion( "1.0" );

        // The specification is given an empty parameter array:
        MiningAlgorithmParameter[] parameters = new MiningAlgorithmParameter[0];
        specification.setInputAttribute( parameters );
        return specification;
    }

    /**
     * Show statistics model: prints the report of the whole group tree,
     * starting at the root group, to standard output.
     *
     * @param statsModel model of statistics
     */
    public static void showStats( StatisticsMiningModel statsModel ) {
        // Get root group:
        Group rootGroup = statsModel.getRootGroup();
        // Call recursive statistics:
        String stat = getStat( rootGroup, 0, statsModel );
        System.out.println( "Stats: " + stat );
    }

    /**
     * Print some tabs.
     *
     * @param num number of tabs; values of zero or less give an empty string
     * @return tabs
     */
    private static String printTabs( int num ) {
        StringBuilder tabs = new StringBuilder();
        for( int i = 0; i < num; i++ ) {
            tabs.append( '\t' );
        }
        return tabs.toString();
    }

    /**
     * Get statistics data recursively.
     *
     * @param group group for output
     * @param level level in tree, only used for display formatting
     * @param statsModel statistics model
     * @return String representation of output
     */
    private static String getStat( Group group, int level, StatisticsMiningModel statsModel ) {
        // The metadata is the same for every group, so it is looked up once
        // here instead of once per recursion step.
        MiningDataSpecification metaData = statsModel.getMiningSettings().getDataSpecification();
        StringBuilder description = new StringBuilder();
        appendStat( description, group, level, metaData );
        return description.toString();
    }

    /**
     * Append the report of one group and, recursively, of all its subgroups.
     *
     * @param out buffer receiving the report
     * @param group group for output
     * @param level level in tree, only used for display formatting
     * @param metaData meta data used to look up the grouping attributes
     */
    private static void appendStat( StringBuilder out, Group group, int level,
                                    MiningDataSpecification metaData ) {
        appendGroupHeader( out, group, level, metaData );
        appendLine( out, level, "Contains " + group.getCount() + " vectors" );

        if( !group.isLeaf() ) {
            appendLine( out, level + 1, group.getChildCount() + " subgroups" );
            for( int i = 0; i < group.getChildCount(); i++ ) {
                appendStat( out, (Group) group.getChildAt( i ), level + 1, metaData );
            }
            return;
        }

        long count = group.getCount();
        if( count != 0 ) {
            appendLeafStatistics( out, group, level );
        }
        else {
            appendLine( out, level + 1, "Empty group" );
        }
    }

    /**
     * Append the line identifying a group: its grouping attribute with the
     * category, numeric interval or time unit it stands for, or "Root" /
     * "Other" when the group has no grouping attribute (index -1).
     *
     * @param out buffer receiving the report
     * @param group group for output
     * @param level level in tree, only used for display formatting
     * @param metaData meta data used to look up the grouping attribute
     */
    private static void appendGroupHeader( StringBuilder out, Group group, int level,
                                           MiningDataSpecification metaData ) {
        int attrNum = group.getGroupAttribute();
        if( attrNum == -1 ) {
            // No grouping attribute: a group without parent is the root.
            appendLine( out, level, group.getParent() == null ? "Root" : "Other" );
            return;
        }

        if( group instanceof CategoricalGroup ) {
            CategoricalGroup categ = (CategoricalGroup) group;
            CategoricalAttribute attribute =
                (CategoricalAttribute) metaData.getMiningAttribute( attrNum );
            double value = categ.getValue();
            appendLine( out, level, "Categorical group: " + attribute.getName()
                + " with value: " + value + " (" + attribute.getCategory( value ) + ")" );
        }
        else if( group instanceof NumericGroup ) {
            NumericGroup numeric = (NumericGroup) group;
            NumericAttribute attribute =
                (NumericAttribute) metaData.getMiningAttribute( attrNum );
            double low = numeric.getLow();
            double high = numeric.getHigh();
            appendLine( out, level, "Numeric group: " + attribute.getName()
                + " (" + low + " .. " + high + ")" );
        }
        else {
            // Every remaining group is treated as a time group; any other
            // type fails with a ClassCastException on the cast below.
            TimeGroup time = (TimeGroup) group;
            NumericAttribute attribute =
                (NumericAttribute) metaData.getMiningAttribute( attrNum );
            int unit = time.getGroupUnit();
            int value = time.getGroupValue();
            appendLine( out, level, "Time group: " + attribute.getName()
                + " with unit " + timeUnitLabel( unit ) + " = " + value );
        }
    }

    /**
     * Translate a time group unit, compared against the java.util.Calendar
     * field constants, into its quoted display label.
     *
     * @param unit unit of the time group
     * @return quoted label, or "'Unknown!!!' = " followed by the unit number
     */
    private static String timeUnitLabel( int unit ) {
        switch( unit ) {
            case java.util.Calendar.SECOND:        return "'Seconds'";
            case java.util.Calendar.MINUTE:        return "'Minute'";
            case java.util.Calendar.HOUR_OF_DAY:   return "'Hour'";
            case java.util.Calendar.DAY_OF_MONTH:  return "'Day'";
            case java.util.Calendar.WEEK_OF_MONTH: return "'Week'";
            case java.util.Calendar.MONTH:         return "'Month'";
            case java.util.Calendar.YEAR:          return "'Year'";
            default:                               return "'Unknown!!!' = " + unit;
        }
    }

    /**
     * Append the univariate and multivariate statistics of a non-empty leaf
     * group, followed by an empty line.
     *
     * @param out buffer receiving the report
     * @param group leaf group for output
     * @param level level in tree, only used for display formatting
     */
    private static void appendLeafStatistics( StringBuilder out, Group group, int level ) {
        appendLine( out, level, "Univariate:" );
        appendLine( out, level + 1, "Count: " + group.getCount() );
        appendLine( out, level + 1, "Min: " + group.getMin() );
        appendLine( out, level + 1, "Max: " + group.getMax() );
        appendLine( out, level + 1, "Range: " + group.getRange() );
        appendLine( out, level + 1, "Sum: " + group.getSum() );
        appendLine( out, level + 1, "Mean: " + group.getMean() );
        appendLine( out, level + 1, "Quartile 25: " + group.getQuart25() );
        appendLine( out, level + 1, "Quartile 50: " + group.getQuart50() );
        appendLine( out, level + 1, "Quartile 75: " + group.getQuart75() );
        appendLine( out, level + 1, "Interquartile range: " + group.getQuartRange() );
        appendLine( out, level + 1, "Variance: " + group.getVariance() );
        // The label spelling follows the getter name getStandart().
        appendLine( out, level + 1, "Standart: " + group.getStandart() );
        appendLine( out, level + 1, "Skewness: " + group.getSkewness() );
        appendLine( out, level + 1, "Variation coefficient: " + group.getVarCoeff() );

        appendLine( out, level, "Multivariate:" );
        appendLine( out, level + 1, "Correlation: " + group.getCorrelation() );
        appendContingencyTable( out, group, level );

        // Empty line separating this leaf from the next group:
        out.append( "\n" );
    }

    /**
     * Append the contingency table of a leaf group, or a note that it is
     * empty when the group returns no enumeration.
     *
     * @param out buffer receiving the report
     * @param group leaf group for output
     * @param level level in tree, only used for display formatting
     */
    private static void appendContingencyTable( StringBuilder out, Group group, int level ) {
        java.util.Enumeration em = group.getContingencyTable();
        if( em == null ) {
            appendLine( out, level + 1, "Contingency table is empty" );
            return;
        }

        // The number of entries is printed before the entries themselves,
        // so the enumeration is collected first.
        Vector entries = new Vector();
        while( em.hasMoreElements() ) {
            entries.addElement( em.nextElement() );
        }
        appendLine( out, level + 1, "Contingency table contains " + entries.size() + " elements" );
        for( int i = 0; i < entries.size(); i++ ) {
            ContingencyEntry entry = (ContingencyEntry) entries.elementAt( i );
            appendLine( out, level + 2, entry.toString() );
        }
    }

    /**
     * Append one report line: indentation tabs, the text and a line feed.
     *
     * @param out buffer receiving the line
     * @param level number of leading tabs
     * @param text content of the line
     */
    private static void appendLine( StringBuilder out, int level, String text ) {
        out.append( printTabs( level ) ).append( text ).append( "\n" );
    }
}
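// Code-viewer keyboard shortcuts (web page residue, not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -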