📄 transformationbuild.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Carsten Weisse
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Examples;
import java.io.FileWriter;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Input.MiningFilterStream;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Statistics.StatisticsMiningModel;
import com.prudsys.pdm.Models.Statistics.StatisticsSettings;
import com.prudsys.pdm.Models.Statistics.Algorithms.Statistics;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Transform.MiningTransformationFactory;
import com.prudsys.pdm.Transform.MiningTransformationStep;
import com.prudsys.pdm.Transform.OneToMultiple.Binning;
import com.prudsys.pdm.Transform.OneToOne.CategMapping;
import com.prudsys.pdm.Transform.OneToOne.CategNumMapping;
import com.prudsys.pdm.Transform.OneToOne.Categorization;
import com.prudsys.pdm.Transform.OneToOne.Discretization;
import com.prudsys.pdm.Transform.OneToOne.Exponential;
import com.prudsys.pdm.Transform.OneToOne.LinearNormal;
import com.prudsys.pdm.Transform.OneToOne.Reciprocal;
import com.prudsys.pdm.Transform.OneToOne.Root;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Builds a set of transformations and writes them to the
* PMML file 'Transformation.xml'.
*/
public class TransformationBuild extends BasisExample
{
  /**
   * Empty constructor.
   */
  public TransformationBuild() {
  }

  /**
   * Run the example of this class.
   *
   * Reads 'data/arff/iris.arff', builds a statistics model for 'sepalwidth',
   * assembles two transformation steps into one transformation activity,
   * prints the filtered stream and writes the meta data of the filtered
   * stream as PMML to 'data/pmml/Transformation.xml'.
   *
   * @throws Exception error while example is running
   */
  public void runExample() throws Exception {

    // Open 'iris.arff':
    MiningArffStream arff = new MiningArffStream( "data/arff/iris.arff" );

    // Calculate statistics for 'sepalwidth':
    StatisticsSettings statSett = new StatisticsSettings();
    statSett.setDataSpecification( arff.getMetaData() );
    statSett.setUnivariateTarget( arff.getMetaData().getMiningAttribute("sepalwidth") );
    Statistics statAlg = new Statistics();
    statAlg.setMiningSettings( statSett );
    statAlg.setMiningInputStream( arff );
    StatisticsMiningModel statModel = (StatisticsMiningModel) statAlg.buildModel();
    // The stream was consumed by the statistics algorithm; reset it for reuse below.
    arff.reset();

    // Exponential transformation of 'sepallength':
    Exponential exp = new Exponential();
    exp.setSourceName( "sepallength" );
    exp.setTargetName( "t_sepallength" );

    // Discretization of 'sepallength' with user-defined bounds:
    Discretization disc = new Discretization();
    disc.setSourceName( "sepallength" );
    disc.setTargetName( "td_sepallength" );
    double[] bounds = {4.5, 5, 6, 7.5};
    disc.setBounds( bounds );

    // Category mapping transformation (category -> category):
    CategMapping catMap = new CategMapping();
    catMap.setSourceName("class");
    catMap.setTargetName("t_class");
    catMap.addCategoryPair( new Category("Iris-setosa"), new Category("setosa") );
    catMap.addCategoryPair( new Category("Iris-virginica"), new Category("virver") );
    catMap.addCategoryPair( new Category("Iris-versicolor"), new Category("virver") );

    // Category mapping transformation (category -> number):
    CategNumMapping catMapN = new CategNumMapping();
    catMapN.setSourceName("class");
    catMapN.setTargetName("t-n_class");
    catMapN.addCategoryPair( new Category("Iris-setosa"), new Double(0) );
    catMapN.addCategoryPair( new Category("Iris-virginica"), new Double(111) );
    catMapN.addCategoryPair( new Category("Iris-versicolor"), new Double(111) );

    // Reciprocal transformation of 'sepalwidth', original attr. remains:
    Reciprocal rec = new Reciprocal();
    rec.setSourceName( "sepalwidth" );
    rec.setTargetName( "tr_sepalwidth" );
    rec.setRemoveSourceAttribute(false);

    // (0,1) transformation of 'sepalwidth', original attribute remains:
    LinearNormal norm01 = new LinearNormal();
    norm01.setSourceName( "sepalwidth" );
    norm01.setTargetName( "tn_sepalwidth" );
    norm01.setMin(2.0);
    norm01.setMax(4.4);
    norm01.setOutliers( LinearNormal.OUTLIER_TREATMENT_METHOD_asExtremeValues );
    norm01.setStatisticsMiningModel( statModel );
    norm01.setRemoveSourceAttribute(false);

    // Transform numeric in categorical attribute:
    Categorization categ = new Categorization();
    categ.setSourceName( "petallength" );
    categ.setTargetName( "t_petallength" );

    // Binning of 'class':
    Binning binn = new Binning();
    binn.setFeatureName( "class" );
    // This part is now only required if user-defined classifier names are desired:
    // CategoricalAttribute cat = (CategoricalAttribute)
    // arff.getMetaData().getMiningAttribute( "class" );
    // String[] classifierNames = new String[ cat.getCategoriesNumber() ];
    // for (int i = 0; i < cat.getCategoriesNumber(); i++)
    // classifierNames[i] = "t_class_" +
    // cat.getCategory(i).getDisplayValue();
    // binn.setClassifierName( classifierNames );

    // Create first mining transformation step using factory:
    MiningTransformationFactory mtf = new MiningTransformationFactory();
    mtf.addOneToOneMapping(exp);
    mtf.addOneToOneMapping(disc);
    mtf.addOneToOneMapping(catMap);
    mtf.addOneToOneMapping(catMapN);
    mtf.addOneToOneMapping(rec);
    mtf.addOneToOneMapping(norm01);
    mtf.addOneToOneMapping(categ);
    mtf.addOneToMultipleMapping(binn);
    MiningTransformationStep mts = mtf.createMiningTransformationStep();

    // Root transformation of 't_sepallength' (the target of the exponential
    // mapping registered in the first step):
    Root root = new Root();
    root.setSourceName( "t_sepallength" );
    root.setTargetName( "tt_sepallength" );

    // Create second mining transformation step using factory:
    mtf.reset();
    mtf.addOneToOneMapping(root);
    MiningTransformationStep mts2 = mtf.createMiningTransformationStep();

    // Create mining transformation activity:
    MiningTransformationActivity mta = new MiningTransformationActivity();
    mta.addTransformationStep(mts);
    mta.addTransformationStep(mts2);

    // Create mining filter stream from iris file and our transformer:
    MiningFilterStream filter = new MiningFilterStream( arff, mta );

    // Display transformed data:
    System.out.println("User-Transformed Iris: ");
    System.out.println( filter );

    // Write meta data of the transformed stream into PMML document.
    // The writer is closed in a finally block so that buffered output is
    // flushed to disk and the file handle is released even if writing fails.
    MiningDataSpecification mds = filter.getMetaData();
    FileWriter writer = new FileWriter("data/pmml/Transformation.xml");
    try {
      mds.writePmml(writer);
    }
    finally {
      writer.close();
    }

    // Show in browser (only after the file has been completely written).
    // NOTE(review): the file is written to 'data/pmml/Transformation.xml' but
    // only 'Transformation.xml' is passed here - presumably the helper
    // resolves the PMML directory itself; confirm.
    if (debug == 2) {
      PmmlUtils.openPmmlBrowser("Transformation.xml");
    }
  }

  /**
   * Example of building transformations.
   *
   * @param args (ignored)
   */
  public static void main(String[] args)
  {
    try {
      new TransformationBuild().runExample();
    }
    catch (Exception ex)
    {
      // Example program: report any failure on the console.
      ex.printStackTrace();
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -