⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 transformationbuild.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

 /**
  * Title: XELOPES Data Mining Library
  * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
  * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
  * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
  * @author Carsten Weisse
  * @author Michael Thess
  * @version 1.0
  */
package com.prudsys.pdm.Examples;

import java.io.FileWriter;

import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Input.MiningFilterStream;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Statistics.StatisticsMiningModel;
import com.prudsys.pdm.Models.Statistics.StatisticsSettings;
import com.prudsys.pdm.Models.Statistics.Algorithms.Statistics;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Transform.MiningTransformationFactory;
import com.prudsys.pdm.Transform.MiningTransformationStep;
import com.prudsys.pdm.Transform.OneToMultiple.Binning;
import com.prudsys.pdm.Transform.OneToOne.CategMapping;
import com.prudsys.pdm.Transform.OneToOne.CategNumMapping;
import com.prudsys.pdm.Transform.OneToOne.Categorization;
import com.prudsys.pdm.Transform.OneToOne.Discretization;
import com.prudsys.pdm.Transform.OneToOne.Exponential;
import com.prudsys.pdm.Transform.OneToOne.LinearNormal;
import com.prudsys.pdm.Transform.OneToOne.Reciprocal;
import com.prudsys.pdm.Transform.OneToOne.Root;
import com.prudsys.pdm.Utils.PmmlUtils;

/**
 * Builds a set of transformations, applies them to the iris data set
 * and writes the transformed meta data to the PMML file
 * 'data/pmml/Transformation.xml'.
 */
public class TransformationBuild extends BasisExample
{
  /**
   * Empty constructor.
   */
  public TransformationBuild() {

  }

  /**
   * Run the example of this class.
   *
   * Builds two transformation steps (a set of attribute mappings followed
   * by a root transformation), combines them into one transformation
   * activity, prints the filtered stream and writes its meta data as PMML.
   *
   * @throws Exception error while example is running
   */
  public void runExample() throws Exception {

    // Open 'iris.arff':
    MiningArffStream arff = new MiningArffStream( "data/arff/iris.arff" );

    // Calculate statistics for 'sepalwidth'; the resulting model is
    // handed to the linear normalization further below:
    StatisticsSettings statSett = new StatisticsSettings();
    statSett.setDataSpecification( arff.getMetaData() );
    statSett.setUnivariateTarget( arff.getMetaData().getMiningAttribute("sepalwidth") );
    Statistics statAlg = new Statistics();
    statAlg.setMiningSettings( statSett );
    statAlg.setMiningInputStream( arff );
    StatisticsMiningModel statModel = (StatisticsMiningModel) statAlg.buildModel();
    // The stream was consumed by the statistics run and is used again
    // by the filter stream below (presumably reset() rewinds it):
    arff.reset();

    // Exponential transformation of 'sepallength':
    Exponential exp    = new Exponential();
    exp.setSourceName( "sepallength" );
    exp.setTargetName( "t_sepallength" );

    // Discretization of 'sepallength' using the given bounds:
    Discretization disc = new Discretization();
    disc.setSourceName( "sepallength" );
    disc.setTargetName( "td_sepallength" );
    double[] bounds = {4.5, 5, 6, 7.5};
    disc.setBounds( bounds );

    // Category mapping transformation of 'class'
    // (both 'Iris-virginica' and 'Iris-versicolor' map to 'virver'):
    CategMapping catMap = new CategMapping();
    catMap.setSourceName("class");
    catMap.setTargetName("t_class");
    catMap.addCategoryPair( new Category("Iris-setosa"), new Category("setosa") ) ;
    catMap.addCategoryPair( new Category("Iris-virginica"), new Category("virver") ) ;
    catMap.addCategoryPair( new Category("Iris-versicolor"), new Category("virver") ) ;

    // Category-to-number mapping transformation of 'class':
    CategNumMapping catMapN = new CategNumMapping();
    catMapN.setSourceName("class");
    catMapN.setTargetName("t-n_class");
    catMapN.addCategoryPair( new Category("Iris-setosa"), new Double(0) );
    catMapN.addCategoryPair( new Category("Iris-virginica"), new Double(111) ) ;
    catMapN.addCategoryPair( new Category("Iris-versicolor"), new Double(111) ) ;

    // Reciprocal transformation of 'sepalwidth', original attr. remains:
    Reciprocal rec    = new Reciprocal();
    rec.setSourceName( "sepalwidth" );
    rec.setTargetName( "tr_sepalwidth" );
    rec.setRemoveSourceAttribute(false);

    // (0,1) transformation of 'sepalwidth', original attribute remains:
    LinearNormal norm01 = new LinearNormal();
    norm01.setSourceName( "sepalwidth" );
    norm01.setTargetName( "tn_sepalwidth" );
    norm01.setMin(2.0);
    norm01.setMax(4.4);
    norm01.setOutliers( LinearNormal.OUTLIER_TREATMENT_METHOD_asExtremeValues );
    norm01.setStatisticsMiningModel( statModel );
    norm01.setRemoveSourceAttribute(false);

    // Transform numeric in categorical attribute:
    Categorization categ = new Categorization();
    categ.setSourceName( "petallength" );
    categ.setTargetName( "t_petallength" );

    // Binning of 'class':
    Binning binn = new Binning();
    binn.setFeatureName( "class" );
//   This part is now only required if user-defined classifier names are desired:
//     CategoricalAttribute cat = (CategoricalAttribute)
//       arff.getMetaData().getMiningAttribute( "class" );
//     String[] classifierNames = new String[ cat.getCategoriesNumber() ];
//     for (int i = 0; i < cat.getCategoriesNumber(); i++)
//        classifierNames[i] = "t_class_" +
//          cat.getCategory(i).getDisplayValue();
//     binn.setClassifierName( classifierNames );

    // Create first mining transformation step using factory:
    MiningTransformationFactory mtf = new MiningTransformationFactory();
    mtf.addOneToOneMapping(exp);
    mtf.addOneToOneMapping(disc);
    mtf.addOneToOneMapping(catMap);
    mtf.addOneToOneMapping(catMapN);
    mtf.addOneToOneMapping(rec);
    mtf.addOneToOneMapping(norm01);
    mtf.addOneToOneMapping(categ);
    mtf.addOneToMultipleMapping(binn);
    MiningTransformationStep mts = mtf.createMiningTransformationStep();

    // Root transformation of 't_sepallength', i.e. of the attribute
    // produced by the exponential transformation of the first step:
    Root root    = new Root();
    root.setSourceName( "t_sepallength" );
    root.setTargetName( "tt_sepallength" );

    // Create second mining transformation step using the same factory:
    mtf.reset();
    mtf.addOneToOneMapping(root);
    MiningTransformationStep mts2 = mtf.createMiningTransformationStep();

    // Create mining transformation activity from both steps, in order:
    MiningTransformationActivity mta = new MiningTransformationActivity();
    mta.addTransformationStep(mts);
    mta.addTransformationStep(mts2);

    // Create mining filter stream from iris file and our transformer:
    MiningFilterStream filter = new MiningFilterStream( arff, mta );

    // Display transformed data:
    System.out.println("User-Transformed Iris: ");
    System.out.println( filter );

    // Write transformed data into PMML document. The writer is closed in
    // a finally block so that buffered output is flushed to disk and the
    // file handle is released even if writing fails:
    FileWriter writer = new FileWriter("data/pmml/Transformation.xml");
    try {
      MiningDataSpecification mds = filter.getMetaData();
      mds.writePmml(writer);
    }
    finally {
      writer.close();
    }

    // Show in browser ('debug' is not declared in this class; presumably
    // it is inherited from BasisExample):
    if (debug == 2) PmmlUtils.openPmmlBrowser("Transformation.xml");
  }

  /**
   * Example of building transformations.
   *
   * Any exception raised by the example is printed to standard error.
   *
   * @param args (ignored)
   */
  public static void main(String[] args)
  {
      try {
        new TransformationBuild().runExample();
      }
      catch (Exception ex)
      {
        ex.printStackTrace();
      }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -