
NonlinearDecisionTreeApply.java

From the resource collection: complete Java source code for the ALPHAMINER data mining software
Language: Java
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

 /**
  * Title: XELOPES Data Mining Library
  * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
  * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
  * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
  * @author Carsten Weisse
  * @author Michael Thess
  * @version 1.0
  */

package com.prudsys.pdm.Examples;

import java.io.FileReader;
import java.io.FileWriter;
import java.util.Date;

import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.CategoryHierarchy;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Input.MiningFilterStream;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Input.Records.Csv.MiningCsvStream;
import com.prudsys.pdm.Models.Classification.GainsChartAssessment;
import com.prudsys.pdm.Models.Classification.DecisionTree.DecisionTreeMiningModel;
import com.prudsys.pdm.Models.Classification.DecisionTree.DecisionTreeNode;
import com.prudsys.pdm.Models.Supervised.SupervisedMiningSettings;

/**
 * Applies a nonlinear decision tree which is read from the PMML file
 * 'NonlinearDecisionTreeModel.xml' to a data set.
 */
public class NonlinearDecisionTreeApply extends BasisExample {

  /** Calculate cumulative counts and display them for some vectors. */
  public static boolean calcCumulCounts = false;

  /** Calculate Gains chart and display it. */
  public static boolean calcGainsChart = false;

  /**
   * Empty constructor.
   */
  public NonlinearDecisionTreeApply() {
    debug = 0;
  }

  /**
   * Run the example of this class.
   *
   * @throws Exception error while example is running
   */
  public void runExample() throws Exception {

    // Read decision tree model from PMML file generated by prudsys DISCOVERER:
    DecisionTreeMiningModel model = new DecisionTreeMiningModel();
    FileReader reader = new FileReader("data/pmml/NonlinearDecisionTreeModel.xml");
    model.readPmml(reader);
    MiningDataSpecification modelMetaData = model.getMiningSettings().getDataSpecification();
    System.out.println("-------------> PMML model read successfully");

    // Write model back to PMML :
    FileWriter writer = new FileWriter("data/pmml/ndttest.xml");
    model.writePmml(writer);
    System.out.println("<------------- PMML model wrote successfully");

    // Get model target attribute and its associated taxonomy object:
    CategoricalAttribute modelTargetAttribute = (CategoricalAttribute)
      ((SupervisedMiningSettings) model.getMiningSettings()).getTarget();
    CategoryHierarchy cah = modelTargetAttribute.getTaxonomy();

    // Open data source and get metadata:
    MiningInputStream inputData0 = new MiningCsvStream("data/csv/vowel.csv");
    inputData0.open();
    inputData0.findPhysicalModel();
    System.out.println("Physical model: " + inputData0.getPhysicalModel());

    // Transform input data (dynamically) if required:
    MiningInputStream inputData = inputData0;
    if ( modelMetaData.isTransformed() )
      inputData = new MiningFilterStream(inputData0, modelMetaData.getMiningTransformationActivity());

    MiningDataSpecification inputMetaData     = inputData.getMetaData();
    CategoricalAttribute inputTargetAttribute = (CategoricalAttribute)
      inputMetaData.getMiningAttribute( modelTargetAttribute.getName() );

    // Show meta data:
    System.out.println("Prediction:");
    System.out.print("model: ");
    for (int i = 0; i < modelMetaData.getAttributesNumber(); i++) {
      System.out.print(modelMetaData.getMiningAttribute(i).getName() + " ");
    }
    System.out.print("\ninput: ");
    for (int i = 0; i < inputMetaData.getAttributesNumber(); i++) {
      System.out.print(inputMetaData.getMiningAttribute(i).getName() + " ");
    }

    // Show classification results:
    System.out.println();
    int i     = 0;
    int wrong = 0;
    long start = ( new Date() ).getTime();
    while (inputData.next()) {
      // Make prediction:
      MiningVector vector  = inputData.read();
      DecisionTreeNode dtn = (DecisionTreeNode) model.applyModel(vector);
      double predicted     = dtn.getScore();  // or: predicted = model.applyModelFunction(vector);
      Category predTarCat  = modelTargetAttribute.getCategory(predicted);
      double[] dist        = dtn.getDistribution();

      // Output and stats:
      double realTarCat    = vector.getValue(inputTargetAttribute);
      Category tarCat      = inputTargetAttribute.getCategory(realTarCat);
      if (cah != null) {   // if taxonomy applied, use first parent category
        java.util.Vector par = cah.getAllParents(tarCat);
        if (par.size() > 0) tarCat = (Category) par.elementAt(0);
      }

      i = i + 1;
      if (debug == 1) {
        System.out.print(" " + i + ": ");
        for (int j = 0; j < dist.length; j++)
          System.out.print(dist[j] + " ");
        if (dist.length == 2)
          System.out.print(" ==> " + dist[0] / (dist[0]+dist[1]));
        System.out.println();
      }
      if (debug == 2) {
        System.out.println(" " + i + ": " + vector + " -> " + predTarCat);

        System.out.print("dist: ");
        for (int j = 0; j < dist.length; j++)
          System.out.print(dist[j] + " ");
        System.out.println();
      }

      if (predTarCat == null || ! predTarCat.equals(tarCat) ) {
        wrong = wrong + 1;
      }
    }
    long end = ( new Date() ).getTime();
    double timeSpentToApplyModel = ( end - start ) / 1000.0;
    System.out.println("nvec = " + i);
    System.out.println("classification rate = " + (100.0 - ((double) wrong / i)*100.0) );
    System.out.println("time of application = " + timeSpentToApplyModel);

    // Calculate score values:
    if (calcCumulCounts) {
      model.updateCumulatedCounts();
      int numa = modelMetaData.getPretransformedMetaData().getAttributesNumber();
      System.out.println("numa = " + numa);

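      // Build an all-zero vector over the pretransformed metadata, apply the model and print the node's counts: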
      double[] values = new double[numa];
      for (int j = 0; j < numa; j++)
        values[j] = 0;
      MiningVector mv = new MiningVector(values);
      mv.setMetaData(modelMetaData.getPretransformedMetaData());
      System.out.println(mv);
      DecisionTreeNode dtn = (DecisionTreeNode) model.applyModel(mv);

      int sizeTarget = dtn.getDistribution().length;
      System.out.println("sizeTarget = " + sizeTarget);
      for (int j = 0; j < sizeTarget; j++) {
        System.out.println("Target " + j);
        System.out.println("D " + dtn.getDistribution()[j]);
        System.out.println("CRCT " + dtn.getCumulatedRecordCountThis()[j]);
        System.out.println("CRCO " + dtn.getCumulatedRecordCountOther()[j]);
        System.out.println("TRCT " + dtn.getTotalRecordCountThis()[j]);
        System.out.println("TRCO " + dtn.getTotalRecordCountOther()[j]);
      }

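      // Repeat with a vector whose attributes (all except the last one) are set to 5: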
      for (int j = 0; j < numa - 1; j++)
        values[j] = 5;
      mv = new MiningVector(values);
      mv.setMetaData(modelMetaData.getPretransformedMetaData());
      System.out.println(mv);
      dtn = (DecisionTreeNode) model.applyModel(mv);
      for (int j = 0; j < sizeTarget; j++) {
        System.out.println("Target " + j);
        System.out.println("D " + dtn.getDistribution()[j]);
        System.out.println("CRCT " + dtn.getCumulatedRecordCountThis()[j]);
        System.out.println("CRCO " + dtn.getCumulatedRecordCountOther()[j]);
        System.out.println("TRCT " + dtn.getTotalRecordCountThis()[j]);
        System.out.println("TRCO " + dtn.getTotalRecordCountOther()[j]);
      }
    }

    // Calculate Gains chart for two-class problems:
    if ( calcGainsChart && modelTargetAttribute.getCategoriesNumber() == 2 ) {
      inputData.reset();
      GainsChartAssessment gc = new GainsChartAssessment();
      gc.setMiningModel(model);
      gc.setAssessmentData(inputData);
      gc.setUseNormalization(false);
      gc.setUseInterpolation(false);
      double[] gcval = gc.calculateGainsChart();
      System.out.println("G a i n s  C h a r t: ");
      for (int j = 0; j < gcval.length; j++)
        System.out.print(gcval[j]+" ");
      System.out.println();
      inputData.reset();
      System.out.println("Gains chart rate = " + gc.calculateAssessment());
    }
  }

  /**
   * Example of using a nonlinear decision tree for classification.
   *
   * @param args arguments (ignored)
   */
  public static void main(String[] args) {

    try {
      new NonlinearDecisionTreeApply().runExample();
    }
    catch (Exception ex) {
      ex.printStackTrace();
    }
  }
}
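
Usage note: the two optional assessments are controlled by the public static flags calcCumulCounts and calcGainsChart, which are false by default. Below is a minimal sketch of a separate driver class that switches them on before running the example; the class name NonlinearDecisionTreeApplyDemo is made up for illustration, and it assumes the XELOPES examples plus the data/pmml and data/csv files referenced above are available from the working directory.

import com.prudsys.pdm.Examples.NonlinearDecisionTreeApply;

public class NonlinearDecisionTreeApplyDemo {

  public static void main(String[] args) {
    // Enable the optional outputs declared as public static flags in the example class:
    NonlinearDecisionTreeApply.calcCumulCounts = true;  // print cumulated counts for two sample vectors
    NonlinearDecisionTreeApply.calcGainsChart  = true;  // Gains chart, only evaluated for two-class targets

    try {
      new NonlinearDecisionTreeApply().runExample();
    }
    catch (Exception ex) {
      ex.printStackTrace();
    }
  }
}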
