📄 decisiontreeminingmodel.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Valentine Stepanenko (ValentineStepanenko@zsoft.ru)
* @author Michael Thess
* @author Rolf Rossius
* @version 1.0
*/
package com.prudsys.pdm.Models.Classification.DecisionTree;
import java.io.*;
import java.util.*;
import com.prudsys.pdm.Core.*;
import com.prudsys.pdm.Input.*;
import com.prudsys.pdm.Transform.*;
import com.prudsys.pdm.Models.Classification.*;
import com.prudsys.pdm.Adapters.PmmlVersion20.*;
import com.prudsys.pdm.Utils.*;
/**
* Description of data produced by a decision tree mining function.
* The classifier must be of the type DecisionTreeNode. <p>
*
* From PDM CWM extension. <p>
*
* Superclasses:
* <ul>
* <li> ClassificationMiningModel
* </ul>
*
* In addition, functionality from PMML was added.
* It corresponds to the PMML element TreeModel.
*
* @see MiningModel
* @see com.prudsys.pdm.Adapters.PmmlVersion20.TreeModel
*/
public class DecisionTreeMiningModel extends ClassificationMiningModel
{
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/**
* Marks this model as a nonlinear decision tree (NDT), i.e. a tree that
* contains at least one RegressionTreeNode. Starts out as false and is
* exposed through isNonlinear() / setNonlinear(boolean).
*/
private boolean nonlinear = false;
/**
* Global support vectors for a nonlinear decision tree with SVM nodes.
* According to the accessor documentation, the keys are the unique IDs of
* the support vectors and the values are the mining vectors. Stays null
* until a table is supplied through setGlobalSupportVectors(Hashtable).
*/
private Hashtable globalSupportVectors = null;
// -----------------------------------------------------------------------
// Constructor
// -----------------------------------------------------------------------
/**
* Constructor sets function and algorithm parameters.
*/
public DecisionTreeMiningModel() {
    // Tag the model with the decision tree algorithm and the
    // classification function; both constants come from MiningModel.
    this.algorithm = MiningModel.DECISION_TREE_ALGORITHM;
    this.function = MiningModel.CLASSIFICATION_FUNCTION;
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
* Is nonlinear decision tree, i.e. contains at least one RegressionTreeNode.
*
* @return true if NDT, otherwise false
*/
public boolean isNonlinear() {
    // Plain accessor: hands back the stored NDT flag unchanged.
    return this.nonlinear;
}
/**
* Set nonlinear decision tree, i.e. contains at least one RegressionTreeNode.
*
* @param nonlinear set new NDT status
*/
public void setNonlinear(boolean nonlinear) {
    // The parameter shadows the field, so the field must be qualified.
    this.nonlinear = nonlinear;
}
/**
* Returns hashtable of global support vectors. Global
* support vectors are shared by all SVM models of the
* corresponding regression tree nodes.
*
* The keys of the hashtable are the unique IDs of the
* support vectors and the values are the mining vectors.
*
* @return hashtable of global support vectors
*/
public Hashtable getGlobalSupportVectors() {
    // Returns the stored table itself (no copy); may be null if never set.
    return this.globalSupportVectors;
}
/**
* Sets hashtable of global support vectors. Global
* support vectors are shared by all SVM models of the
* corresponding regression tree nodes.
*
* The keys of the hashtable are the unique IDs of the
* support vectors and the values are the mining vectors.
*
* @param globalSupportVectors new hashtable of global support vectors
*/
public void setGlobalSupportVectors(Hashtable globalSupportVectors) {
    // Stores the caller's table by reference; no defensive copy is made.
    this.globalSupportVectors = globalSupportVectors;
}
// -----------------------------------------------------------------------
// Apply tree to mining vector
// -----------------------------------------------------------------------
/**
* Applies the decision tree to a mining vector. Returns the
* final decision tree node containing the vector. In general,
* the meta data of the mining vector should be similar to the metaData
* of this class. This ensures compatibility of training and
* application data. In particular, the mining vector should also contain
* the target attribute which was used for training.
*
* @param miningData mining vector where the tree should be applied
* @return decision tree node of segment containing the vector
* @throws MiningException if there are some errors when model is applied
*/
public MiningMatrixElement applyModel(MiningMatrixElement miningData)
throws MiningException {
    // Routes a single mining vector through the decision tree and returns
    // whatever the root node's applyForNode(...) yields for it.
    //
    // Every precondition failure is reported as a MiningException (the only
    // exception type this method declares) instead of leaking a
    // NullPointerException or ClassCastException to the caller.

    // Only mining vectors are accepted; instanceof is false for null, so a
    // null argument is rejected here as well.
    if ( !(miningData instanceof MiningVector) )
        throw new MiningException("miningData must be a mining vector");
    MiningVector miningVector = (MiningVector) miningData;

    // The classifier must be the root DecisionTreeNode. Check this before
    // running any transformation so that a missing or wrongly typed
    // classifier fails fast with a descriptive error.
    if ( !(classifier instanceof DecisionTreeNode) )
        throw new MiningException("classifier must be a DecisionTreeNode");
    DecisionTreeNode root = (DecisionTreeNode) classifier;

    // Null-safe application check: an unset application name is treated as
    // "not DISCOVERER" and falls through to the standard transformation.
    boolean isDiscoverer = applicationName != null
        && applicationName.equals( MiningModel.APPLICATION_PRUDSYS_DISCOVERER );

    if (isDiscoverer) {
        // DISCOVERER-specific inner transformations to save time:
        miningVector = addTargetAttribute(miningVector);
        miningVector = miningTransformDiscoverer(miningVector);
    }
    else if (miningTransform != null) {
        // Run inner transformations (e.g. missing values replacement, outliers):
        miningVector = miningTransform.transform(miningVector);
    }

    // Apply decision tree starting at the root node:
    return root.applyForNode(miningVector);
}
// -----------------------------------------------------------------------------
// DISCOVERER-specific preprocessing...
// -----------------------------------------------------------------------------
/**
* Pretransformed meta data. For DISCOVERER caching.
* Holds the meta data object of the input vector most recently analysed by
* addTargetAttribute; that method compares it by reference with the next
* vector's meta data and redoes the analysis only when they differ.
*/
protected MiningDataSpecification pretransMetaData = null;
/**
* Transformed meta data. For DISCOVERER caching.
* Built by addTargetAttribute as a clone of the input meta data with the
* target attribute (taken from the mining settings' data specification)
* added, and attached to every vector that method extends.
*/
protected MiningDataSpecification transMetaData = null;
/**
* No target attribute in input data.
* Set by addTargetAttribute: true when the inspected input meta data has no
* attribute with the target's name, false otherwise. Initially true.
*/
protected boolean noTarget = true;
/**
* Adds target attribute to input vector. Does not change
* the vector if it does contain a target attribute.
*
* @param vector input vector
* @return output vector with target attribute
* @throws MiningException error while trying to add the target attribute
*/
private MiningVector addTargetAttribute(MiningVector vector)
throws MiningException {
    // Makes sure the returned vector has a slot for the target attribute.
    // The meta data analysis is cached: it is repeated only when the
    // vector's meta data object is not the same instance as the one seen
    // on the previous analysis.
    final MiningDataSpecification vectorMeta = vector.getMetaData();

    if (vectorMeta == null) {
        // A vector without meta data can only be handled if an extended
        // specification is already available from an earlier call.
        if (transMetaData == null) {
            throw new MiningException("No meta data ressource to add target attribute");
        }
    }
    else if (vectorMeta != pretransMetaData) {
        // New meta data object => look for the target by name.
        final String targetName = target.getName();
        noTarget = (vectorMeta.getMiningAttribute(targetName) == null);

        if (noTarget) {
            // Extend a clone of the input meta data by the target attribute
            // taken from the model's own data specification.
            final MiningAttribute modelTarget =
                miningSettings.getDataSpecification().getMiningAttribute(targetName);
            transMetaData = (MiningDataSpecification) vectorMeta.clone();
            transMetaData.addMiningAttribute(modelTarget);
        }

        // Remember which meta data object the cached state belongs to.
        pretransMetaData = vectorMeta;
    }

    if (noTarget) {
        // No target attribute in the input => append one extra value.
        final int oldLength = vector.getValues().length;
        final double[] extended = new double[oldLength + 1];
        int pos = 0;
        while (pos < oldLength) {
            extended[pos] = vector.getValue(pos);
            pos++;
        }
        extended[oldLength] = 0; // placeholder value in the target slot

        final MiningVector withTarget = new MiningVector(extended);
        withTarget.setMetaData( transMetaData );
        return withTarget;
    }

    // Vector already contains the target attribute => hand it back as is.
    return vector;
}
/**
* Meta data of input vector. For DISCOVERER caching.
* miningTransformDiscoverer compares it by reference with the current
* vector's meta data and rebuilds the arrays below when they differ.
*/
private MiningDataSpecification preMetaData = null;
/**
* Attribute types (num = 0, cat = 1, tar = 2). For DISCOVERER caching.
* One entry per attribute of the input meta data, in attribute order.
*/
private byte[] preTypes = null;
/**
* Missing value replacements. For DISCOVERER caching.
* Allocated with one entry per input attribute.
* NOTE(review): presumably filled from
* ApplicationAttribute.getMissingValueReplacement() (default 0 when that
* string does not parse as a double) - the assignment lies outside the
* portion of the method reviewed here; confirm.
*/
private double[] repValues = null;
/**
* Category replacement. For DISCOVERER caching.
* Allocated with one (initially null) table per input attribute.
*/
private Hashtable[] repCategs = null;
/**
* Additional missing values of numeric attributes.
* Row i holds those entries of the attribute's
* ApplicationAttribute.getMissingValues() that parse as doubles; the row
* is left null when there are none.
*/
private double[][] missValuesNum = null;
/**
* Perform inner transformations for prudsys DISCOVERER.
* Uses DISCOVERER-specific missing value handling and caching
* and is faster than standard inner transformation.
*
* @param miningVector mining vector to transform
* @return transformed mining vector
* @throws MiningException input vector has no meta data
*/
private MiningVector miningTransformDiscoverer(MiningVector miningVector)
throws MiningException {
// Get model and vector meta data:
MiningDataSpecification metaData = miningSettings.getDataSpecification();
MiningDataSpecification inputMetaData = miningVector.getMetaData();
if (inputMetaData == null)
throw new MiningException("mining vector has no meta data");
int nAtt = inputMetaData.getAttributesNumber();
// First time, fill all data structures for caching:
if (inputMetaData != preMetaData) {
preTypes = new byte[nAtt];
repValues = new double[nAtt];
repCategs = new Hashtable[nAtt];
missValuesNum = new double[nAtt][];
String tname = target.getName();
for (int i = 0; i < nAtt; i++) {
MiningAttribute mAtt = inputMetaData.getMiningAttribute(i);
String mAttName = mAtt.getName();
// Target attribute:
if (mAttName.equals(tname) ) {
preTypes[i] = 2;
continue;
}
// Set default missing value replacement value:
double value = 0;
// Numeric attribute:
if (mAtt instanceof NumericAttribute) {
preTypes[i] = 0;
ApplicationAttribute appAtt = inputSpec.getApplicationAttribute( mAttName );
if (appAtt != null) {
// Replacement value:
String repValue = appAtt.getMissingValueReplacement();
try {
value = Double.parseDouble(repValue);
}
catch (Exception ex) {;}
// Further missing values:
if ( appAtt.getMissingValues() != null ) {
String[] mv = appAtt.getMissingValues();
Vector missVal = new Vector();
for (int j = 0; j < mv.length; j++) {
try {
double val = Double.parseDouble(mv[j]);
missVal.addElement( new Double(val) );
}
catch (Exception ex) {};
};
int mSize = missVal.size();
if (mSize > 0) {
missValuesNum[i] = new double[mSize];
for (int j = 0; j < mSize; j++)
missValuesNum[i][j] = ((Double) missVal.elementAt(j)).doubleValue();
}
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -