📄 decisiontreeoperator.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* $Author$
* $Date$
* $Revision$
*/
package eti.bi.alphaminer.patch.standard.operation.operator;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Input.MiningStoredData;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Supervised.SupervisedMiningSettings;
import com.prudsys.pdm.Utils.GeneralUtils;
import eti.bi.alphaminer.core.handler.ICaseHandler;
import eti.bi.alphaminer.operation.operator.INodeInfo;
import eti.bi.alphaminer.operation.operator.ModelOperator;
import eti.bi.alphaminer.operation.operator.Operator;
import eti.bi.alphaminer.vo.BIData;
import eti.bi.alphaminer.vo.BIModel;
import eti.bi.alphaminer.vo.BIObject;
import eti.bi.alphaminer.vo.IBIData;
import eti.bi.alphaminer.vo.IBIModel;
import eti.bi.alphaminer.vo.IOperatorNode;
import eti.bi.common.Locale.Resource;
import eti.bi.exception.AppException;
import eti.bi.exception.SysException;
/**
 * Model operator that builds a decision tree classifier from the data of its
 * parent node, using the algorithm registered under the name "J48 (Weka)".
 * <p>
 * The node parameters (pruning, confidence threshold, minimum node size, ...)
 * are translated into a Weka-style option string and passed to the algorithm
 * through its {@link MiningAlgorithmSpecification}.
 */
public class DecisionTreeOperator extends ModelOperator {

    private static final long serialVersionUID = 1L;

    /* Parameter names for the Decision Tree Operator in BIML */
    /* Tree settings */
    public static final String PRUNING = "Pruning";
    public static final String PRUNING_METHOD = "Pruning method";
    public static final String CONFIDENCE = "Confidence threshold";
    public static final String SUBTREE = "Subtree raising";
    public static final String FOLDS = "Number folds";
    public static final String SEED = "Random seed";
    public static final String CLEANUP = "Clean up";
    /* Model selection settings */
    public static final String NODE_SIZE = "Minimum node size";
    public static final String BINARY_SPLITS = "Binary splits";

    /* Parameter values for the Decision Tree Operator */
    public static final String VALUE_NORMAL_PRUNING = "Normal";
    public static final String VALUE_REDUCED_ERROR_PRUNING = "Reduced error";

    /* Default parameter values for the Decision Tree Operator */
    public static final String DEFAULT_PRUNING = String.valueOf(true);
    public static final String DEFAULT_PRUNING_METHOD = VALUE_NORMAL_PRUNING;
    public static final String DEFAULT_CONFIDENCE = "0.25";
    public static final String DEFAULT_SUBTREE = String.valueOf(true);
    public static final String DEFAULT_FOLDS = "3";
    public static final String DEFAULT_SEED = "1";
    public static final String DEFAULT_CLEANUP = String.valueOf(true);
    public static final String DEFAULT_NODE_SIZE = "2";
    public static final String DEFAULT_BINARY_SPLITS = String.valueOf(false);

    /* Parameter names for MiningSettingSpecification and MiningAlgorithm */
    private static final String ALGORITHM_NAME = "J48 (Weka)";
    private static final String MAP_WEKA_CLASS_PARAMETERS = "wekaClassParameters";

    /* Predicted target values recorded through addPredicted(); null until the first entry is added */
    private Vector<String> m_Predicted;

    /**
     * Creates the operator and registers the definition ID of its node with
     * {@code PredictionAssessmentOperator} and {@code ScoreOperator} via their
     * {@code registerParentsDefinitionID} methods.
     *
     * @param a_CaseID     case ID, forwarded to the superclass
     * @param aNodeInfo    node information, forwarded to the superclass; its definition ID is registered
     * @param aCaseHandler case handler, forwarded to the superclass
     */
    public DecisionTreeOperator(String a_CaseID, INodeInfo aNodeInfo, ICaseHandler aCaseHandler) {
        super(a_CaseID, aNodeInfo, aCaseHandler);
        // 2006/07/29 Xiaojun Chen
        PredictionAssessmentOperator.registerParentsDefinitionID(aNodeInfo.getDefinitionID());
        ScoreOperator.registerParentsDefinitionID(aNodeInfo.getDefinitionID());
    }

    /**
     * Sets the node ID and, at the same time, refreshes the operator label and
     * the default model name so that both carry the new ID.
     *
     * @param a_NodeID ID of the node
     */
    public void setNodeID(String a_NodeID) {
        setLabel(getDescription() + " [" + a_NodeID + "]");
        setDefaultModelName(Resource.srcStr("DecisionTree") + "_" + a_NodeID);
        super.setNodeID(a_NodeID);
    }

    /**
     * Sets the description and, at the same time, refreshes the operator label
     * and the default model name using the current node ID.
     *
     * @param a_Description new description of the operator
     */
    public void setDescription(String a_Description) {
        m_Description = a_Description;
        setLabel(m_Description + " [" + m_NodeID + "]");
        setDefaultModelName(Resource.srcStr("DecisionTree") + "_" + m_NodeID);
    }

    /**
     * Tests whether this operator holds a complete result.
     *
     * @return true if the output object exists and reports both a data result
     *         and a model result; false otherwise
     */
    public boolean hasResult() {
        return m_OutputBIObject != null
                && m_OutputBIObject.hasResult(BIObject.DATA)
                && m_OutputBIObject.hasResult(BIObject.MODEL);
    }

    /**
     * Gets the predicted values.
     * The return type is kept raw so existing callers compile unchanged; the
     * elements are the strings stored by {@link #addPredicted(String)}.
     *
     * @return the vector of predicted values, or null if none has been added
     *         since construction or the last {@link #clearPredicted()}
     */
    public Vector getPredicted() {
        return m_Predicted;
    }

    /**
     * Clears the stored predicted values.
     */
    public void clearPredicted() {
        m_Predicted = null;
    }

    /**
     * Adds an entry of predicted target.
     *
     * @param predict the predicted target; null is stored as the empty string
     */
    public void addPredicted(String predict) {
        if (m_Predicted == null) {
            m_Predicted = new Vector<String>();
        }
        m_Predicted.addElement(predict == null ? "" : predict);
    }

    /**
     * Builds the decision tree model for this operator and stores the model and
     * the (passed-through) data in the output object.
     *
     * @param a_OperatorNode operator node whose parameter values configure the tree
     * @param a_Parents      parent operators; only the first element is used as the input source
     * @throws MiningException if the algorithm cannot be located, configured or verified,
     *                         or the target attribute cannot be located in the input data
     * @throws SysException    declared by the operator contract; not thrown directly here
     * @throws AppException    if the target attribute is missing, not categorical or unbounded,
     *                         if the number of folds is not an integer or exceeds the number of
     *                         usable instances, or if the mining settings fail verification
     */
    public void execute(IOperatorNode a_OperatorNode, Vector a_Parents)
            throws MiningException, SysException, AppException {
        /* Get parameters from user input, falling back to the defaults */
        boolean pruning = parseFlag((String) a_OperatorNode.getParameterValue(PRUNING), DEFAULT_PRUNING);
        String confidenceThreshold = null;
        boolean subtreeRaising = true;
        String numFolds = null;
        String randomSeed = null;
        // Single source of truth for the pruning variant: any method other than
        // "Reduced error" is handled as normal pruning everywhere below.
        boolean reducedErrorPruning = false;
        if (pruning) {
            String pruningMethod = orDefault(
                    (String) a_OperatorNode.getParameterValue(PRUNING_METHOD), DEFAULT_PRUNING_METHOD);
            reducedErrorPruning = VALUE_REDUCED_ERROR_PRUNING.equals(pruningMethod);
            if (reducedErrorPruning) {
                numFolds = orDefault((String) a_OperatorNode.getParameterValue(FOLDS), DEFAULT_FOLDS);
                randomSeed = orDefault((String) a_OperatorNode.getParameterValue(SEED), DEFAULT_SEED);
            } else {
                confidenceThreshold = orDefault(
                        (String) a_OperatorNode.getParameterValue(CONFIDENCE), DEFAULT_CONFIDENCE);
                subtreeRaising = parseFlag(
                        (String) a_OperatorNode.getParameterValue(SUBTREE), DEFAULT_SUBTREE);
            }
        }
        boolean cleanup = parseFlag((String) a_OperatorNode.getParameterValue(CLEANUP), DEFAULT_CLEANUP);
        String nodeSize = orDefault((String) a_OperatorNode.getParameterValue(NODE_SIZE), DEFAULT_NODE_SIZE);
        boolean binarySplits = parseFlag(
                (String) a_OperatorNode.getParameterValue(BINARY_SPLITS), DEFAULT_BINARY_SPLITS);

        /* Translate the parameters into the Weka option string */
        StringBuilder options = new StringBuilder();
        if (!cleanup) {
            options.append("-L ");
        }
        if (!pruning) {
            options.append("-U ");
        } else if (reducedErrorPruning) {
            options.append("-S -R -N ").append(numFolds).append(" -Q ").append(randomSeed).append(' ');
        } else {
            if (!subtreeRaising) {
                options.append("-S ");
            }
            options.append("-C ").append(confidenceThreshold).append(' ');
        }
        if (binarySplits) {
            options.append("-B ");
        }
        options.append("-M ").append(nodeSize);
        String decisionTreeParameters = options.toString();

        /* Get input bi object from parent node */
        Operator parentOp = (Operator) a_Parents.elementAt(0);
        setInputBIObject(parentOp.getOutputBIObject());
        IBIData aInputBIData = getInputBIObject().getBIData();
        aInputBIData.getMiningStoredData().reset();

        /* Prepare output data model */
        BIData aOutputBIData = new BIData(getCaseID(), getNodeID());
        aOutputBIData.setTargetAttribute(aInputBIData.getTargetAttribute());
        aOutputBIData.setTransformActionHistory(aInputBIData.getTransformActionHistory());
        // NOTE(review): repeats the setTargetAttribute call above; kept because the
        // effect of setTransformActionHistory on the target is not visible here.
        aOutputBIData.setTargetAttribute(aInputBIData.getTargetAttribute());
        aOutputBIData.setMiningStoredData(aInputBIData.getMiningStoredData());
        BIModel aOutputBIModel = new BIModel(getCaseID(), getNodeID(), IBIModel.TYPE_CLASSIFIER);

        /* Validate the target attribute */
        MiningAttribute targetAttribute = aInputBIData.getTargetAttribute();
        aOutputBIModel.setTargetAttribute(targetAttribute);
        if (targetAttribute == null) {
            m_SystemMessageHandler.appendMessage("Categorical Target attribute is missing. Please add target attribute by using Data Set Attribute Node.");
            throw new AppException("Categorical Target attribute is missing. Please add target attribute by using Data Set Attribute Node.");
        }
        if (!(targetAttribute instanceof CategoricalAttribute)) {
            m_SystemMessageHandler.appendMessage("Attribute \"" + targetAttribute.getName() + "\" is not Categorical.");
            throw new AppException("Attribute \"" + targetAttribute.getName() + "\" is not Categorical.");
        }

        // check the range of the numFolds. TWang. Mar 24. 2005.
        if (reducedErrorPruning) {
            int requestedFolds;
            try {
                requestedFolds = Integer.parseInt(numFolds);
            } catch (NumberFormatException e) {
                // Report bad user input like the other validation failures instead
                // of letting an unchecked exception escape.
                m_SystemMessageHandler.appendMessage("Num of Folds value is not a valid integer.");
                throw new AppException("Num of folds value \"" + numFolds + "\" is not a valid integer.");
            }
            int maxFoldNum = numberOfInstance(aInputBIData);
            if (requestedFolds > maxFoldNum) {
                m_SystemMessageHandler.appendMessage("Num of Folds value is too large.");
                throw new AppException("Num of folds value should be smaller than or equal to " + maxFoldNum);
            }
        }
        if (((CategoricalAttribute) targetAttribute).isUnboundedCategories()) {
            m_SystemMessageHandler.appendMessage("Categorical Target attribute must be bounded.");
            throw new AppException("Attribute \"" + targetAttribute.getName() + "\" should be a bounded Categorical attribute.");
        } // >> END Twang.

        /* Create MiningSettings object and assign metadata */
        SupervisedMiningSettings miningSettings = new SupervisedMiningSettings();
        miningSettings.setDataSpecification(aInputBIData.getMetaData());
        /* Assign settings */
        miningSettings.setTarget(targetAttribute);
        try {
            miningSettings.verifySettings();
        } catch (Exception e) {
            m_SystemMessageHandler.appendMessage("Invalid parameters in building the Decision Tree model.");
            throw new AppException("Invalid parameters in building the Decision Tree model.");
        }
        /* Set MiningSettings */
        aOutputBIModel.setMiningSettings(miningSettings);

        /* Get default mining algorithm specification from 'algorithms.xml' */
        MiningAlgorithmSpecification miningAlgorithmSpecification =
                MiningAlgorithmSpecification.getMiningAlgorithmSpecification(ALGORITHM_NAME, getNodeInfo());
        if (miningAlgorithmSpecification == null) {
            throw new MiningException("Can't find Decision Tree classification method.");
        }
        /* Get class name from algorithms specification */
        String className = miningAlgorithmSpecification.getClassname();
        if (className == null) {
            throw new MiningException("className attribute expected.");
        }
        /* Set MiningAlgorithmSpecification */
        miningAlgorithmSpecification.setMAPValue(MAP_WEKA_CLASS_PARAMETERS, decisionTreeParameters);
        aOutputBIModel.setMiningAlgorithmSpecification(miningAlgorithmSpecification);
        // NOTE(review): the parameters are displayed twice, once through the
        // unqualified method and once through GeneralUtils; both calls are kept.
        displayMiningAlgSpecParameters(miningAlgorithmSpecification);
        /* Set and display mining parameters */
        GeneralUtils.displayMiningAlgSpecParameters(miningAlgorithmSpecification);

        /* Create algorithm object with default values */
        MiningAlgorithm algorithm =
                GeneralUtils.createMiningAlgorithmInstance(className, this.getClass().getClassLoader());
        algorithm.setMiningInputStream(aInputBIData.getMiningStoredData());
        algorithm.setMiningSettings(miningSettings);
        algorithm.setMiningAlgorithmSpecification(miningAlgorithmSpecification);
        try {
            algorithm.verify();
        } catch (IllegalArgumentException e) {
            throw new MiningException(e.getMessage());
        }
        MiningModel model = algorithm.buildModel();
        // NOTE(review): the label says "[s]" while the suffix resource is "ms";
        // confirm the unit returned by getTimeSpentToBuildModel().
        m_SystemMessageHandler.appendMessage(Resource.srcStr("calculationtime") + " [s]: " + algorithm.getTimeSpentToBuildModel() + Resource.srcStr("ms"));
        m_SystemMessageHandler.nextLine();

        /* set output mining data and model to the output mining object */
        aOutputBIModel.setMiningModel(model);
        aOutputBIModel.setModelName(m_DefaultModelName);
        m_OutputBIObject.setBIData(aOutputBIData);
        m_OutputBIObject.setBIModel(aOutputBIModel);
    }

    /**
     * Counts the stored vectors whose target attribute value is not missing.
     * The result is used as the upper bound for the number of folds.
     *
     * @param aInputBIData input data holding the stored vectors and the target attribute
     * @return number of stored vectors with a non-missing target value
     * @throws MiningException if the index of the target attribute cannot be
     *                         resolved from the meta data of the stored vectors
     */
    private int numberOfInstance(IBIData aInputBIData) throws MiningException {
        MiningAttribute targetAttribute = aInputBIData.getTargetAttribute();
        MiningStoredData origInstances = aInputBIData.getMiningStoredData();
        // Propagate lookup failures instead of continuing with an invalid index.
        int targetIndex = origInstances.getMetaData().getAttributeIndex(targetAttribute);
        if (targetIndex < 0) {
            throw new MiningException("Target attribute not found in the input data.");
        }
        int totalVectors = origInstances.getVectorsNumber();
        int usableVectors = totalVectors;
        for (int i = 0; i < totalVectors; i++) {
            MiningVector vector = (MiningVector) origInstances.get(i);
            if (vector.isMissing(targetIndex)) {
                usableVectors--;
            }
        }
        return usableVectors;
    }

    /** Returns {@code value}, or {@code defaultValue} when {@code value} is null. */
    private static String orDefault(String value, String defaultValue) {
        return value == null ? defaultValue : value;
    }

    /** Parses {@code value} (or {@code defaultValue} when it is null) as a boolean; only "true", ignoring case, yields true. */
    private static boolean parseFlag(String value, String defaultValue) {
        return Boolean.parseBoolean(orDefault(value, defaultValue));
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -