⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 decisiontreeoperator.java

📁 一个数据挖掘软件ALPHAMINER的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * $Author$
 * $Date$
 * $Revision$
 */
package eti.bi.alphaminer.patch.standard.operation.operator;


import java.util.Vector;


import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Input.MiningStoredData;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Supervised.SupervisedMiningSettings;
import com.prudsys.pdm.Utils.GeneralUtils;

import eti.bi.alphaminer.core.handler.ICaseHandler;
import eti.bi.alphaminer.operation.operator.INodeInfo;
import eti.bi.alphaminer.operation.operator.ModelOperator;
import eti.bi.alphaminer.operation.operator.Operator;
import eti.bi.alphaminer.vo.BIData;
import eti.bi.alphaminer.vo.BIModel;
import eti.bi.alphaminer.vo.BIObject;
import eti.bi.alphaminer.vo.IBIData;
import eti.bi.alphaminer.vo.IBIModel;
import eti.bi.alphaminer.vo.IOperatorNode;
import eti.bi.common.Locale.Resource;
import eti.bi.exception.AppException;
import eti.bi.exception.SysException;

/**
 * DecisionTreeOperator is a ModelOperator that builds a decision tree
 * classification model with the algorithm registered under the name
 * "J48 (Weka)".
 *
 * The operator reads its settings from the operator node, translates them into
 * an option string that is handed to the algorithm through the
 * "wekaClassParameters" entry of the mining algorithm specification, builds the
 * model on the data delivered by its first parent and stores both the data and
 * the model in the output BI object.
 */
public class DecisionTreeOperator extends ModelOperator {

	private static final long serialVersionUID = 1L;

	/* Parameter name for Decision Tree Operator in BIML */
	/* Tree settings */
	public static String PRUNING = "Pruning";
	public static String PRUNING_METHOD = "Pruning method";
	public static String CONFIDENCE = "Confidence threshold";
	public static String SUBTREE = "Subtree raising";
	public static String FOLDS = "Number folds";
	public static String SEED = "Random seed";
	public static String CLEANUP = "Clean up";

	/* Model selection settings */
	public static String NODE_SIZE = "Minimum node size";
	public static String BINARY_SPLITS = "Binary splits";

	/* Parameter value for Decision Tree Operator */
	public static String VALUE_NORMAL_PRUNING = "Normal";
	public static String VALUE_REDUCED_ERROR_PRUNING = "Reduced error";

	/* Default parameter value for Decision Tree Operator */
	public static String DEFAULT_PRUNING = String.valueOf(true);
	public static String DEFAULT_PRUNING_METHOD = VALUE_NORMAL_PRUNING;
	public static String DEFAULT_CONFIDENCE = "0.25";
	public static String DEFAULT_SUBTREE = String.valueOf(true);
	public static String DEFAULT_FOLDS = "3";
	public static String DEFAULT_SEED = "1";
	public static String DEFAULT_CLEANUP = String.valueOf(true);

	public static String DEFAULT_NODE_SIZE = "2";
	public static String DEFAULT_BINARY_SPLITS = String.valueOf(false);

	/* Parameter name for MiningSettingSpecification and MiningAlgorithm */
	private static final String ALGORITHM_NAME = "J48 (Weka)";
	private static final String MAP_WEKA_CLASS_PARAMETERS = "wekaClassParameters";

	/* Vector storing the predicted target values; created lazily by addPredicted */
	private Vector m_Predicted;

	/**
	 * Creates the operator and registers the definition ID of its node with
	 * PredictionAssessmentOperator and ScoreOperator.
	 *
	 * @param a_CaseID ID of the case, passed on to the super class
	 * @param aNodeInfo node information, passed on to the super class; its
	 *        definition ID is the one that gets registered
	 * @param aCaseHandler case handler, passed on to the super class
	 */
	public DecisionTreeOperator(String a_CaseID, INodeInfo aNodeInfo, ICaseHandler aCaseHandler) {
		super(a_CaseID, aNodeInfo, aCaseHandler);
		// 2006/07/29 Xiaojun Chen
		// NOTE(review): presumably lets those two operators accept this operator as a parent -- confirm.
		PredictionAssessmentOperator.registerParentsDefinitionID(aNodeInfo.getDefinitionID());
		ScoreOperator.registerParentsDefinitionID(aNodeInfo.getDefinitionID());
	}

	/**
	 * Set node id and update the label and the default model name of the
	 * DecisionTreeOperator at the same time.
	 * @param a_NodeID ID of the node
	 */
	public void setNodeID(String a_NodeID) {
		setLabel(getDescription() + " [" + a_NodeID + "]");
		setDefaultModelName(Resource.srcStr("DecisionTree")+"_" + a_NodeID);
		super.setNodeID(a_NodeID);
	}

	/**
	 * Set the description and update the label and the default model name of the
	 * DecisionTreeOperator at the same time.
	 * @param a_Description new description of the operator
	 */
	public void setDescription(String a_Description) {
		m_Description = a_Description;
		setLabel(m_Description + " [" + m_NodeID + "]");
		setDefaultModelName(Resource.srcStr("DecisionTree")+"_" + m_NodeID);
	}

	/**
	 * Test if the Decision Tree Operator contains any results.
	 * @return true if the output BI object exists and has both a data and a
	 *         model result; false otherwise.
	 */
	public boolean hasResult()
	{
		return m_OutputBIObject != null
				&& m_OutputBIObject.hasResult(BIObject.DATA)
				&& m_OutputBIObject.hasResult(BIObject.MODEL);
	}

	/**
	 * Gets the predicted values.
	 * @return a vector storing predicted values, or null if none have been
	 *         added since creation or since the last clearPredicted().
	 */
	public Vector getPredicted() {
		return m_Predicted;
	}

	/**
	 * Clear the stored predicted values.
	 */
	public void clearPredicted()
	{
		m_Predicted = null;
	}

	/**
	 * Add an entry of predicted target.
	 * @param predict the predicted target; null is stored as an empty string.
	 */
	@SuppressWarnings("unchecked")
	public void addPredicted(String predict) {
		if (m_Predicted == null) {
			m_Predicted = new Vector();
		}
		m_Predicted.addElement(predict == null ? "" : predict);
	}

	/**
	 * Build decision tree model for this Decision Tree Operator.
	 *
	 * The settings are mapped to the option string as follows: "-L" when clean up
	 * is switched off, "-U" when pruning is switched off, "-S -R -N folds -Q seed"
	 * for reduced error pruning, otherwise "-C confidence" (preceded by "-S" when
	 * subtree raising is switched off), "-B" for binary splits and always
	 * "-M nodeSize". Any pruning method other than "Reduced error" is handled as
	 * normal pruning.
	 *
	 * @param a_OperatorNode Operator Node represented by this Decision Tree Operator.
	 * @param a_Parents a Vector storing the parent operators of this Decision Tree
	 *        Operator; only the first element is used.
	 * @throws AppException if the target attribute is missing, is not categorical
	 *         or is unbounded, if the number of folds is not an integer or exceeds
	 *         the number of vectors with a target value, or if the mining settings
	 *         cannot be verified.
	 * @throws MiningException if the algorithm specification or its class name
	 *         cannot be found, if the algorithm rejects its configuration, or if a
	 *         mining call made here fails.
	 * @throws SysException declared for callers; not raised directly in this method.
	 */
	public void execute(IOperatorNode a_OperatorNode, Vector a_Parents)
		throws MiningException, SysException, AppException
	{
		/* Get parameters from user input, falling back to the DEFAULT_* values */
		boolean pruning = Boolean.parseBoolean(readParameter(a_OperatorNode, PRUNING, DEFAULT_PRUNING));

		boolean reducedErrorPruning = false;
		String confidenceThreshold = null;
		boolean subtreeRaising = true;
		String numFolds = null;
		String randomSeed = null;

		if (pruning) {
			String pruningMethod = readParameter(a_OperatorNode, PRUNING_METHOD, DEFAULT_PRUNING_METHOD);
			// One flag drives both the reading of the settings and the option string,
			// so an unrecognised method can no longer produce "-N null -Q null".
			reducedErrorPruning = VALUE_REDUCED_ERROR_PRUNING.equals(pruningMethod);
			if (reducedErrorPruning) {
				numFolds = readParameter(a_OperatorNode, FOLDS, DEFAULT_FOLDS);
				randomSeed = readParameter(a_OperatorNode, SEED, DEFAULT_SEED);
			} else {
				confidenceThreshold = readParameter(a_OperatorNode, CONFIDENCE, DEFAULT_CONFIDENCE);
				subtreeRaising = Boolean.parseBoolean(readParameter(a_OperatorNode, SUBTREE, DEFAULT_SUBTREE));
			}
		}

		boolean cleanup = Boolean.parseBoolean(readParameter(a_OperatorNode, CLEANUP, DEFAULT_CLEANUP));
		String nodeSize = readParameter(a_OperatorNode, NODE_SIZE, DEFAULT_NODE_SIZE);
		boolean binarySplits = Boolean.parseBoolean(readParameter(a_OperatorNode, BINARY_SPLITS, DEFAULT_BINARY_SPLITS));

		/* Translate the settings into the option string of the algorithm */
		StringBuilder options = new StringBuilder();
		if (!cleanup) {
			options.append("-L ");
		}
		if (!pruning) {
			options.append("-U ");
		} else if (reducedErrorPruning) {
			options.append("-S -R -N ").append(numFolds).append(" -Q ").append(randomSeed).append(" ");
		} else {
			if (!subtreeRaising) {
				options.append("-S ");
			}
			options.append("-C ").append(confidenceThreshold).append(" ");
		}
		if (binarySplits) {
			options.append("-B ");
		}
		options.append("-M ").append(nodeSize);
		String decisionTreeParameters = options.toString();

		/* Get input bi object from parent node */
		Operator parentOp = (Operator) a_Parents.elementAt(0);
		setInputBIObject(parentOp.getOutputBIObject());
		IBIData aInputBIData = getInputBIObject().getBIData();
		aInputBIData.getMiningStoredData().reset();

		/* Prepare output data model; it shares the stored data of the input */
		BIData aOutputBIData = new BIData(getCaseID(), getNodeID());
		aOutputBIData.setTransformActionHistory(aInputBIData.getTransformActionHistory());
		aOutputBIData.setTargetAttribute(aInputBIData.getTargetAttribute());
		aOutputBIData.setMiningStoredData(aInputBIData.getMiningStoredData());
		BIModel aOutputBIModel = new BIModel(getCaseID(), getNodeID(), IBIModel.TYPE_CLASSIFIER);

		/* Validate the target attribute */
		MiningAttribute targetAttribute = aInputBIData.getTargetAttribute();
		aOutputBIModel.setTargetAttribute(targetAttribute);
		if (targetAttribute == null) {
			String message = "Categorical Target attribute is missing. Please add target attribute by using Data Set Attribute Node.";
			throw failure(message, message);
		}
		if (!(targetAttribute instanceof CategoricalAttribute)) {
			String message = "Attribute \""+targetAttribute.getName() + "\" is not Categorical.";
			throw failure(message, message);
		}

		// check the range of the numFolds. TWang. Mar 24. 2005.
		if (reducedErrorPruning) {
			int requestedFolds;
			try {
				requestedFolds = Integer.parseInt(numFolds);
			} catch (NumberFormatException e) {
				// Report malformed user input like every other invalid setting
				// instead of leaking an unchecked exception to the caller.
				throw failure("Num of Folds value is not a valid integer.",
						"Num of folds value should be an integer but was \"" + numFolds + "\".");
			}
			int maxFoldNum = numberOfInstance(aInputBIData);
			if (requestedFolds > maxFoldNum) {
				throw failure("Num of Folds value is too large.",
						"Num of folds value should be smaller than or equal to " + maxFoldNum);
			}
		}
		if (((CategoricalAttribute) targetAttribute).isUnboundedCategories()) {
			throw failure("Categorical Target attribute must be bounded.",
					"Attribute \""+targetAttribute.getName() + "\" should be a bounded Categorical attribute.");
		}// >> END Twang.

		/* Create MiningSettings object and assign metadata */
		SupervisedMiningSettings miningSettings = new SupervisedMiningSettings();
		miningSettings.setDataSpecification(aInputBIData.getMetaData());

		/* Assign settings */
		miningSettings.setTarget(targetAttribute);
		try {
			miningSettings.verifySettings();
		} catch (Exception e) {
			String message = "Invalid parameters in building the Decision Tree model.";
			throw failure(message, message);
		}
		/* Set MiningSettings */
		aOutputBIModel.setMiningSettings(miningSettings);

		/* Get default mining algorithm specification from 'algorithms.xml' */
		MiningAlgorithmSpecification miningAlgorithmSpecification =
			MiningAlgorithmSpecification.getMiningAlgorithmSpecification(ALGORITHM_NAME, getNodeInfo());
		if (miningAlgorithmSpecification == null) {
			throw new MiningException( "Can't find Decision Tree classification method." );
		}

		/* Get class name from algorithms specification */
		String className = miningAlgorithmSpecification.getClassname();
		if (className == null) {
			throw new MiningException( "className attribute expected." );
		}

		/* Set MiningAlgorithmSpecification */
		miningAlgorithmSpecification.setMAPValue(MAP_WEKA_CLASS_PARAMETERS, decisionTreeParameters);
		aOutputBIModel.setMiningAlgorithmSpecification(miningAlgorithmSpecification);
		displayMiningAlgSpecParameters(miningAlgorithmSpecification);

		/* Set and display mining parameters */
		GeneralUtils.displayMiningAlgSpecParameters(miningAlgorithmSpecification);

		/* Create algorithm object with default values */
		MiningAlgorithm algorithm = GeneralUtils.createMiningAlgorithmInstance(className, this.getClass().getClassLoader());

		algorithm.setMiningInputStream(aInputBIData.getMiningStoredData());
		algorithm.setMiningSettings(miningSettings);
		algorithm.setMiningAlgorithmSpecification(miningAlgorithmSpecification);
		try {
			algorithm.verify();
		} catch (IllegalArgumentException e) {
			throw new MiningException(e.getMessage());
		}

		MiningModel model = algorithm.buildModel();
		// NOTE(review): the label says "[s]" while the suffix resource key is "ms" -- confirm the unit.
		m_SystemMessageHandler.appendMessage(Resource.srcStr("calculationtime")+" [s]: " + algorithm.getTimeSpentToBuildModel()+Resource.srcStr("ms"));
		m_SystemMessageHandler.nextLine();

		/* set output mining data and model to the output mining object */
		aOutputBIModel.setMiningModel(model);
		aOutputBIModel.setModelName(m_DefaultModelName);
		m_OutputBIObject.setBIData(aOutputBIData);
		m_OutputBIObject.setBIModel(aOutputBIModel);
	}

	/**
	 * Reads a string parameter from the operator node.
	 * @param a_OperatorNode node holding the parameter values
	 * @param name name of the parameter
	 * @param defaultValue value to use when the node has no value for the parameter
	 * @return the stored value, or defaultValue if the stored value is null
	 */
	private static String readParameter(IOperatorNode a_OperatorNode, String name, String defaultValue) {
		String value = (String) a_OperatorNode.getParameterValue(name);
		return (value == null) ? defaultValue : value;
	}

	/**
	 * Appends a message to the system message handler and creates the matching
	 * exception, so that call sites can write "throw failure(...)".
	 * @param systemMessage text appended to the system message handler
	 * @param exceptionMessage text carried by the returned exception
	 * @return the exception to be thrown by the caller
	 */
	private AppException failure(String systemMessage, String exceptionMessage) {
		m_SystemMessageHandler.appendMessage(systemMessage);
		return new AppException(exceptionMessage);
	}

	/**
	 * Counts the stored vectors whose target attribute value is not missing.
	 * @param aInputBIData data whose stored vectors and target attribute are inspected
	 * @return number of stored vectors minus those with a missing target value
	 * @throws MiningException if the index of the target attribute cannot be
	 *         determined; previously this was only printed and the count went on
	 *         with index -1.
	 */
	private int numberOfInstance(IBIData aInputBIData) throws MiningException {
		MiningAttribute targetAttribute = aInputBIData.getTargetAttribute();
		MiningStoredData origInstances = aInputBIData.getMiningStoredData();
		int targetIndex = origInstances.getMetaData().getAttributeIndex(targetAttribute);

		int totalVectors = origInstances.getVectorsNumber();
		int numberOfInstances = totalVectors;
		for (int i = 0; i < totalVectors; i++) {
			MiningVector vector = (MiningVector) origInstances.get(i);
			if (vector.isMissing(targetIndex)) {
				numberOfInstances--;
			}
		}
		return numberOfInstances;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -