📄 predictionassessmentoperator.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
				
				m_InputBIDatasets[m_inputDatasetsNum] = aBIObject.getBIData();
				m_InputBIDatasetsNames[m_inputDatasetsNum] = datasetName;
				m_inputDatasetsNum++;
			 }
			}
		}
		
		 
	}
	/**
	 * Walks upward from the given operator through its parent operators, depth first, and records every
	 * {@link InputOperator} that has not been recorded yet.
	 * <p>
	 * For each newly found input operator, an entry of the form
	 * {@code <data source name> + DATASET_NAME_DELIMETER + <node ID>} is appended to {@code m_DatasetsList} and the
	 * node ID is appended to {@code m_DatasetsIDList}. The data source name is the operator node's name when that
	 * name is non-null, otherwise the operator's default data source name. Parents that are not input operators
	 * are searched recursively in the same way.
	 * 
	 * By Twang, Feb 22, 2005.
	 * 
	 * @param a_parentOp
	 *            the operator whose parent operators are searched
	 * @throws SysException
	 *             if one of the lookups performed during the search throws it
	 */
	@SuppressWarnings({"unused", "unchecked"})
	private void addDataSetRecursively(Operator a_parentOp) throws SysException {
		// Currently, only InputOperator parents are recognized and recorded; any other parent is just traversed.
		Vector ancestors = m_CaseHandler.getParentOperators(m_CaseID, a_parentOp.getNodeID());
		if (ancestors == null) {
			return;
		}

		for (int idx = 0; idx < ancestors.size(); idx++) {
			Operator ancestor = (Operator) ancestors.get(idx);

			if (!(ancestor instanceof InputOperator)) {
				// Not a data source itself: keep climbing towards its own parents.
				addDataSetRecursively(ancestor);
				continue;
			}
			if (m_DatasetsIDList.contains(ancestor.getNodeID())) {
				// This data source has already been recorded.
				continue;
			}

			// The data source node ID becomes a substring of the data set name, placed right after
			// DATASET_NAME_DELIMETER.
			IOperatorNode sourceNode = m_CaseHandler.getOperatorNode(m_CaseID, ancestor.getNodeID());
			String sourceName = sourceNode.getName();
			if (sourceName == null) {
				sourceName = ancestor.getDefaultDataSourceName();
			}

			m_DatasetsList.add(sourceName + PredictionAssessmentOperatorUtil.DATASET_NAME_DELIMETER
					+ ancestor.getNodeID());
			m_DatasetsIDList.add(ancestor.getNodeID());
		}
	}

	/**
	 * Tells whether the given operator corresponds to one of the selected data set names.
	 * <p>
	 * The input data set name is the original default data source name plus the parent node ID; the new name is
	 * created in the AssessmentOperatorProperty class. TWang. Accordingly, a name counts as a match when it contains
	 * both the operator's default data source name and the operator's node ID.
	 * <p>
	 * NOTE(review): both checks are plain substring searches, so a node ID that happens to be contained in another
	 * node ID would match as well - confirm this is acceptable for the ID format in use.
	 * 
	 * @param aOp
	 *            the operator to look for
	 * @param selectedDatasetsName
	 *            the selected data set names; every element is cast to {@code String}
	 * @return {@code true} as soon as one name matches, {@code false} when none does
	 * @throws SysException
	 *             if one of the operator accessors throws it
	 */
	@SuppressWarnings("unused")
	private boolean isSelectedDataSet(Operator aOp, ArrayList selectedDatasetsName) throws SysException {
		for (Object entry : selectedDatasetsName) {
			String candidate = (String) entry;
			boolean mentionsSourceName = candidate.indexOf(aOp.getDefaultDataSourceName()) >= 0;
			if (mentionsSourceName && candidate.indexOf(aOp.getNodeID()) >= 0) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Builds one {@link EvaluationMatrix} for every (input model, input data set) pair and appends it to
	 * {@code m_ConfusionMatrixLists}.
	 * <p>
	 * For each pair the data set is first transformed with {@code transformData} using the operator that belongs to
	 * the model, then evaluated with {@code runMatricesCalculation}. The resulting matrix is labelled with the model
	 * name, the data set name and the display values of the model's target attribute categories.
	 * <p>
	 * The class names are determined per model. When a model's target attribute has no values, its matrices get
	 * {@code null} class names rather than the names of a previously processed model.
	 * 
	 * @param dbThreshold
	 *            threshold value forwarded unchanged to {@code runMatricesCalculation}
	 * @throws MiningException
	 *             if the evaluation of a model fails
	 * @throws AppException
	 *             if a called routine throws it
	 * @throws SysException
	 *             if a data set cannot be transformed
	 */
	@SuppressWarnings("unchecked")
	private void caculateConfusionMatrices(double dbThreshold) throws MiningException, AppException, SysException {
		for (int i = 0; i < m_inputModelsNum; i++) {
			String modelName = m_InputBIModels[i].getModelName();

			// Scoped to the current model on purpose: a model whose target has no values must not
			// inherit the class names collected for the model processed before it.
			String[] classNames = null;
			ArrayList valueList = getTargetAttribute(m_InputBIModels[i]).getValues();
			if (valueList.size() > 0) {
				classNames = new String[valueList.size()];
				for (int k = 0; k < valueList.size(); k++) {
					classNames[k] = ((Category) valueList.get(k)).getDisplayValue();
				}
			}

			for (int j = 0; j < m_inputDatasetsNum; j++) {
				String datasetName = m_InputBIDatasetsNames[j];

				// Transform the original data set the same way the model's training data was transformed.
				MiningStoredData transformedMiningStoredData = transformData(m_InputModelOperators[i],
						m_InputBIDatasets[j].getMiningStoredData());

				EvaluationMatrix evaluationMatrix = runMatricesCalculation(m_InputBIModels[i],
						transformedMiningStoredData, dbThreshold);
				evaluationMatrix.setM_ModelName(modelName);
				evaluationMatrix.setM_DataName(datasetName);
				evaluationMatrix.setM_ClassNames(classNames);
				// The statistics names are set inside runMatricesCalculation().
				m_ConfusionMatrixLists.add(evaluationMatrix);
			}
		}
	}

	/**
	 * Replays on a data set the transform actions recorded for an operator's input data. Feb. 22, 2005.
	 * <p>
	 * The action history is read from {@code operator.getInputBIObject().getBIData()}. History entries that are
	 * {@link DataTransformAction} instances are applied in order, each one to the result of the previous one; all
	 * other entries are skipped.
	 * 
	 * @param operator
	 *            the operator whose input data carries the transform action history
	 * @param originalMiningStoredData
	 *            the data set to transform
	 * @return the data after every recorded action has been applied; the original instance when the history
	 *         contains no transform action
	 * @throws SysException
	 *             if the transform history is unavailable, if {@code originalMiningStoredData} is {@code null}, or
	 *             if an action fails
	 */
	private MiningStoredData transformData(Operator operator, MiningStoredData originalMiningStoredData)
			throws SysException {
		IBIData trainingData = operator.getInputBIObject().getBIData();
		Vector history = (trainingData == null) ? null : trainingData.getTransformActionHistory();

		if (history == null) {
			throw new SysException("Transform action of the training is lost");
		}
		if (originalMiningStoredData == null) {
			throw new SysException("Input data source has no data.");
		}

		// Walk the history and apply the same transform actions to the scoring data source.
		MiningStoredData current = originalMiningStoredData;
		for (int step = 0; step < history.size(); step++) {
			Object entry = history.get(step);
			if (!(entry instanceof DataTransformAction)) {
				continue;
			}
			try {
				current = ((DataTransformAction) entry).transform(current);
			} catch (Exception e) {
				throw new SysException("Unable to transform Assessment Data.");
			}
		}
		return current;
	}

	/**
	 * Evaluates a Weka-backed supervised model on the given assessment data and packages the confusion matrix,
	 * the overall precision and the per-class statistics into an {@link EvaluationMatrix}.
	 * <p>
	 * Per-class statistics columns: 0 = true positive rate, 1 = false positive rate, 2 = precision,
	 * 3 = F-measure, 4 = {@code 1 - precision}, and, for J48 classifiers only, 5 = the value of
	 * {@code eti_truePositiveConfidence}. Logistic classifiers use five columns and
	 * {@code REGRESSION_CLASSIFIERSTATISTICS_NAMES}; every other classifier uses six columns and
	 * {@code TREE_CLASSIFIERSTATISTICS_NAMES}.
	 * 
	 * @param inputModel
	 *            the model to evaluate; its mining model is cast to {@code WekaSupervisedMiningModel}
	 * @param assessmentData
	 *            the (already transformed) data to evaluate the model on; its cursor is reset before conversion
	 * @param dbThreshold
	 *            threshold value forwarded to {@code Evaluation.eti_evaluateModel}
	 * @return the populated evaluation matrix, never {@code null}
	 * @throws MiningException
	 *             if the data cannot be converted to Weka instances, if the conversion yields no instances, if
	 *             the target attribute is missing from the data, or if the Weka evaluation fails
	 * @throws AppException
	 *             declared for callers; not thrown directly by this method
	 * @throws SysException
	 *             declared for callers; not thrown directly by this method
	 */
	private EvaluationMatrix runMatricesCalculation(IBIModel inputModel, MiningStoredData assessmentData,
			double dbThreshold) throws MiningException, AppException, SysException {
		// Assume that only weka's classifiers are used in KBBI Platform training version.
		// J48, and Logistic Regression.
		WekaSupervisedMiningModel miningModel = (WekaSupervisedMiningModel) inputModel.getMiningModel();
		String targetAttributeName = miningModel.getTarget().getName();
		WekaClassifier wekaClassifier = (WekaClassifier) miningModel.getClassifier();
		Object classifer = wekaClassifier.getWekaClassifier();

		// Get Weka instances from mining stream:
		Instances wekaInstances = null;
		try {
			// Reset the cursor of the MiningStoredData set, so the transform starts from
			// the first record. Otherwise, the returned object might be NULL. TWang.
			assessmentData.reset();
			wekaInstances = (Instances) WekaCoreAdapter.PDMMiningInputStream2WekaInstances(assessmentData);
		} catch (Exception e) {
			e.printStackTrace();
			throw new MiningException("Could not call weka's model valuation module correctly.");
		}
		// The conversion may hand back null (see the note above); report that as invalid data
		// instead of failing with a NullPointerException further down.
		if (wekaInstances == null) {
			throw new MiningException("Invalid model assessment data.");
		}

		// Set the target class in WekaInstances
		Attribute targetAtt = wekaInstances.attribute(targetAttributeName);
		if (targetAtt == null) {
			throw new MiningException("Invalid model assessment data.");
		}
		wekaInstances.setClass(targetAtt);

		EvaluationMatrix evaluationMatrix = null;
		try {
			// Call Weka's evaluation interfaces, for KBBI platform, no cost-matrix is
			// supplied in the training version. The typed null selects the (Instances, CostMatrix) constructor.
			CostMatrix costMatrix = null;
			Evaluation wekaEvaluation = new Evaluation(wekaInstances, costMatrix);

			// Call the modified interface for model evaluation.
			// NOTE(review): the meaning of the fixed 0.90 argument is not visible from this method - confirm.
			wekaEvaluation.eti_evaluateModel((Classifier) classifer, wekaInstances, dbThreshold, 0.90);

			evaluationMatrix = new EvaluationMatrix();
			evaluationMatrix.setM_ConfusionMatrixElements(wekaEvaluation.confusionMatrix());

			// Set the statistics names/columns for different classifiers. Only Logistic gets the
			// regression layout; J48 and any other classifier share the tree layout.
			boolean isTree = classifer instanceof J48;
			int nNumClasses = wekaInstances.numClasses();
			String[] statisticsName;
			double[][] dbStatisticalElement;
			if (!isTree && classifer instanceof Logistic) {
				dbStatisticalElement = new double[nNumClasses][5];
				statisticsName = REGRESSION_CLASSIFIERSTATISTICS_NAMES;
			} else {
				dbStatisticalElement = new double[nNumClasses][6];
				statisticsName = TREE_CLASSIFIERSTATISTICS_NAMES;
			}
			evaluationMatrix.setM_StatisticsNames(statisticsName);
			evaluationMatrix.setM_OverallPrecision(wekaEvaluation.totalPrecision());

			for (int i = 0; i < nNumClasses; i++) {
				dbStatisticalElement[i][0] = wekaEvaluation.truePositiveRate(i);
				dbStatisticalElement[i][1] = wekaEvaluation.falsePositiveRate(i);
				dbStatisticalElement[i][2] = wekaEvaluation.precision(i);
				dbStatisticalElement[i][3] = wekaEvaluation.fMeasure(i);
				dbStatisticalElement[i][4] = 1 - wekaEvaluation.precision(i);
				if (isTree) {
					// The sixth column is only filled for J48; for other six-column classifiers it stays 0.
					dbStatisticalElement[i][5] = wekaEvaluation.eti_truePositiveConfidence(i);
				}
			}

			evaluationMatrix.setM_StatisticsMatrixElements(dbStatisticalElement);
		} catch (Exception e) {
			e.printStackTrace();
			throw new MiningException("Could not call weka's model valuation module correctly.");
		}

		return evaluationMatrix;
	}

	/**
	 * Returns the target attribute of a model's mining settings as a {@link CategoricalAttribute}.
	 * <p>
	 * Both casts are unconditional. Only supervised models are offered in the assessment property dialog, so the
	 * settings are treated as {@link SupervisedMiningSettings}; only categorical attributes can be chosen as
	 * target variables in J48 and Logistic supervised models, so the target is treated as categorical.
	 * 
	 * @param inputModel
	 *            the model whose target attribute is requested
	 * @return the target attribute of the model's mining settings
	 */
	private CategoricalAttribute getTargetAttribute(IBIModel inputModel) {
		SupervisedMiningSettings supervisedSettings = (SupervisedMiningSettings) inputModel.getMiningSettings();
		return (CategoricalAttribute) supervisedSettings.getTarget();
	}

	/**
	 * Builds one {@link EvaluationChartData} for every (input model, input data set) pair and appends it to
	 * {@code m_EvaluationChartDataLists}.
	 * <p>
	 * For each pair the data set is transformed with {@code transformData} using the operator that belongs to the
	 * model, and the chart values are then computed by {@code runChartDataCalculation} with {@code QUANTILE_NUM}
	 * quantiles.
	 * 
	 * @param chartType
	 *            chart type forwarded unchanged to {@code runChartDataCalculation}
	 * @param nTargetLevel
	 *            target level forwarded unchanged to {@code runChartDataCalculation}
	 * @throws MiningException
	 *             if a called routine throws it
	 * @throws AppException
	 *             if a called routine throws it
	 * @throws SysException
	 *             if a data set cannot be transformed
	 */
	@SuppressWarnings("unchecked")
	private void caculateEvaluationChartData(String chartType, int nTargetLevel) throws MiningException, AppException,
			SysException {
		for (int modelIdx = 0; modelIdx < m_inputModelsNum; modelIdx++) {
			String modelName = m_InputBIModels[modelIdx].getModelName();

			for (int dataIdx = 0; dataIdx < m_inputDatasetsNum; dataIdx++) {
				EvaluationChartData chart = new EvaluationChartData(modelName, m_InputBIDatasetsNames[dataIdx], 0);

				// Transform the original data set the same way the model's training data was transformed.
				MiningStoredData prepared = transformData(m_InputModelOperators[modelIdx],
						m_InputBIDatasets[dataIdx].getMiningStoredData());

				chart.setM_ChartDataElements(runChartDataCalculation(m_InputBIModels[modelIdx], prepared,
						nTargetLevel, chartType, QUANTILE_NUM));
				m_EvaluationChartDataLists.add(chart);
			}
		}
	}

	private double[][] runChartDataCalculation(IBIModel inputModel, MiningStoredData assessmentData, int nTargetLevel,
			String chartType, int nQuantileNum) throws MiningException, AppException, SysException {
		// Assume that only weka's classifiers are used in KBBI Platform training version.
		// J48, and Logistic Regression.
		WekaSupervisedMiningModel miningModel = (WekaSupervisedMiningModel) inputModel.getMiningModel();
		String targetAttributeName = miningModel.getTarget().getName();
		WekaClassifier wekaClassifier = (WekaClassifier) miningModel.getClassifier();
		Object classifer = wekaClassifier.getWekaClassifier();

		// Get Weka instances from mining stream:
		Instances wekaInstances = null;
		double[][] classifierResults = null;

		try {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -