📄 kmeansoperator.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* $Date$
* $Revision$
*/
package eti.bi.alphaminer.patch.standard.operation.operator;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAlgorithm;
import com.prudsys.pdm.Core.MiningAlgorithmSpecification;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.ClusteringMiningModel;
import com.prudsys.pdm.Models.Clustering.Distance;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringMiningModel;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringSettings;
import com.prudsys.pdm.Utils.GeneralUtils;
import eti.bi.alphaminer.core.handler.ICaseHandler;
import eti.bi.alphaminer.operation.operator.INodeInfo;
import eti.bi.alphaminer.operation.operator.ModelOperator;
import eti.bi.alphaminer.operation.operator.Operator;
import eti.bi.alphaminer.vo.BIData;
import eti.bi.alphaminer.vo.BIModel;
import eti.bi.alphaminer.vo.BIObject;
import eti.bi.alphaminer.vo.IBIData;
import eti.bi.alphaminer.vo.IBIModel;
import eti.bi.alphaminer.vo.IOperatorNode;
import eti.bi.common.Locale.Resource;
import eti.bi.exception.SysException;
/**
 * Operator that builds a k-means clustering model from its parent's output data and
 * records, for every input vector, the predicted cluster and the distance to it.
 * <p>
 * Current clustering algorithm (k-means) only supports numeric attributes. For categorical
 * attributes, or attributes of mixed types, the clustering results can be meaningless.
 * TWang. June 21, 2005
 */
public class KMeansOperator extends ModelOperator {

    /** Serial version UID. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates the operator; all arguments are passed straight to the superclass constructor.
     *
     * @param a_CaseID     ID of the case
     * @param aNodeInfo    node information
     * @param aCaseHandler case handler
     */
    public KMeansOperator(String a_CaseID, INodeInfo aNodeInfo, ICaseHandler aCaseHandler) {
        super(a_CaseID, aNodeInfo, aCaseHandler);
    }

    /* Parameter name for Clustering Operator in BIML */
    public static final String DISTANCE_TYPE = "Distance type";
    public static final String COMPARE_FUNCTION = "Compare function";
    public static final String MEASURE = "Measure";
    public static final String NORMALIZE = "Normalize";
    public static final String CLUSTER_NUMBER = "Cluster number";
    public static final String ITERATION_NUMBER = "Iteration number";

    /* Default parameter value for Clustering Operator */
    public static final String DEFAULT_DISTANCE_TYPE = String.valueOf(Distance.TYPE_EUCLIDEAN);
    public static final String DEFAULT_COMPARE_FUNCTION = String.valueOf(Distance.COMPARISON_FUNCTION_ABS_DIFF);
    public static final String DEFAULT_MEASURE = String.valueOf(Distance.MEASURE_TYPE_DISTANCE);
    public static final String DEFAULT_NORMALIZE = String.valueOf(false);
    public static final String DEFAULT_CLUSTER_NUMBER = "3";
    public static final String DEFAULT_ITERATION_NUMBER = "100";

    /* Parameter name for MiningSettingSpecification and MiningAlgorithm */
    private static final String ALGORITHM_NAME = "KMeans";
    private static final String MAP_CLUSTER_NUMBER = "numberOfClusters";
    private static final String MAP_ITERATION_NUMBER = "maxNumberOfIterations";

    /* Vectors storing Clustering model statistics */

    /** Predicted cluster labels; {@code null} until the first label is added or after being cleared. */
    private Vector<String> m_Predicted;

    /**
     * Variable used in statistics, stores the int value of the predicted result.
     * Twang. Jan 18, 2005
     */
    private Vector<Integer> m_IntPredicted;

    /** Distances recorded alongside the predictions; {@code null} until the first distance is added or after being cleared. */
    private Vector<Double> m_Distances;
    /* End Twang. */

    /**
     * Sets the node ID and, at the same time, updates the operator label and the default
     * model name so that both include the new ID.
     *
     * @param a_NodeID ID of the node
     */
    public void setNodeID(String a_NodeID) {
        setLabel(getDescription() + " [" + a_NodeID + "]");
        setDefaultModelName("K-Means_" + a_NodeID);
        super.setNodeID(a_NodeID);
    }

    /**
     * Sets the description and, at the same time, updates the operator label and the default
     * model name from the new description and the current node ID.
     *
     * @param a_Description new description of the operator
     */
    public void setDescription(String a_Description) {
        m_Description = a_Description;
        setLabel(m_Description + " [" + m_NodeID + "]");
        setDefaultModelName("K-Means_" + m_NodeID);
    }

    /**
     * Gets the predicted values.
     *
     * @return a vector storing predicted values, or {@code null} if none are stored
     */
    public Vector getPredicted() {
        return m_Predicted;
    }

    /**
     * Gets the predicted values (integers).
     *
     * @return a vector storing predicted values (integers), or {@code null} if none are stored
     */
    public Vector getIntPredicted() {
        return m_IntPredicted;
    }

    /**
     * Clears the stored predicted values.
     */
    public void clearPredicted() {
        m_Predicted = null;
    }

    /**
     * Clears the stored predicted values (integers).
     */
    public void clearIntPredicted() {
        m_IntPredicted = null;
    }

    /**
     * Clears the stored distance values.
     */
    public void clearDis() {
        m_Distances = null;
    }

    /**
     * Adds an entry of predicted cluster, creating the backing vector on first use.
     *
     * @param predict the predicted cluster; {@code null} is stored as the empty string
     */
    public void addPredicted(String predict) {
        if (m_Predicted == null) {
            m_Predicted = new Vector<String>();
        }
        m_Predicted.addElement(predict == null ? "" : predict);
    }

    /**
     * Adds an entry of predicted cluster index, creating the backing vector on first use.
     * A stored value of -1 means the result is not applicable.
     *
     * @param predict the predicted cluster index; {@code null} is stored as -1
     */
    public void addIntPredicted(Integer predict) {
        if (m_IntPredicted == null) {
            m_IntPredicted = new Vector<Integer>();
        }
        m_IntPredicted.addElement(predict == null ? Integer.valueOf(-1) : predict);
    }

    /**
     * Adds a distance entry, creating the backing vector on first use.
     * A stored value of -1 means the distance is not applicable.
     *
     * @param dis the distance; {@code null} is stored as -1
     */
    public void addDistance(Double dis) {
        if (m_Distances == null) {
            m_Distances = new Vector<Double>();
        }
        m_Distances.addElement(dis == null ? Double.valueOf(-1d) : dis);
    }

    /**
     * Tests if the Clustering Operator contains any results.
     *
     * @return true if the output object exists and holds both data and model results; false otherwise.
     */
    public boolean hasResult() {
        return m_OutputBIObject != null
                && m_OutputBIObject.hasResult(BIObject.DATA)
                && m_OutputBIObject.hasResult(BIObject.MODEL);
    }

    /**
     * Builds the clustering model for this Clustering Operator, stores the output data and
     * model in the output object, and then applies the model to every input vector to record
     * its cluster label, cluster index and distance.
     *
     * @param a_OperatorNode Operator Node represented by this Clustering Operator; supplies the
     *                       user parameters (defaults are used for missing ones) and the node ID
     * @param a_Parents a Vector whose first element is the parent {@link Operator} providing the input
     * @throws MiningException declared for failures reported by the mining library calls
     * @throws SysException if no input data is available, or if the algorithm specification
     *                      or its class name cannot be found
     */
    public void execute(IOperatorNode a_OperatorNode, Vector a_Parents)
            throws MiningException, SysException
    {
        /* Get parameter from user input */
        String distanceValue = (String) a_OperatorNode.getParameterValue(DISTANCE_TYPE);
        if (distanceValue == null) {
            distanceValue = DEFAULT_DISTANCE_TYPE;
        }
        String functionValue = (String) a_OperatorNode.getParameterValue(COMPARE_FUNCTION);
        if (functionValue == null) {
            functionValue = DEFAULT_COMPARE_FUNCTION;
        }
        String measureValue = (String) a_OperatorNode.getParameterValue(MEASURE);
        if (measureValue == null) {
            measureValue = DEFAULT_MEASURE;
        }
        String normalizeValue = (String) a_OperatorNode.getParameterValue(NORMALIZE);
        if (normalizeValue == null) {
            normalizeValue = DEFAULT_NORMALIZE;
        }
        String numClusterValue = (String) a_OperatorNode.getParameterValue(CLUSTER_NUMBER);
        if (numClusterValue == null) {
            numClusterValue = DEFAULT_CLUSTER_NUMBER;
        }
        String numIterationValue = (String) a_OperatorNode.getParameterValue(ITERATION_NUMBER);
        if (numIterationValue == null) {
            numIterationValue = DEFAULT_ITERATION_NUMBER;
        }

        /* Get input bi object from parent node */
        if (a_Parents == null || a_Parents.isEmpty()) {
            // Without a parent there is no input; report it the same way as missing data.
            throw new SysException( "No data inputed." );
        }
        Operator parentOp = (Operator) a_Parents.elementAt(0);
        setInputBIObject(parentOp.getOutputBIObject());
        IBIData aInputBIData = getInputBIObject().getBIData();
        // Validate before touching the stored data so that missing input surfaces as a
        // SysException rather than a failure inside reset().
        if (aInputBIData == null || !aInputBIData.hasResult()) {
            throw new SysException( "No data inputed." );
        }
        aInputBIData.getMiningStoredData().reset();

        /* Prepare output data model */
        BIData aOutputBIData = new BIData(getCaseID(), getNodeID());
        aOutputBIData.setTargetAttribute(aInputBIData.getTargetAttribute());
        aOutputBIData.setTransformActionHistory(aInputBIData.getTransformActionHistory());
        aOutputBIData.setMiningStoredData(aInputBIData.getMiningStoredData());
        // NOTE(review): the model is a clustering model but is tagged TYPE_CLASSIFIER;
        // confirm whether IBIModel offers a clustering type that should be used instead.
        BIModel aOutputBIModel = new BIModel(getCaseID(), getNodeID(), IBIModel.TYPE_CLASSIFIER);

        /* Execute Clustering Model Building */
        /* Create MiningSettings object and assign metadata */
        CDBasedClusteringSettings miningSettings = new CDBasedClusteringSettings();
        MiningAttribute miningAttributes[] = aInputBIData.getMetaData().getAttributesArray();
        // Work around: Clustering algorithm does not produce valid result for cat. attribute,
        // so pop up a warning message.
        boolean isContainCats = false;
        for (int i = 0; i < miningAttributes.length; i++) {
            if (miningAttributes[i] instanceof CategoricalAttribute) {
                isContainCats = true;
                break; // one categorical attribute is enough to warn
            }
        }
        if (isContainCats) {
            m_MessageDialog.showWarning("Categorical attributes are not supported and will be ignored in building cluster.","Warning");
        }
        miningSettings.setDataSpecification(aInputBIData.getMetaData());

        /* Assign settings */
        Distance dist = new Distance();
        dist.setType(Integer.parseInt(distanceValue));
        dist.setCompareFunction(Integer.parseInt(functionValue));
        dist.setMeasureType(Integer.parseInt(measureValue));
        dist.setNormalized(normalizeValue.equals(String.valueOf(true)));
        miningSettings.setDistance( dist );
        miningSettings.verifySettings();
        aOutputBIModel.setMiningSettings(miningSettings);

        /* Get default mining algorithm specification from 'algorithms.xml': */
        MiningAlgorithmSpecification miningAlgorithmSpecification =
                MiningAlgorithmSpecification.getMiningAlgorithmSpecification( ALGORITHM_NAME, getNodeInfo());
        if( miningAlgorithmSpecification == null )
        {
            throw new SysException( "Can't find clustering method." );
        }

        /* Get class name from algorithms specification */
        String className = miningAlgorithmSpecification.getClassname();
        if( className == null )
        {
            throw new SysException( "classname attribute expected." );
        }

        /* Set and display mining parameters */
        miningAlgorithmSpecification.setMAPValue(MAP_CLUSTER_NUMBER, numClusterValue);
        miningAlgorithmSpecification.setMAPValue(MAP_ITERATION_NUMBER, numIterationValue);
        aOutputBIModel.setMiningAlgorithmSpecification(miningAlgorithmSpecification);
        displayMiningAlgSpecParameters(miningAlgorithmSpecification);

        /* Create algorithm object with default values */
        MiningAlgorithm algorithm = (CDBasedClusteringAlgorithm)
                GeneralUtils.createMiningAlgorithmInstance(className,this.getClass().getClassLoader());

        /* Put it all together */
        algorithm.setMiningInputStream( aInputBIData.getMiningStoredData() );
        algorithm.setMiningSettings( miningSettings );
        algorithm.setMiningAlgorithmSpecification( miningAlgorithmSpecification );
        algorithm.verify();

        /* Build the mining model */
        MiningModel model = algorithm.buildModel();
        // NOTE(review): the label says "[s]" while an "ms" resource string is appended;
        // confirm which unit getTimeSpentToBuildModel() actually reports.
        m_SystemMessageHandler.appendMessage(Resource.srcStr("calculationtime")+" [s]: " + algorithm.getTimeSpentToBuildModel()+Resource.srcStr("ms"));
        m_SystemMessageHandler.nextLine();
        m_SystemMessageHandler.nextLine();

        /* set output mining data and model to the output mining object */
        aOutputBIModel.setMiningModel(model);
        aOutputBIModel.setModelName("Clustering_"+a_OperatorNode.getNodeID());
        m_OutputBIObject.setBIData(aOutputBIData);
        m_OutputBIObject.setBIModel(aOutputBIModel);

        /* set run time parameter value to the node object (It needs to be stored in the BIML) */
        //a_OperatorNode.setParameterValue("Temporary model", aOutputBIModel.getTempBIModelPath());
        //aOutputBIModel.writeTempBIModel();

        /*
         * Apply the model to get the cluster and distance information.
         * By TWang. Jan 19, 2005.
         */
        Cluster[] clusters = ((ClusteringMiningModel)model).getClusters();
        aInputBIData.getMiningStoredData().reset();
        clearPredicted();
        clearIntPredicted();
        clearDis();
        while (aInputBIData.getMiningStoredData().next()) {
            // Make prediction:
            MiningVector vector = aInputBIData.getMiningStoredData().read();
            int predicted = (int) model.applyModelFunction(vector);
            addPredicted("clust"+String.valueOf(predicted));
            addIntPredicted(Integer.valueOf(predicted));
            if (predicted >= 0 && predicted < clusters.length) {
                double dis = ((CDBasedClusteringMiningModel)model).getDistance(vector, clusters[predicted]);
                addDistance(Double.valueOf(dis));
            } else {
                // No matching cluster for this index: record the "not applicable"
                // distance (-1) instead of indexing outside the cluster array.
                addDistance(null);
            }
        }
    }

    /**
     * Gets the stored distances.
     *
     * @return the vector of distances, or {@code null} if none are stored
     */
    public Vector getDistances() {
        return m_Distances;
    }

    /**
     * Replaces the stored distances.
     *
     * @param distances the vector of distances to store; presumably holds {@link Double} entries
     *                  like those added by {@link #addDistance(Double)} — not enforced here
     */
    @SuppressWarnings("unchecked") // public signature stays raw for existing callers
    public void setDistances(Vector distances) {
        m_Distances = distances;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -