// File: cdbasedclusteringminingmodel.java
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Models.Clustering.CDBased;
import java.io.FileWriter;
import java.io.Reader;
import java.io.Writer;
import com.prudsys.pdm.Adapters.PmmlVersion20.ClusteringField;
import com.prudsys.pdm.Adapters.PmmlVersion20.ClusteringModel;
import com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure;
import com.prudsys.pdm.Adapters.PmmlVersion20.DataDictionary;
import com.prudsys.pdm.Adapters.PmmlVersion20.Header;
import com.prudsys.pdm.Adapters.PmmlVersion20.MiningSchema;
import com.prudsys.pdm.Adapters.PmmlVersion20.PMML;
import com.prudsys.pdm.Adapters.PmmlVersion20.TransformationDictionary;
import com.prudsys.pdm.Core.ApplicationInputSpecification;
import com.prudsys.pdm.Core.MetaDataOperations;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningMatrixElement;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.ClusteringMiningModel;
import com.prudsys.pdm.Models.Clustering.ClusteringSettings;
import com.prudsys.pdm.Models.Clustering.Distance;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Utils.PmmlUtils;
/**
* Description of data produced by a center-based or
* distribution-based clustering method. <p>
*
* From PDM CWM extension. <p>
*
* Superclasses:
* <ul>
* <li> ClusteringMiningModel
* </ul>
*
* In addition, functionality from PMML was added.
* It corresponds to the PMML element ClusteringModel.
*
* @see com.prudsys.pdm.Adapters.PmmlVersion20.ClusteringModel
*/
public class CDBasedClusteringMiningModel extends ClusteringMiningModel
{
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/** Type constant identifying a center-based clustering model. */
public static final int CENTER_BASED = 1;
/** Type constant identifying a distribution-based clustering model. */
public static final int DISTRIBUTION_BASED = 2;
/**
* Type of clustering represented by this model; initialized to
* {@link #CENTER_BASED}. Accessed through getType and setType.
*/
private int type = CENTER_BASED;
// -----------------------------------------------------------------------
// Constructor
// -----------------------------------------------------------------------
/**
 * Creates an empty model. The mining function is preset to
 * {@code MiningModel.CLUSTERING_FUNCTION} and the algorithm to
 * {@code MiningModel.CENTER_BASED_CLUSTERING_ALGORITHM}.
 */
public CDBasedClusteringMiningModel()
{
    super();
    this.algorithm = MiningModel.CENTER_BASED_CLUSTERING_ALGORITHM;
    this.function = MiningModel.CLUSTERING_FUNCTION;
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
* Sets the type of clustering represented by this model.
* The value is stored as given; no validation is performed here.
*
* @param type new type of clustering, intended to be one of
* {@link #CENTER_BASED} or {@link #DISTRIBUTION_BASED}
*/
public void setType(int type)
{
this.type = type;
}
/**
 * Returns the type of clustering represented by this model.
 *
 * @return the stored clustering type; {@link #CENTER_BASED} unless
 *         changed through {@link #setType(int)}
 */
public int getType()
{
    return this.type;
}
// -----------------------------------------------------------------------
// Apply model to new data
// -----------------------------------------------------------------------
/**
 * Applies the clustering model to a mining vector and returns the number
 * of the cluster it belongs to. This simply delegates to
 * {@link #applyModelFunction(MiningVector)}, which chooses the cluster with
 * the nearest center vector, and truncates the result to an int.
 * May be overridden by extended clustering models.
 *
 * @param miningVector mining vector to be assigned to a cluster
 * @return number of the cluster the vector belongs to
 * @throws MiningException if there are some errors when model is applied
 * @deprecated since version 1.1, use applyModel instead
 */
public int apply(MiningVector miningVector) throws MiningException
{
    final double clusterNumber = applyModelFunction(miningVector);
    return (int) clusterNumber;
}
/**
 * Applies the function of this CD-based clustering model to a mining
 * vector: determines the index of the cluster whose center vector is
 * nearest to the vector according to the model's distance.
 * The meta data of the mining vector should be similar to the meta data
 * of this model, which ensures compatibility of training and
 * application data.
 *
 * @param miningVector mining vector where the model should be applied
 * @return index of the nearest cluster, or -1 if the model holds no
 *         clusters; on equal distances the cluster with the lowest index wins
 * @throws MiningException if there are some errors when model is applied
 */
public double applyModelFunction(MiningVector miningVector)
    throws MiningException
{
    // Nothing to compare against without clusters.
    if (clusters == null || clusters.length == 0) {
        return -1;
    }

    // Inner transformations (e.g. missing value replacement, outliers)
    // are applied only when the model defines them.
    final MiningVector preprocessed = (miningTransform == null)
        ? miningVector
        : miningTransform.transform(miningVector);

    // Map the vector into the basis of the mining model, matching
    // attributes by name and type.
    final MetaDataOperations modelMetaOp =
        miningSettings.getDataSpecification().getMetaDataOp();
    modelMetaOp.setUsageType(MetaDataOperations.USE_ATT_NAMES_AND_TYPES);
    final MiningVector modelVector = modelMetaOp.transform(preprocessed);

    // Linear scan for the closest center, seeded with the first cluster.
    int bestIndex = 0;
    double bestDistance = distance.distance(modelVector, clusters[0].getCenterVec());
    int index = 1;
    while (index < getNumberOfClusters()) {
        final double candidate =
            distance.distance(modelVector, clusters[index].getCenterVec());
        // Strict comparison keeps the earliest cluster on ties.
        if (candidate < bestDistance) {
            bestDistance = candidate;
            bestIndex = index;
        }
        index++;
    }
    return bestIndex;
}
/**
 * Returns the distance between a mining vector and the center vector of
 * the given cluster. The vector first passes through the model's inner
 * transformations (if any) and is mapped into the basis of the mining
 * model before the distance is measured.
 * Added by TWang, Jan 19, 2005.
 *
 * @param miningVector mining vector whose distance should be measured
 * @param cluster cluster whose center vector is the reference point
 * @return distance between the transformed vector and the cluster center,
 *         or -1 if this model holds no clusters
 * @throws MiningException if there are some errors when model is applied
 */
public double getDistance(MiningVector miningVector, Cluster cluster) throws MiningException {
    // A model without clusters yields the sentinel value -1.
    if (clusters == null || clusters.length == 0) {
        return -1;
    }

    // Inner transformations (e.g. missing value replacement, outliers)
    // are applied only when the model defines them.
    final MiningVector preprocessed = (miningTransform == null)
        ? miningVector
        : miningTransform.transform(miningVector);

    // Map the vector into the basis of the mining model, matching
    // attributes by name and type.
    final MetaDataOperations modelMetaOp =
        miningSettings.getDataSpecification().getMetaDataOp();
    modelMetaOp.setUsageType(MetaDataOperations.USE_ATT_NAMES_AND_TYPES);
    final MiningVector modelVector = modelMetaOp.transform(preprocessed);

    // Measure against the center of the requested cluster.
    final double result = distance.distance(modelVector, cluster.getCenterVec());
    return result;
}
/**
 * Returns the cluster object the given mining vector belongs to, as
 * determined by {@link #applyModelFunction(MiningVector)}.
 * The meta data of the mining vector should be similar to the meta data
 * of this model, which ensures compatibility of training and
 * application data.
 *
 * @param miningData mining vector whose cluster should be determined;
 *        it is cast to MiningVector unconditionally
 * @return cluster the mining vector belongs to, or null if the model
 *         function reports a negative cluster index
 * @throws MiningException if there are some errors when model is applied
 */
public MiningMatrixElement applyModel(MiningMatrixElement miningData)
    throws MiningException {
    final int clusterIndex = (int) applyModelFunction((MiningVector) miningData);
    // A negative index means no cluster could be assigned.
    return (clusterIndex >= 0) ? clusters[clusterIndex] : null;
}
// -----------------------------------------------------------------------
// Methods of PMML handling
// -----------------------------------------------------------------------
/**
* Read clustering model from PMML document.
*
* @param reader reader for the PMML document
* @exception MiningException cannot read PMML model
*/
public void readPmml( Reader reader ) throws MiningException
{
// com.borland.xml.toolkit.XmlUtil.setEncoding( "UTF-8" );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -