cdbasedclusteringminingmodel.java

来自「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」· Java 代码 · 共 477 行 · 第 1/2 页
JAVA
477 行
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.0
 */

package com.prudsys.pdm.Models.Clustering.CDBased;

import java.io.FileWriter;
import java.io.Reader;
import java.io.Writer;

import com.prudsys.pdm.Adapters.PmmlVersion20.ClusteringField;
import com.prudsys.pdm.Adapters.PmmlVersion20.ClusteringModel;
import com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure;
import com.prudsys.pdm.Adapters.PmmlVersion20.DataDictionary;
import com.prudsys.pdm.Adapters.PmmlVersion20.Header;
import com.prudsys.pdm.Adapters.PmmlVersion20.MiningSchema;
import com.prudsys.pdm.Adapters.PmmlVersion20.PMML;
import com.prudsys.pdm.Adapters.PmmlVersion20.TransformationDictionary;
import com.prudsys.pdm.Core.ApplicationInputSpecification;
import com.prudsys.pdm.Core.MetaDataOperations;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.MiningMatrixElement;
import com.prudsys.pdm.Core.MiningModel;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Input.Records.Arff.MiningArffStream;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.ClusteringMiningModel;
import com.prudsys.pdm.Models.Clustering.ClusteringSettings;
import com.prudsys.pdm.Models.Clustering.Distance;
import com.prudsys.pdm.Transform.MiningTransformationActivity;
import com.prudsys.pdm.Utils.PmmlUtils;

/**
  * Description of data produced by a center-based or
  * distribution-based clustering method. <p>
  *
  * From PDM CWM extension. <p>
  *
  * Superclasses:
  * <ul>
  *   <li> ClusteringMiningModel
  * </ul>
  *
  * In addition, functionality from PMML was added.
  * It corresponds to the PMML element ClusteringModel.
  *
  * @see com.prudsys.pdm.Adapters.PmmlVersion20.ClusteringModel
  */
public class CDBasedClusteringMiningModel extends ClusteringMiningModel
{
    // -----------------------------------------------------------------------
    //  Variables declarations
    // -----------------------------------------------------------------------
    /** Type constant marking a center-based clustering model. */
    public static final int CENTER_BASED = 1;

    /** Type constant marking a distribution-based clustering model. */
    public static final int DISTRIBUTION_BASED = 2;

    /**
     * Type of this clustering model. Initialized to {@link #CENTER_BASED};
     * presumably always one of CENTER_BASED or DISTRIBUTION_BASED, although
     * the setter does not enforce this.
     */
    private int type = CENTER_BASED; 
    

    // -----------------------------------------------------------------------
    //  Constructor
    // -----------------------------------------------------------------------
    /**
     * Creates an empty CD-based clustering model. The mining function is
     * preset to clustering and the algorithm to center-based clustering.
     */
    public CDBasedClusteringMiningModel()
    {
        this.algorithm = MiningModel.CENTER_BASED_CLUSTERING_ALGORITHM;
        this.function  = MiningModel.CLUSTERING_FUNCTION;
    }

    // -----------------------------------------------------------------------
    //  Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Selects which kind of clustering this model represents. The value is
     * stored as given; no validation is performed.
     *
     * @param type new clustering type, e.g. {@link #CENTER_BASED} or
     *             {@link #DISTRIBUTION_BASED}
     */
    public void setType(final int type)
    {
        this.type = type;
    }

    /**
     * Reports which kind of clustering this model represents.
     *
     * @return the stored clustering type, e.g. {@link #CENTER_BASED} or
     *         {@link #DISTRIBUTION_BASED}
     */
    public int getType()
    {
        return this.type;
    }

    // -----------------------------------------------------------------------
    //  Apply model to new data
    // -----------------------------------------------------------------------
    /**
     * Assigns a mining vector to a cluster and returns that cluster's
     * number. The work is delegated to
     * {@link #applyModelFunction(MiningVector)}, whose result is truncated
     * to an int. May be overridden by extended clustering models.
     *
     * @param miningVector mining vector to be assigned to a cluster
     * @return number of the cluster the vector belongs to, as computed by
     *         applyModelFunction
     * @throws MiningException if there are some errors when model is applied
     * @deprecated since version 1.1, use applyModel instead
     */
    public int apply(MiningVector miningVector) throws MiningException
    {
        final double clusterNumber = applyModelFunction(miningVector);
        return (int) clusterNumber;
    }

    /**
     * Evaluates the CD-based clustering model for one mining vector by
     * picking the cluster whose center vector has the smallest distance
     * to it. On equal distances the cluster with the lowest index wins.
     * <p>
     * The meta data of the mining vector should be similar to the meta data
     * of this model so that training and application data are compatible.
     *
     * @param miningVector mining vector where the model should be applied
     * @return index of the nearest cluster, or -1 if the model holds no
     *         clusters
     * @throws MiningException if there are some errors when model is applied
     */
    public double applyModelFunction(MiningVector miningVector)
        throws MiningException
    {
        // Without clusters there is nothing the vector could be assigned to.
        if (clusters == null || clusters.length == 0) {
            return -1;
        }

        // Inner transformations first (e.g. missing value replacement, outliers).
        MiningVector prepared = miningVector;
        if (miningTransform != null) {
            prepared = miningTransform.transform(prepared);
        }

        // Map the vector into the basis of the mining model, matching
        // attributes by name and type.
        MetaDataOperations modelMetaOp = miningSettings.getDataSpecification().getMetaDataOp();
        modelMetaOp.setUsageType(MetaDataOperations.USE_ATT_NAMES_AND_TYPES);
        final MiningVector modelVec = modelMetaOp.transform(prepared);

        // Linear scan over all cluster centers, seeded with the first cluster.
        int bestIndex   = 0;
        double bestDist = distance.distance( modelVec, clusters[0].getCenterVec() );
        int idx = 1;
        while (idx < getNumberOfClusters()) {
            final double candidate = distance.distance( modelVec, clusters[idx].getCenterVec() );
            if (candidate < bestDist) {
                bestIndex = idx;
                bestDist  = candidate;
            }
            idx++;
        }

        return bestIndex;
    }
    
    /**
     * Returns the distance between a mining vector and the center vector
     * of the given cluster. The vector is first run through the inner
     * transformations (if any) and mapped into the basis of the mining
     * model, exactly as done when the model is applied.
     * >>By TWang. Jan 19, 2005.
     *
     * @param miningVector mining vector whose distance should be measured
     * @param cluster cluster to whose center vector the distance is measured
     * @return distance between the transformed vector and the cluster
     *         center, or -1 if the model holds no clusters
     * @throws MiningException if there are some errors when model is applied
     */
    public double getDistance(MiningVector miningVector, Cluster cluster) throws MiningException {

        // A model without clusters yields the sentinel value -1.
        if (clusters == null || clusters.length == 0) {
            return -1;
        }

        // Inner transformations first (e.g. missing value replacement, outliers).
        MiningVector prepared = miningVector;
        if (miningTransform != null) {
            prepared = miningTransform.transform(prepared);
        }

        // Map the vector into the basis of the mining model, matching
        // attributes by name and type.
        MetaDataOperations modelMetaOp = miningSettings.getDataSpecification().getMetaDataOp();
        modelMetaOp.setUsageType(MetaDataOperations.USE_ATT_NAMES_AND_TYPES);
        final MiningVector modelVec = modelMetaOp.transform(prepared);

        // Measure against the center of the requested cluster only.
        final double result = distance.distance( modelVec, cluster.getCenterVec() );
        return result;
    }
    

    /**
     * Determines the cluster object the given mining vector belongs to.
     * The argument is cast to a MiningVector and passed to
     * {@link #applyModelFunction(MiningVector)}; the resulting index selects
     * the cluster. The meta data of the mining vector should be similar to
     * the meta data of this model so that training and application data
     * are compatible.
     *
     * @param miningData mining vector where the cluster should be determined;
     *                   must be a MiningVector, otherwise the cast fails
     * @return cluster where the mining vector belongs to, or null if
     *         applyModelFunction yields a negative index
     * @throws MiningException if there are some errors when model is applied
     */
    public MiningMatrixElement applyModel(MiningMatrixElement miningData)
        throws MiningException {

        final int clusterIndex = (int) applyModelFunction((MiningVector) miningData);

        // A negative index signals that no cluster could be determined.
        return (clusterIndex < 0) ? null : clusters[clusterIndex];
    }

    // -----------------------------------------------------------------------
    //  Methods of PMML handling
    // -----------------------------------------------------------------------
    /**
     * Read clustering model from PMML document.
     *
     * @param reader reader for the PMML document
     * @exception MiningException cannot read PMML model
     */
    public void readPmml( Reader reader ) throws MiningException
    {
//        com.borland.xml.toolkit.XmlUtil.setEncoding( "UTF-8" );
cdbasedclusteringminingmodel.java - 源码说明

本页面展示了「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」中的 cdbasedclusteringminingmodel.java 源码文件，采用 Java 编程语言编写，共 477 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ALPHAMINERR相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?