⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clusterdistance.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.1
 */

package com.prudsys.pdm.Models.Clustering.Hierarchical;

import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Distance;

/**
 * Class for calculating distances between 2 clusters.
 *
 * This is done via the method clusterDistance.
 * If both clusters contain just one vector, the
 * method corresponds to the distance method
 * of two vectors.
 */
public class ClusterDistance extends Distance
{
  // -----------------------------------------------------------------------
  //  Constants of cluster distance calculation
  // -----------------------------------------------------------------------
  /** Single linkage, also refered to as nearest neighbour method. */
  public static final int CDTYPE_SINGLE_LINKAGE = 1;

  /** Complete linkage, also refered to a furthest neighbour method. */
  public static final int CDTYPE_COMPLETE_LINKAGE = 2;

  /** Average linking, also refered to as UPGMA method. */
  public static final int CDTYPE_UNWEIGHTED_PAIR_GROUP_AVERAGE = 3;

  /** Weighted average linking, also refered to as WPGMA method. */
  public static final int CDTYPE_WEIGHTED_PAIR_GROUP_AVERAGE = 4;

  /** Centroid method. */
  public static final int CDTYPE_CENTROID = 5;

  /** Median method. */
  public static final int CDTYPE_MEDIAN = 6;

  /** Ward's method. */
  public static final int CDTYPE_WARD_METHOD = 7;

  // -----------------------------------------------------------------------
  //  Variables declarations
  // -----------------------------------------------------------------------
  /** Type of cluster distance calculation method. */
  private int clustDistType = CDTYPE_CENTROID;

  /** Distance matrix. */
  private DistanceMatrix distanceMatrix = null;

  // -----------------------------------------------------------------------
  //  Constructors
  // -----------------------------------------------------------------------
  /**
   * Empty constructor.
   */
  public ClusterDistance()
  {
  }

  /**
   * Constructor for a given number of attributes.
   *
   * @param numbAttributes number of attributes
   */
  public ClusterDistance(int numbAttributes) {

    super(numbAttributes);
  }

  // -----------------------------------------------------------------------
  //  Getter and setter methods
  // -----------------------------------------------------------------------
  /**
   * Returns cluster distance type (Single linkage, Complete linkage, ...).
   *
   * @return cluster distance type
   */
  public int getClustDistType()
  {
    return clustDistType;
  }

  /**
   * Sets cluster distance type (Single linkage, Complete linkake, ...).
   *
   * @param type cluster distance type
   */
  public void setClustDistType(int type)
  {
    this.clustDistType = type;
  }

  /**
   * Returns distance matrix.
   *
   * @return distance matrix, null if not used
   */
  public DistanceMatrix getDistanceMatrix()
  {
    return distanceMatrix;
  }

  /**
   * Sets distance matrix.
   *
   * @param distanceMatrix new distance matrix
   */
  public void setDistanceMatrix(DistanceMatrix distanceMatrix)
  {
    this.distanceMatrix = distanceMatrix;
  }

  // -----------------------------------------------------------------------
  //  Methods of cluster distance calculation
  // -----------------------------------------------------------------------
  /**
   * Calculates distance between two clusters.
   *
   * @param clust1 cluster 1
   * @param clust2 cluster 2
   * @return distance between the two clusters
   * @exception MiningException clusters have different meta data
   */
  public double clusterDistance(HierarchicalCluster clust1, HierarchicalCluster clust2) throws MiningException {

    // Look for distance matrix:
    if (distanceMatrix == null)
      throw new MiningException("No distance matrix defined");

    // Look in distance matrix if exist:
    double cdist = distanceMatrix.getDistance(clust1, clust2);
    if (! Category.isMissingValue(cdist))
      return cdist;

    // Distance between vector-clusters => simply vector distance:
    int nc1 = clust1.getContainedVectors().size();
    int nc2 = clust2.getContainedVectors().size();
    if (nc1 == 1 && nc2 == 1) {
      MiningVector cv1 = (MiningVector) clust1.getContainedVectors().elementAt(0);
      MiningVector cv2 = (MiningVector) clust2.getContainedVectors().elementAt(0);
      cdist = distance(cv1, cv2);

      // Add to distance matrix:
      distanceMatrix.putDistance(clust1, clust2, cdist);
      return cdist;
    };

    // New cluster with subclusters (1,2), other cluster 3:
    HierarchicalCluster hc1 = clust1.getChild1();
    HierarchicalCluster hc2 = clust1.getChild2();
    HierarchicalCluster hc3 = clust2;
    if (clust1.getIndex() < clust2.getIndex()) {      // cluster 2 is new
      hc1 = clust2.getChild1();
      hc2 = clust2.getChild2();
      hc3 = clust1;
    };
    int n1 = hc1.getContainedVectors().size();  // vectors in subcluster 1
    int n2 = hc2.getContainedVectors().size();  // vectors in subcluster 2
    int n3 = hc3.getContainedVectors().size();  // vectors in cluster 3
    double d13 = clusterDistance(hc1, hc3);   // distance subcl. 1 - cl. 3
    double d23 = clusterDistance(hc2, hc3);   // distance subcl. 2 - cl. 3
    double d12 = clusterDistance(hc1, hc2);   // distance sbucl. 1 - subcl. 2

    // For type:
    switch (clustDistType)
    {
      case CDTYPE_SINGLE_LINKAGE:
        if ( d13 < d23 )
          cdist = d13;
        else
          cdist = d23;

        break;
      case CDTYPE_COMPLETE_LINKAGE:
        if ( d13 < d23 )
          cdist = d23;
        else
          cdist = d13;

        break;

      case CDTYPE_UNWEIGHTED_PAIR_GROUP_AVERAGE:
        cdist = ( d13 + d23 ) / 2;

        break;
      case CDTYPE_WEIGHTED_PAIR_GROUP_AVERAGE:
        cdist = ( n1*d13 + n2*d23 ) / ( n1 + n2 );

        break;
      case CDTYPE_MEDIAN:
        cdist = 0.5*d13 + 0.5*d23 - 0.25*d12;

        break;
      case CDTYPE_CENTROID:
/*
        cdist = distance(clust1.getCenterVec(), clust2.getCenterVec());
*/
        cdist = n1*d13/(n1+n2) + n2*d23/(n1+n2) -
                n1*n2*d12/( (n1+n2)*(n1+n2) );

        break;

      case CDTYPE_WARD_METHOD:
        cdist = ( (n1 + n3)*d13 + (n2 + n3)*d23 - n3 *d12 ) /
                ( n1 + n2 + n3 );

        break;

      default: throw new MiningException("Unknown cluster distance type specified.");
    };

    // Add to distance matrix:
    distanceMatrix.putDistance(clust1, clust2, cdist);

    return cdist;
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -