⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hierarchicalagglomerative.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.1
 */

package com.prudsys.pdm.Models.Clustering.Hierarchical.Algorithms;

import java.util.Vector;

import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Hierarchical.ClusterDistance;
import com.prudsys.pdm.Models.Clustering.Hierarchical.DistanceMatrix;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalCluster;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalClusteringAlgorithm;

/**
 * Class for hierarchical agglomerative clustering.
 *
 * <p>Every input vector starts as its own leaf cluster. The two closest
 * clusters that have not been merged yet are then merged repeatedly until a
 * single cluster (the root) remains. The result is stored in the
 * <code>clusters</code> array: leaves first, merged clusters in creation
 * order, the root last.</p>
 */
public class HierarchicalAgglomerative extends HierarchicalClusteringAlgorithm
{

  /**
   * Runs hierarchical agglomerative clustering algorithm.
   *
   * <p>The mining input stream is read twice: once to count the vectors and
   * to determine the per-attribute value range handed to the distance object,
   * and once more (after a reset) to create the leaf clusters.</p>
   *
   * @exception MiningException could not run algorithm; also thrown when the
   * input stream delivers no vectors, when it delivers a different number of
   * vectors after being reset, or when no comparable cluster distance exists
   */
  protected void runAlgorithm() throws MiningException {

    int numbAtt = metaData.getAttributesNumber();

    // Get minimum and maximum of attributes, used if normalization:
    double[] minArr = new double[ numbAtt ];
    double[] maxArr = new double[ numbAtt ];
    int numbVec     = scanAttributeRanges(numbAtt, minArr, maxArr);
    if (numbVec == 0)
      throw new MiningException("Hierarchical clustering requires at least one input vector");
    distance.setMinAtt( minArr );
    distance.setMaxAtt( maxArr );

    // Form all vector clusters:
    Vector hclust = createLeafClusters();
    if (hclust.size() != numbVec)
      throw new MiningException("Input stream delivered " + hclust.size() +
                                " vectors after reset, expected " + numbVec);

    ClusterDistance clustDist = (ClusterDistance) distance;
    DistanceMatrix distMat    = new DistanceMatrix();
    distMat.initDistanceArray(numbVec);
    clustDist.setDistanceMatrix( distMat );
    try {
      // Calculate all distances of vector clusters; the return value is not
      // needed here, the call is made for every leaf pair up front:
      for (int i = 0; i < numbVec; i++) {
        HierarchicalCluster ci = (HierarchicalCluster) hclust.elementAt(i);
        for (int j = i+1; j < numbVec; j++)
          clustDist.clusterDistance( ci, (HierarchicalCluster) hclust.elementAt(j) );
      }

      System.out.println("...matrix distance. Get clusters:");

      // n leaves need exactly n-1 merges, so at most 2n-1 clusters exist:
      boolean[] merged = new boolean[2 * numbVec - 1];
      int nclust       = numbVec;   // clusters created so far
      int active       = numbVec;   // clusters not yet merged into a parent

      // Add cluster by cluster iteratively; one active cluster left => root:
      while (active > 1) {
        HierarchicalCluster hc = mergeNearestPair(hclust, merged, nclust, numbAtt, clustDist);
        hclust.addElement(hc);
        nclust = nclust + 1;
        active = active - 1;        // two clusters consumed, one created
      }

      // Create array of clusters, last cluster is root:
      clusters = new HierarchicalCluster[nclust];
      for (int i = 0; i < nclust; i++)
        clusters[i] = (HierarchicalCluster) hclust.elementAt(i);
    }
    finally {
      // Detach the matrix from the distance object even if clustering failed:
      clustDist.setDistanceMatrix(null);
    }

    System.out.println("...calculation finished." );
  }

  /**
   * Reads the mining input stream from its current position to the end and
   * records the smallest and largest value seen for every attribute.
   *
   * <p>The bounds start at +/- infinity so that the true range is found even
   * when all values of an attribute lie on one side of zero. Values that
   * compare false against both bounds (NaN) do not affect the range. An
   * attribute for which no comparable value was seen gets the range
   * [0.0, 0.0].</p>
   *
   * @param numbAtt number of attributes per vector
   * @param minArr receives the per-attribute minimum, length numbAtt
   * @param maxArr receives the per-attribute maximum, length numbAtt
   * @return number of vectors read
   * @exception MiningException could not read from the input stream
   */
  private int scanAttributeRanges(int numbAtt, double[] minArr, double[] maxArr)
    throws MiningException {

    for (int i = 0; i < numbAtt; i++) {
      minArr[i] = Double.POSITIVE_INFINITY;
      maxArr[i] = Double.NEGATIVE_INFINITY;
    }

    int numbVec = 0;
    while (miningInputStream.next()) {
      MiningVector vec = miningInputStream.read();
      for (int i = 0; i < numbAtt; i++) {
        double value = vec.getValue(i);
        if (value < minArr[i])
          minArr[i] = value;
        if (value > maxArr[i])
          maxArr[i] = value;
      }
      numbVec = numbVec + 1;
    }

    // No comparable value seen for an attribute: use an empty range at zero.
    for (int i = 0; i < numbAtt; i++) {
      if (minArr[i] > maxArr[i]) {
        minArr[i] = 0.0;
        maxArr[i] = 0.0;
      }
    }
    return numbVec;
  }

  /**
   * Resets the mining input stream and wraps every vector into its own leaf
   * cluster. The vector is both the center and the only contained vector of
   * its cluster; cluster indexes are assigned in reading order starting at 0.
   *
   * @return vector of HierarchicalCluster objects, one per input vector
   * @exception MiningException could not reset or read the input stream
   */
  private Vector createLeafClusters() throws MiningException {

    Vector leaves = new Vector();
    int ind       = 0;
    miningInputStream.reset();
    while (miningInputStream.next()) {
      MiningVector mv        = miningInputStream.read();
      HierarchicalCluster hc = new HierarchicalCluster();
      hc.setCenterVec(mv);
      Vector contVec = new Vector();
      contVec.addElement(mv);
      hc.setContainedVectors(contVec);
      hc.setLeaf(true);
      hc.setIndex(ind);
      leaves.addElement(hc);
      ind = ind + 1;
    }
    return leaves;
  }

  /**
   * Finds the two not yet merged clusters of minimum distance, marks both as
   * merged and returns their newly created parent cluster.
   *
   * <p>Pairs are visited in ascending (i, j) order with i &lt; j; among pairs
   * of equal distance the one visited last is chosen.</p>
   *
   * @param hclust all clusters created so far
   * @param merged flags of clusters already merged into a parent; the two
   * chosen clusters are flagged by this method
   * @param nclust number of clusters created so far, used as index of the
   * new cluster
   * @param numbAtt number of attributes per vector
   * @param clustDist cluster distance measure
   * @return the new parent cluster, not yet added to hclust
   * @exception MiningException no pair with a comparable distance was found
   */
  private HierarchicalCluster mergeNearestPair(Vector hclust, boolean[] merged,
    int nclust, int numbAtt, ClusterDistance clustDist) throws MiningException {

    // Find cluster of minimum distance:
    int im       = -1;
    int jm       = -1;
    double mdist = Double.POSITIVE_INFINITY;
    for (int i = 0; i < nclust; i++) {
      if (merged[i])
        continue;
      HierarchicalCluster ci = (HierarchicalCluster) hclust.elementAt(i);
      for (int j = i+1; j < nclust; j++) {
        if (merged[j])
          continue;
        double dist = clustDist.clusterDistance( ci, (HierarchicalCluster) hclust.elementAt(j) );
        // "<=" on purpose: ties go to the pair visited last.
        if (dist <= mdist) {
          mdist = dist;
          im    = i;
          jm    = j;
        }
      }
    }
    // Only possible when every remaining distance is NaN:
    if (im < 0)
      throw new MiningException("No comparable distance between remaining clusters");

    // Merge nearest clusters:
    HierarchicalCluster hc1 = (HierarchicalCluster) hclust.elementAt(im);
    HierarchicalCluster hc2 = (HierarchicalCluster) hclust.elementAt(jm);
    HierarchicalCluster hc  = new HierarchicalCluster(hc1, hc2, mdist);
    hc.setIndex(nclust);
    if (clustDist.getClustDistType() == clustDist.CDTYPE_CENTROID)
      assignWeightedCentroid(hc, hc1, hc2, numbAtt);

    merged[im] = true;
    merged[jm] = true;
    return hc;
  }

  /**
   * Sets center vector and weight of a merged cluster from its two children.
   * The center is the weight-averaged center of the children, the weight is
   * the sum of the children's weights. The meta data of the new center vector
   * is taken from the first child's center vector.
   *
   * @param parent merged cluster to update
   * @param hc1 first child cluster
   * @param hc2 second child cluster
   * @param numbAtt number of attributes per vector
   */
  private void assignWeightedCentroid(HierarchicalCluster parent,
    HierarchicalCluster hc1, HierarchicalCluster hc2, int numbAtt) {

    MiningVector c1 = hc1.getCenterVec();
    MiningVector c2 = hc2.getCenterVec();
    double[] val1   = c1.getValues();
    double[] val2   = c2.getValues();
    double[] valc   = new double[numbAtt];
    double w1       = hc1.getWeight();
    double w2       = hc2.getWeight();
    double w        = w1 + w2;
    for (int i = 0; i < numbAtt; i++)
      valc[i] = (w1*val1[i] + w2*val2[i]) / w;
    MiningVector mv = new MiningVector(valc);
    mv.setMetaData( c1.getMetaData() );
    parent.setCenterVec(mv);
    parent.setWeight(w);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -