⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hierarchicalagglomerativefast.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.1
 */

package com.prudsys.pdm.Models.Clustering.Hierarchical.Algorithms;

import java.util.Vector;

import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Hierarchical.ClusterDistance;
import com.prudsys.pdm.Models.Clustering.Hierarchical.DistanceMatrix;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalCluster;
import com.prudsys.pdm.Models.Clustering.Hierarchical.HierarchicalClusteringAlgorithm;
import com.prudsys.pdm.Utils.IntVector;

/**
 * Class for assymptotical fast hierarchical agglomerative clustering
 * using adaptiv distance calculation.
 *
 * In contrast to HierarchicalAgglomerative it requires twice as much
 * memory but only O(n^2) operations in contrast of O(n^3) of
 * HierarchicalAgglomerative.
 */
public class HierarchicalAgglomerativeFast extends HierarchicalClusteringAlgorithm
{
  /**
   * Runs fast hierarchical agglomerative clustering algorithm.
   *
   * @exception MiningException could not run algorithm
   */
  protected void runAlgorithm() throws MiningException {

    // Number of attributes and vectors:
    int numbAtt = metaData.getAttributesNumber();
    int numbVec = miningInputStream.getVectorsNumber();

    // Get minimum and maximum of attributes, used if normalization:
    if (distance.isNormalized()) {
      double[] minArr = new double[ numbAtt ];
      double[] maxArr = new double[ numbAtt ];
      for (int i = 0; i < numbAtt; i++) {
        minArr[i] = 0.0;
        maxArr[i] = 0.0;
      };
      while (miningInputStream.next()) {
        MiningVector vec = miningInputStream.read();
        for (int i = 0; i < numbAtt; i++) {
          if (vec.getValue(i) < minArr[i])
            minArr[i] = vec.getValue(i);
          if (vec.getValue(i) > maxArr[i])
            maxArr[i] = vec.getValue(i);
        };
      };
      distance.setMinAtt( minArr );
      distance.setMaxAtt( maxArr );
    };

    // Form all vector clusters:
    int numbClust                = 2*numbVec - 1;
    HierarchicalCluster[] hclust = new HierarchicalCluster[numbClust];
    for (int i = 0; i < numbVec; i++) {
      MiningVector mv        = miningInputStream.read(i);
      HierarchicalCluster hc = new HierarchicalCluster();
      hc.setCenterVec(mv);
      Vector contVec = new Vector();
      contVec.addElement(mv);
      hc.setContainedVectors(contVec);
      hc.setLeaf(true);
      hc.setIndex(i);
      hclust[i] = hc;
    };

    // Calculate all distances of vector clusters and get minimum distance:
    ClusterDistance clustDist = (ClusterDistance) distance;
    DistanceMatrix distMat    = new DistanceMatrix();
    distMat.initDistanceArray(numbVec);
    clustDist.setDistanceMatrix(distMat);

    // Sort distance list:
    int numbDist  = (numbClust-1)*numbClust / 2;
    int[] ordDist = new int[numbDist];
    for (int i = 0; i < numbDist; i++)
      ordDist[i] = -1;

    // Calculate all distances between vector-clusters and order them:
    int id = 0;
    for (int i = 0; i < numbVec; i++)
      for (int j = i+1; j < numbVec; j++) {
        // Distance:
        double dist = clustDist.clusterDistance( hclust[i], hclust[j] );

        // Order:
        int ind     = distMat.c2i(i, j);
        ordDist[id] = ind;
        int no      = id;
        while ( no > 0 &&
                distMat.getDistanceArray(ordDist[no]) <
                  distMat.getDistanceArray(ordDist[no-1] )
              )
        {
          int it        = ordDist[no];
          ordDist[no]   = ordDist[no-1];
          ordDist[no-1] = it;

          no = no - 1;
        };

        id = id + 1;
      };

    System.out.println("...matrix distance. Get clusters:");

    // Add cluster by cluster iteratively:
    boolean[] usedClust = new boolean[numbClust];
    for (int nclust = numbVec; nclust < numbClust; nclust++) {

      int ind;
      int im;
      int jm;
      while (true) {
        ind      = ordDist[0];
        int[] cc = distMat.i2c(ind);
        im       = cc[0];
        jm       = cc[1];
        for (int i = 0; i < id-1; i++)
          ordDist[i] = ordDist[i+1];
        ordDist[id-1] = -1;
        id       = id - 1;
        if (!usedClust[im] && !usedClust[jm])
          break;
      };
      double mdist = distMat.getDistanceArray(ind);

      // Merge nearest clusters:
      HierarchicalCluster hc = new HierarchicalCluster(hclust[im], hclust[jm], mdist);
      hc.setIndex(nclust);
      hclust[nclust]    = hc;
      usedClust[im]     = true;
      usedClust[jm]     = true;

      // Add new cluster distances:
      for (int i = 0; i < nclust; i++) {
        if (usedClust[i])
          continue;

        double dist = clustDist.clusterDistance(hclust[i], hclust[nclust]);
        ind         = distMat.c2i(i, nclust);
        ordDist[id] = ind;
        id          = id + 1;

        int no = id-1;
        while ( no > 0 &&
                distMat.getDistanceArray(ordDist[no]) <
                  distMat.getDistanceArray(ordDist[no-1] )
              )
        {
          int it        = ordDist[no];
          ordDist[no]   = ordDist[no-1];
          ordDist[no-1] = it;

          no = no - 1;
        };
      };
    };

    // Create array of clusters, last cluster is root:
    clusters = new HierarchicalCluster[numbClust];
    for (int i = 0; i < numbClust; i++)
      clusters[i] = (HierarchicalCluster) hclust[i];
    clustDist.setDistanceMatrix(null);

    System.out.println("...calculation finished." );
  };

  /**
   * Runs fast hierarchical agglomerative clustering algorithm.
   * Uses a vector structure for dynamic distance storage.
   *
   * @exception MiningException could not run algorithm
   */
  protected void runAlgorithm2() throws MiningException {

    // Number of attributes and vectors:
    int numbAtt = metaData.getAttributesNumber();
    int numbVec = miningInputStream.getVectorsNumber();

    // Get minimum and maximum of attributes, used if normalization:
    if (distance.isNormalized()) {
      double[] minArr = new double[ numbAtt ];
      double[] maxArr = new double[ numbAtt ];
      for (int i = 0; i < numbAtt; i++) {
        minArr[i] = 0.0;
        maxArr[i] = 0.0;
      };
      while (miningInputStream.next()) {
        MiningVector vec = miningInputStream.read();
        for (int i = 0; i < numbAtt; i++) {
          if (vec.getValue(i) < minArr[i])
            minArr[i] = vec.getValue(i);
          if (vec.getValue(i) > maxArr[i])
            maxArr[i] = vec.getValue(i);
        };
      };
      distance.setMinAtt( minArr );
      distance.setMaxAtt( maxArr );
    };

    // Form all vector clusters:
    int numbClust                = 2*numbVec - 1;
    HierarchicalCluster[] hclust = new HierarchicalCluster[numbClust];
    for (int i = 0; i < numbVec; i++) {
      MiningVector mv        = miningInputStream.read(i);
      HierarchicalCluster hc = new HierarchicalCluster();
      hc.setCenterVec(mv);
      Vector contVec = new Vector();
      contVec.addElement(mv);
      hc.setContainedVectors(contVec);
      hc.setLeaf(true);
      hc.setIndex(i);
      hclust[i] = hc;
    };

    // Calculate all distances of vector clusters and get minimum distance:
    ClusterDistance clustDist = (ClusterDistance) distance;
    DistanceMatrix distMat    = new DistanceMatrix();
    distMat.initDistanceArray(numbVec);
    clustDist.setDistanceMatrix(distMat);

    // Sort distance list:
    int numbDist  = (numbClust-1)*numbClust / 2;
    IntVector ordDist = new IntVector();

    // Calculate all distances between vector-clusters and order them:
    for (int i = 0; i < numbVec; i++)
      for (int j = i+1; j < numbVec; j++) {
        // Distance:
        double dist = clustDist.clusterDistance( hclust[i], hclust[j] );

        // Order:
        int ind   = distMat.c2i(i, j);

//        int[] cc  = distMat.i2c(ind);
//System.out.println("i=" + i + " j=" + j + " ind=" + ind + " ii=" + cc[0] + " jj=" + cc[1]);

        ordDist.addElement(ind);
        int no = ordDist.size()-1;
        while ( no > 0 &&
                distMat.getDistanceArray(ordDist.IntegerAt(no)) <
                  distMat.getDistanceArray(ordDist.IntegerAt(no-1))
              )
        {
          int it = ordDist.IntegerAt(no);
          ordDist.setElementAt(ordDist.IntegerAt(no-1), no);
          ordDist.setElementAt(it, no-1);

          no = no - 1;
        };

      };

//    for (int i = 0; i < numbDist; i++)
//      System.out.println("dist["+i+"]=" + distMat.getDistanceArray(ordDist.IntegerAt(i)));

    // Add cluster by cluster iteratively:
    boolean[] usedClust = new boolean[numbClust];
    for (int nclust = numbVec; nclust < numbClust; nclust++) {
//System.out.println("nclust=" + nclust);

      int ind;
      int im;
      int jm;
      while (true) {
        ind      = ordDist.IntegerAt(0);
        int[] cc = distMat.i2c(ind);
        im       = cc[0];
        jm       = cc[1];
        if (usedClust[im] || usedClust[jm])
          ordDist.removeElementAt(0);
        else
          break;
      };
      double mdist = distMat.getDistanceArray( ind );

      // Merge nearest clusters:
      HierarchicalCluster hc = new HierarchicalCluster(hclust[im], hclust[jm], mdist);
      hc.setIndex(nclust);
      hclust[nclust]    = hc;
      usedClust[im]     = true;
      usedClust[jm]     = true;

      // Add new cluster distances:
      for (int i = 0; i < nclust; i++) {
        if (usedClust[i])
          continue;

        double dist = clustDist.clusterDistance(hclust[i], hclust[nclust]);
        ind         = distMat.c2i(i, nclust);
        ordDist.addElement(ind);

        int no = ordDist.size()-1;
        while ( no > 0 &&
                distMat.getDistanceArray(ordDist.IntegerAt(no)) <
                  distMat.getDistanceArray(ordDist.IntegerAt(no-1))
              )
        {
          int it = ordDist.IntegerAt(no);
          ordDist.setElementAt(ordDist.IntegerAt(no-1), no);
          ordDist.setElementAt(it, no-1);

          no = no - 1;
        };
      };
    };

    // Create array of clusters, last cluster is root:
    clusters = new HierarchicalCluster[numbClust];
    for (int i = 0; i < numbClust; i++)
      clusters[i] = (HierarchicalCluster) hclust[i];
    clustDist.setDistanceMatrix(null);

    System.out.println("...calculation finished." );
  };
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -