⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.0
 */

package com.prudsys.pdm.Models.Clustering.CDBased.Algorithms.KMeans;

import java.util.Random;
import java.util.Vector;

import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;

/**
 * @author   Administrator
 */
public class KMeans extends CDBasedClusteringAlgorithm
{
  private int numberOfClusters;
  private int maxNumberOfIterations = 100;
  private int numberOfIterations;

  private int clusterAssignments[];

  /**
   * Empty constructor.
   */
  public KMeans() {
  }

  /**
   * Checks mining algorithm for completeness by calling verify method
   * of superclass. Adiitionally, it checks whether numberOfClusters and
   * maxNumberOfIterations are admissible.
   *
   * @throws IllegalArgumentException if some algorithm attributes are incorrect
   */
  public void verify() throws IllegalArgumentException
  {
    super.verify();
    if (numberOfClusters < 0)
      throw new IllegalArgumentException("numberOfClusters can't be negative");
    if (maxNumberOfIterations < 0)
      throw new IllegalArgumentException("maxNumberOfIterations can't be negative");
  }

  /**
   * Runs clustering algorithm.
   *
   * @exception MiningException cannot run algorithm
   */
  protected void runAlgorithm() throws MiningException {

    int numbAtt = metaData.getAttributesNumber();
    int numbVec = 0;

    // Get minimum and maximum of attributes:
    double[] minArr = new double[ numbAtt ];
    double[] maxArr = new double[ numbAtt ];
    for (int i = 0; i < numbAtt; i++) {
      minArr[i] = 0.0;
      maxArr[i] = 0.0;
    };
    while (miningInputStream.next()) {
      MiningVector vec = miningInputStream.read();
      for (int i = 0; i < numbAtt; i++) {
        if (vec.getValue(i) < minArr[i])
          minArr[i] = vec.getValue(i);
        if (vec.getValue(i) > maxArr[i])
          maxArr[i] = vec.getValue(i);
      };
      numbVec = numbVec + 1;
    };
    distance.setMinAtt( minArr );
    distance.setMaxAtt( maxArr );

    // Create array of clusters:
    clusters = new Cluster[ numberOfClusters ];
    for (int i = 0; i < numberOfClusters; i++) {
      clusters[i] = new Cluster();
      clusters[i].setName("clust" + String.valueOf(i));
    };

    // Find numbOfClusters random vectors:
    clusterAssignments = new int[numbVec];
    boolean selected[] = new boolean[numbVec];
    Random rand        = new Random(10);
    for (int i = 0; i < numberOfClusters; i++) {
      int index = 0;
      do {
        index = Math.abs( rand.nextInt() ) % numbVec;
      }
      while (selected[index]);

      // Add center vector to cluster array:
      MiningVector vec = miningInputStream.read(index);
      clusters[i].setCenterVec( vec );

      selected[index] = true;
    };

    // Iterations:
    numberOfIterations = 0;
    boolean converged  = false;
    while (! converged && numberOfIterations < maxNumberOfIterations) {
//System.out.println("iter: " + (numberOfIterations+1));
      converged = true;

      // Find nearest cluster for all vectors:
      for (int i = 0; i < numbVec; i++) {
        int nC = clusterVector( miningInputStream.read(i) );
        if (nC != clusterAssignments[i]) {
          clusterAssignments[i] = nC;
          converged             = false;
        };
      };

      // Find new center vectors:
      for (int i = 0; i < numberOfClusters; i++) {
        double[] nullVal     = new double[ numbAtt ];
        MiningVector nullVec = new MiningVector( nullVal );
        nullVec.setMetaData( metaData );
        clusters[i].setCenterVec( nullVec );
      };
      int[] cardClusters = new int[numberOfClusters];
      for (int i = 0; i < numbVec; i++) {
        MiningVector vec = miningInputStream.read(i);
        int index        = clusterAssignments[i];
        for (int j = 0; j < numbAtt; j++) {
          double val = clusters[index].getCenterVec().getValue(j);
          val        = val + vec.getValue(j);
          clusters[index].getCenterVec().setValue(j, val);
        };
        cardClusters[index] = cardClusters[index] + 1;
      };
      for (int i = 0; i < numberOfClusters; i++) {
        for (int j = 0; j < numbAtt; j++) {
          double val = clusters[i].getCenterVec().getValue(j);
          int card   = cardClusters[i];
          if (card == 0)
            card = 1;
          val        = val / card;
          clusters[i].getCenterVec().setValue(j, val);
        };
      };
      numberOfIterations = numberOfIterations + 1;
    };
    
    /**
     * Assign containedVectors variable the appropriate value in the Cluster class.
     * This value can be used to transform to WEKA's Istances later for easy data visualization.
     * Similar code can be found in KLinkage.runAlgorithm() function.
     * 
     * By TWang. Jan 19, 2005.
     */
    // Init the Vectors
    Vector[] allContainedVectors = null;
    allContainedVectors = new Vector[numberOfClusters];
    for (int i = 0; i < numberOfClusters; i++){
    	allContainedVectors[i] = new Vector();
    }
    
    // Find nearest cluster for all vectors:
    for (int i = 0; i < numbVec; i++) {
      MiningVector mingVec =  miningInputStream.read(i); 
      int nC = clusterVector(mingVec); 
      allContainedVectors[nC].addElement(mingVec);
    };
    
    // Set the containedVectors variable 
    for (int i = 0; i < numberOfClusters; i++){
    	clusters[i].setContainedVectors(allContainedVectors[i]);
    }
    /**
     * End. By Twang. Jan 19, 2005.
     */

  }

  /**
   * Assign vector to nearest cluster.
   *
   * @param vec mining vector to be assigned to nearest cluster
   * @return number of the nearest cluster, -1 if no cluster exist
   * @exception MiningException cannot cluster vector
   */
  private int clusterVector(MiningVector vec) throws MiningException {

    if (clusters == null || clusters.length == 0)
      return -1;

    int nearestClust = 0;
    double minDist   = distance.distance( vec, clusters[0].getCenterVec() );
    for (int i = 1; i < numberOfClusters; i++) {
      double dist = distance.distance( vec, clusters[i].getCenterVec() );
      if (dist < minDist) {
        minDist      = dist;
        nearestClust = i;
      };
    };

    return nearestClust;
  }

  /**
 * Sets number of clusters.
 * @param numberOfClusters   new number of clusters
 * @uml.property   name="numberOfClusters"
 */
  public void setNumberOfClusters(int numberOfClusters)
  {
    this.numberOfClusters = numberOfClusters;
  }

  /**
 * Returns number of clusters.
 * @return   number of clusters
 * @uml.property   name="numberOfClusters"
 */
  public int getNumberOfClusters()
  {
    return numberOfClusters;
  }

  /**
 * Sets maximum number of iterations.
 * @param maxNumberOfIterations   new maximum number of iterations
 * @uml.property   name="maxNumberOfIterations"
 */
  public void setMaxNumberOfIterations(int maxNumberOfIterations)
  {
    this.maxNumberOfIterations = maxNumberOfIterations;
  }

  /**
 * Returns maximum number of iterations.
 * @return   maximum number of iteartions
 * @uml.property   name="maxNumberOfIterations"
 */
  public int getMaxNumberOfIterations()
  {
    return maxNumberOfIterations;
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -