fwkmeans.java

来自「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」· Java 代码 · 共 810 行 · 第 1/2 页
JAVA
810 行
package com.prudsys.pdm.Models.Clustering.CDBased.Algorithms.FWKMeans;

import java.util.ArrayList;
import java.util.Random;
import java.util.Vector;
import com.prudsys.pdm.Core.AttributeType;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.Clustering.Cluster;
import com.prudsys.pdm.Models.Clustering.CDBased.CDBasedClusteringAlgorithm;

/**
 * Implementation of a fuzzy k-means type clustering algorithm with automated variable weighting.
 * 
 * @author Xiaoguang Xu in HITSZ-ICE
 */
public class FWKMeans extends CDBasedClusteringAlgorithm {
	// ------------------------------------------------------------------
	// These variables are specified by the user and
	// need to be defined in the config file "algorithm.xml".
	// ------------------------------------------------------------------
	/**
	 * Number of clusters to be generated by the algorithm (specified by the user).
	 */
	private int numberOfClusters = 3;

	/**
	 * The maximum number of iterations (specified by the user); not used in this algorithm.
	 */
	private int maxNumberOfIterations = 100;

	/** The number of iterations (specified by the user). */
	private int numberOfIterations;

	/**
	 * Exponent applied to the partition memberships. Its value must be greater than 1; the default is 2.
	 */

	private double alpha = 2.0;

	/** When false, no weighting process is performed (specified by the user). */
	private boolean weight;

	// Newly added.

	// ------------------------------------------------------------------
	// Global variables of FWKMeans that are shared by several methods;
	// these variables do not need to be defined in the config file "algorithm.xml".
	// ------------------------------------------------------------------
	/** The number of all attributes. */
	private int numAtt;

	/** The number of all objects to be clustered. */
	private int numVec;

	/** The number of categorical attributes. */
	private int numOfCat;

	/** The number of numeric attributes. */
	private int numOfNumeric;

	/** The index array of categorical attributes in a MiningVector. */
	private int[] indexOfCate;

	/** The index array of numeric attributes in a MiningVector. */
	private int[] indexOfNum;

	/** Stores the mean value of each numeric attribute. */
	private double[] meanValue;

	/** Object cluster membership identification (cluster index per object). */
	private int[] objClusterID;

	// Object cluster membership weight identification (no field is currently declared for it).
	
	// Disabled: summation of values of variables in each cluster.
	// private double[][] totalSum;
	// Disabled: summation of distances of categorical attributes.
	// private long[][] misMatch;
	// Disabled: count of the number of vectors (objects) in each cluster.
	//private int[] clusterCount;

	/** The degree to which each instance belongs to every cluster, indexed as [instance][cluster]. */

	private double[][] partition;

	/** Creates an algorithm instance with the default parameter values declared on the fields. */
	public FWKMeans() {
		super();
	}

	/**
	 * Checks the mining algorithm for completeness by calling the {@code verify} method of the superclass.
	 * Additionally, it checks whether {@code numberOfClusters}, {@code maxNumberOfIterations} and {@code alpha}
	 * are admissible.
	 * 
	 * @throws IllegalArgumentException
	 *             if {@code numberOfClusters} or {@code maxNumberOfIterations} is negative, or if {@code alpha}
	 *             is not strictly greater than 1.
	 */
	public void verify() throws IllegalArgumentException {
		super.verify();
		if (numberOfClusters < 0) {
			throw new IllegalArgumentException("numberOfClusters can't be negative");
		}
		if (maxNumberOfIterations < 0) {
			throw new IllegalArgumentException("maxNumberOfIterations can't be negative");
		}
		// alpha == 1 must be rejected as well: updatePartition uses the exponent 1 / (alpha - 1),
		// which would be a division by zero. The negated form also rejects NaN.
		if (!(alpha > 1)) {
			throw new IllegalArgumentException("Parameter fuzzy must be greater than 1");
		}
	}
	/**
	 * Stores the attribute count, looks up the index arrays of the categorical and numeric attributes, records
	 * how many of each there are, and allocates the (zero-filled) array for the numeric mean values.
	 * 
	 * @param numAtt
	 *            the number of attributes.
	 */
	private void initialization(int numAtt) {
		this.numAtt = numAtt;

		final int[] categoricalIdx = this.getIndexOfCate(numAtt);
		this.indexOfCate = categoricalIdx;
		this.numOfCat = categoricalIdx.length;

		final int[] numericIdx = this.getIndexOfNum(numAtt);
		this.indexOfNum = numericIdx;
		this.numOfNumeric = numericIdx.length;

		this.meanValue = new double[numericIdx.length];
	}

	/**
	 * Initializes the weights of all attributes to the same value {@code 1 / numAtt} and hands them to the
	 * distance object, which uses them when computing the distance between two vectors.
	 */
	private void weightInit() {
		final double[] uniform = new double[numAtt];
		java.util.Arrays.fill(uniform, 1.0 / numAtt);
		distance.setFieldWeights(uniform);
	}

	/**
	 * Chooses {@code numberOfClusters} distinct vectors (objects) pseudo-randomly as the initial prototypes and
	 * stores each one as the center vector of the corresponding entry of the cluster array. The generator uses
	 * a fixed seed, so the selection is reproducible between runs.
	 * 
	 * @throws IllegalArgumentException
	 *             if more clusters are requested than there are vectors to choose from.
	 * @throws MiningException
	 *             if a vector cannot be read from the input stream.
	 */
	private void prototypeInit() throws MiningException {
		// Without this guard the rejection loop below can never find an unselected index
		// (and the modulo would divide by zero when there are no vectors at all).
		if (numberOfClusters > numVec) {
			throw new IllegalArgumentException("numberOfClusters (" + numberOfClusters
					+ ") can't exceed the number of vectors (" + numVec + ")");
		}
		boolean[] selected = new boolean[numVec];
		Random rand = new Random(10);
		for (int i = 0; i < numberOfClusters; i++) {
			int index;
			do {
				// Take the remainder first and the absolute value second: Math.abs(Integer.MIN_VALUE) is
				// still negative, whereas |r % numVec| is always within [0, numVec). For every other value
				// of r this yields the same index as Math.abs(r) % numVec.
				index = Math.abs(rand.nextInt() % numVec);
			} while (selected[index]);
			// Add center vector to cluster array:
			MiningVector vec = miningInputStream.read(index);
			clusters[i].setCenterVec(vec);

			selected[index] = true;
		}
	}

	/**
	 * Updates the fuzzy partition (membership degrees) of the vectors in {@code [start, end)} with respect to
	 * the current prototypes and then computes the objective function value for that range.
	 * 
	 * @param start
	 *            the first index of vector (object), inclusive.
	 * @param end
	 *            the last index of vector (object), exclusive.
	 * @return the objective function value of P1.
	 * @throws MiningException
	 *             if a vector cannot be read from the input stream.
	 */
	private double getP1Cost(int start, int end) throws MiningException {
		updatePartition(start, end);
		// The cost computation is exactly the one in getP3Cost; reuse it rather than keeping a second copy.
		return getP3Cost(start, end);
	}
	/**
	 * Computes the objective function value for the vectors in {@code [start, end)} using the current partition,
	 * attribute weights and prototypes. The partition itself is not modified.
	 * <p>
	 * For every vector i and cluster k the contribution is
	 * {@code partition[i][k]^alpha * (sqrt(numDist) + gamma * sqrt(catDist))}, where {@code catDist} counts the
	 * categorical attributes whose value differs from the center of cluster k.
	 * 
	 * @param start
	 *            the first index of vector (object), inclusive.
	 * @param end
	 *            the last index of vector (object), exclusive.
	 * @return the summed objective function value.
	 * @throws MiningException
	 *             if a vector cannot be read from the input stream.
	 */
	private double getP3Cost(int start, int end) throws MiningException {
		if (start >= end) {
			return 0.0;
		}

		// getGamma() scans the complete data set and does not depend on i or k, so evaluate it once
		// instead of once per (vector, cluster) pair.
		final double gamma = this.getGamma();

		// The weighted exponent of each numeric attribute is loop-invariant as well.
		final double[] weightPow = new double[numOfNumeric];
		for (int j = 0; j < numOfNumeric; j++) {
			weightPow[j] = Math.pow(distance.getFieldWeights()[indexOfNum[j]], distance.getBeta());
		}

		double dispersion = 0.0;
		for (int i = start; i < end; i++) {
			MiningVector mingVec = miningInputStream.read(i);

			// NOTE(review): the numeric term uses the raw attribute value, not the difference to the cluster
			// center (updatePartition subtracts the center). It is therefore the same for every cluster and is
			// computed once per vector here. Confirm whether the center should be subtracted.
			double numDist = 0.0;
			for (int j = 0; j < numOfNumeric; j++) {
				numDist = numDist + weightPow[j] * Math.pow(mingVec.getValue(indexOfNum[j]), 2);
			}
			final double numTerm = Math.sqrt(numDist);

			for (int k = 0; k < numberOfClusters; k++) {
				// Simple matching: count categorical attributes that differ from the center of cluster k.
				double catDist = 0.0;
				for (int j = 0; j < numOfCat; j++) {
					if (mingVec.getValue(indexOfCate[j]) != clusters[k].getCenterVec().getValue(indexOfCate[j])) {
						catDist = catDist + 1.0;
					}
				}
				double dist = numTerm + gamma * Math.sqrt(catDist);
				dispersion = dispersion + Math.pow(partition[i][k], alpha) * dist;
			}
		}
		return dispersion;
	}
	/**
	 * Recomputes the fuzzy membership degrees {@code partition[i][k]} of the vectors in {@code [start, end)} with
	 * respect to the current prototypes and attribute weights.
	 * <p>
	 * The distance of vector i to cluster k is the weighted squared difference over the numeric attributes plus
	 * {@code gamma} times the number of mismatching categorical attributes. If a vector has distance zero to
	 * some cluster, it is assigned entirely (membership 1.0) to the first such cluster. Otherwise the membership
	 * is {@code 1 / (d_k^e * sum_s (1/d_s)^e)} with {@code e = 1 / (alpha - 1)}.
	 * 
	 * @param start
	 *            the first index of vector (object), inclusive.
	 * @param end
	 *            the last index of vector (object), exclusive.
	 * @throws MiningException
	 *             if a vector cannot be read from the input stream.
	 */
	private void updatePartition(int start, int end) throws MiningException {
		if (start >= end) {
			return;
		}

		// Loop invariants: getGamma() scans the whole data set, so it must not be re-evaluated for every
		// (vector, cluster) pair; the exponent and the weight powers do not change inside the loops either.
		final double gamma = this.getGamma();
		final double exponent = 1.0 / (alpha - 1);
		final double[] weightPow = new double[numOfNumeric];
		for (int j = 0; j < numOfNumeric; j++) {
			weightPow[j] = Math.pow(distance.getFieldWeights()[indexOfNum[j]], distance.getBeta());
		}

		final double[] dist = new double[numberOfClusters];
		for (int i = start; i < end; i++) {
			MiningVector mingVec = miningInputStream.read(i);

			// Distance of vector i to every cluster center; remember the first exact hit, if any.
			int firstZero = -1;
			for (int k = 0; k < numberOfClusters; k++) {
				double numDist = 0.0;
				for (int j = 0; j < numOfNumeric; j++) {
					double diff = Math.abs(mingVec.getValue(indexOfNum[j])
							- clusters[k].getCenterVec().getValue(indexOfNum[j]));
					numDist = numDist + weightPow[j] * diff * diff;
				}
				double catDist = 0.0;
				for (int j = 0; j < numOfCat; j++) {
					if (mingVec.getValue(indexOfCate[j]) != clusters[k].getCenterVec().getValue(indexOfCate[j])) {
						catDist = catDist + 1.0;
					}
				}
				dist[k] = numDist + gamma * catDist;
				if (firstZero < 0 && dist[k] == 0) {
					firstZero = k;
				}
			}

			if (firstZero >= 0) {
				// The vector coincides with a center: assign it completely to that cluster, which also
				// avoids dividing by a zero distance below.
				for (int s = 0; s < numberOfClusters; s++) {
					partition[i][s] = 0.0;
				}
				partition[i][firstZero] = 1.0;
				continue;
			}

			double inverseSum = 0.0;
			for (int k = 0; k < numberOfClusters; k++) {
				if (dist[k] != 0) {
					inverseSum = inverseSum + Math.pow(1 / dist[k], exponent);
				}
			}
			for (int k = 0; k < numberOfClusters; k++) {
				partition[i][k] = 1.0 / (Math.pow(dist[k], exponent) * inverseSum);
			}
		}
	}

	/**
	 * In fuzzy weighted k-means every instance belongs to each cluster with a degree rather than with a hard 0
	 * or 1. This method turns those degrees into a hard assignment: each instance is assigned to the cluster
	 * with the largest membership degree (the lowest cluster index wins on ties) and the result is stored in
	 * {@code objClusterID}.
	 * 
	 * @param partition
	 *            membership degrees, indexed as [instance][cluster].
	 */
	private void confirmClusters(double partition[][]) {
		for (int obj = 0; obj < numVec; obj++) {
			final double[] memberships = partition[obj];
			int best = 0;
			double bestDegree = memberships[0];
			// Index 0 is the starting candidate, so the scan can begin at 1.
			for (int c = 1; c < numberOfClusters; c++) {
				if (memberships[c] > bestDegree) {
					bestDegree = memberships[c];
					best = c;
				}
			}
			objClusterID[obj] = best;
		}
	}

	/**
	 * Computes the parameter gamma that balances the numeric against the categorical part of the distance.
	 * <p>
	 * If numeric attributes exist, gamma is {@code 0.3 * s * s}, where {@code s} is the average over all numeric
	 * attributes of their sample standard deviation (computed around {@code meanValue}). If there are no numeric
	 * attributes, gamma is 1.0.
	 * 
	 * @return gamma
	 * @throws MiningException
	 *             if a vector cannot be read from the input stream.
	 */
	private double getGamma() throws MiningException {
		if (numOfNumeric <= 0) {
			return 1.0;
		}

		// Read every vector once and accumulate all attributes side by side, instead of re-reading the whole
		// input for each numeric attribute. Each per-attribute sum is still built in vector order.
		final double[] squareSum = new double[numOfNumeric];
		for (int i = 0; i < numVec; i++) {
			MiningVector vec = miningInputStream.read(i);
			for (int j = 0; j < numOfNumeric; j++) {
				squareSum[j] += Math.pow(vec.getValue(indexOfNum[j]) - meanValue[j], 2);
			}
		}

		// NOTE(review): with numVec <= 1 the divisor below is zero or negative and the result is NaN or
		// infinite; confirm that callers never cluster fewer than two vectors.
		double gamma = 0.0;
		for (int j = 0; j < numOfNumeric; j++) {
			gamma += Math.sqrt(squareSum[j] / (numVec - 1));
		}
		gamma /= numOfNumeric;
		return 0.3 * gamma * gamma;
	}
	/**
	 * Update variable weights
	 * 
	 * @param numArr,the
	 *            number of attributes.
	 * @param numVec,the
	 *            number of vectors (objects)to be clustered.
	 * @throws MiningException
	 */
	private void changeWeights(int numAtt, int numVec) throws MiningException {
		double weights[], total, minCatDl;
		// Dl = new double[numAtt];
		total = 0.0;
		weights = new double[numAtt];
		minCatDl = 0.5 * numVec / numberOfClusters;
		double DJ[] = new double[numAtt];
		if (numOfNumeric > 0) {
			for (int j = 0; j < numOfNumeric; j++) {
				double DD = 0.0;
				for (int i = 0; i < numVec; i++) {
					MiningVector mingVec = miningInputStream.read(i);
					for (int k = 0; k < numberOfClusters; k++) {
						double temp0 = mingVec.getValue(indexOfNum[j])
fwkmeans.java - 源码说明

本页面展示了「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」中的 fwkmeans.java 源码文件，采用 Java 编程语言编写，共 810 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ALPHAMINERR相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?