📄 wkmeans.java

📁 一个数据挖掘软件ALPHAMINER的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
						indexOfCate, AttributeType.CATEGORICAL);
			else
				catDist = 0.0;
			dist = numDist + this.getGamma() * catDist;
			if (dist < dmin) {
				dmin = dist;
				objClusterID[vecIndex] = i;
			}
		}
		return dmin;
	}

	/**
	 * Recomputes the attribute (variable) weights from the current assignment
	 * of objects to clusters and passes them to {@code distance}.
	 * <p>
	 * For every attribute {@code j} a dispersion {@code D_j} is accumulated
	 * over all objects: the squared difference to the centre of the assigned
	 * cluster for numeric attributes, and the number of mismatches with that
	 * centre for categorical attributes (floored at
	 * {@code 0.5 * numVec / numberOfClusters}). The new weight is
	 * {@code w_j = 1 / (D_j^e * sum_t (1 / D_t)^e)} with
	 * {@code e = 1 / (beta - 1)}.
	 * <p>
	 * An attribute whose dispersion is zero gets weight 0 and is left out of
	 * the sum; dividing by it would otherwise turn the sum into Infinity and
	 * the weights into NaN. If no attribute has a positive dispersion the
	 * weights currently held by {@code distance} are left unchanged.
	 * <p>
	 * NOTE(review): {@code beta == 1} still makes the exponent infinite; this
	 * method assumes the caller configured {@code beta != 1}.
	 *
	 * @param numAtt
	 *            the number of attributes.
	 * @param numVec
	 *            the number of vectors (objects) to be clustered.
	 * @throws MiningException
	 *             propagated from the calls made while reading the vectors.
	 */
	private void changeWeights(int numAtt, int numVec) throws MiningException {
		double[] dispersion = new double[numAtt];
		double[] weights = new double[numAtt];
		double minCatDispersion = 0.5 * numVec / numberOfClusters;

		/* read every vector once and update all attribute dispersions from it */
		for (int i = 0; i < numVec; i++) {
			MiningVector vec = miningInputStream.read(i);
			MiningVector center = clusters[objClusterID[i]].getCenterVec();
			for (int j = 0; j < numOfNumeric; j++) {
				int att = indexOfNum[j];
				double diff = vec.getValue(att) - center.getValue(att);
				dispersion[att] += diff * diff;
			}
			for (int j = 0; j < numOfCat; j++) {
				int att = indexOfCate[j];
				if (vec.getValue(att) != center.getValue(att)) {
					dispersion[att] += 1;
				}
			}
		}

		/* categorical dispersions never drop below the floor */
		for (int j = 0; j < numOfCat; j++) {
			int att = indexOfCate[j];
			if (dispersion[att] < minCatDispersion) {
				dispersion[att] = minCatDispersion;
			}
		}

		double exponent = 1.0 / (distance.getBeta() - 1);

		/* normalising sum over the attributes that actually vary */
		double total = 0.0;
		for (int j = 0; j < numAtt; j++) {
			if (dispersion[j] > 0.0) {
				total += Math.pow(1.0 / dispersion[j], exponent);
			}
		}
		if (total == 0.0) {
			/* nothing to weigh against: keep the weights that are in place */
			return;
		}

		for (int j = 0; j < numAtt; j++) {
			if (dispersion[j] > 0.0) {
				weights[j] = 1.0 / (Math.pow(dispersion[j], exponent) * total);
			} else {
				weights[j] = 0.0;
			}
		}
		distance.setFieldWeights(weights);
	}

	/**
	 * Recomputes the prototype (centre vector) of one cluster.
	 * <p>
	 * Each numeric attribute is set to {@code totalSum[cluster][j]} divided by
	 * {@code clusterCount[cluster]}. Each categorical attribute is set to the
	 * index of the largest entry in {@code clusterCateVar[cluster][j]}, i.e.
	 * the most frequent category; the first one wins on ties, and index 0 is
	 * used when no entry is greater than zero.
	 * <p>
	 * When {@code clusterCount[cluster]} is zero (presumably an empty
	 * cluster) the division would produce a NaN centre or fail, so the
	 * existing prototype is kept instead.
	 *
	 * @param cluster
	 *            index of the cluster whose prototype is updated.
	 */
	private void changePrototype(int cluster) {
		if (clusterCount[cluster] == 0) {
			/* nothing to average over: leave the current centre in place */
			return;
		}

		MiningVector centerVec = new MiningVector(new double[numAtt]);
		centerVec.setMetaData(metaData);

		/* numeric attributes: mean of the accumulated sums */
		for (int j = 0; j < numOfNumeric; j++) {
			centerVec.setValue(indexOfNum[j], totalSum[cluster][j]
					/ clusterCount[cluster]);
		}

		/* categorical attributes: most frequent category */
		for (int j = 0; j < numOfCat; j++) {
			int bestFreq = 0;
			int bestIndex = 0;
			for (int k = 0; k < clusterCateVar[cluster][j].size(); k++) {
				int freq = ((Integer) clusterCateVar[cluster][j].get(k))
						.intValue();
				if (freq > bestFreq) {
					bestFreq = freq;
					bestIndex = k;
				}
			}
			centerVec.setValue(indexOfCate[j], bestIndex);
		}
		clusters[cluster].setCenterVec(centerVec);
	}

	/**
	 * Runs the WKMeans algorithm.
	 * <p>
	 * Steps, in order:
	 * <ol>
	 * <li>initialise the global variables, count the input vectors and
	 * average every numeric attribute into {@code meanValue};</li>
	 * <li>initialise the attribute weights (only when weighting is enabled
	 * and {@code distance} holds none) and the cluster prototypes;</li>
	 * <li>alternate between updating prototypes and weights followed by
	 * {@code getP2Cost}, and {@code getP1Cost}, until the cost drops below
	 * 1e-6, its relative change drops below 1e-4, or a positive
	 * {@code maxNumberOfIterations} has been reached;</li>
	 * <li>store the member vectors of every cluster in the cluster
	 * objects.</li>
	 * </ol>
	 * Prototypes, weights and the iteration count are printed to
	 * {@code System.out} along the way.
	 *
	 * @exception MiningException
	 *                can't run algorithm
	 */
	protected void runAlgorithm() throws MiningException {
		/* the number of data objects to be clustered */
		int numVec = 0;

		/* get the number of all attributes */
		int numAtt = metaData.getAttributesNumber();
		/* initialize the global variables */
		this.initialization(numAtt);

		/*
		 * One pass over the input: count the data objects to be clustered
		 * and sum up every numeric attribute so its mean can be computed.
		 */
		while (miningInputStream.next()) {
			MiningVector vec = miningInputStream.read();
			for (int j = 0; j < numOfNumeric; j++) {
				meanValue[j] += vec.getValue(indexOfNum[j]);
			}
			numVec++;
		}
		for (int j = 0; j < numOfNumeric; j++) {
			meanValue[j] /= numVec;
		}
		this.numVec = numVec;
		this.objClusterID = new int[numVec];

		/* initialise the weight of each attribute */
		if (distance.getFieldWeights() == null && weight) {
			this.weightInit();
		}

		/* create the array of clusters */
		clusters = new Cluster[numberOfClusters];
		for (int i = 0; i < numberOfClusters; i++) {
			clusters[i] = new Cluster();
			clusters[i].setName("clust" + String.valueOf(i));
		}

		/*
		 * choose numberOfClusters vectors (objects) randomly as initial
		 * prototypes (per the original comment; prototypeInit is defined
		 * elsewhere)
		 */
		this.prototypeInit();

		System.out.println("initial prototypes:");
		for (int i = 0; i < numberOfClusters; i++) {
			System.out.println(" Cluster[" + i + "]:"
					+ clusters[i].getCenterVec());
		}
		if (weight) {
			double[] w = distance.getFieldWeights();
			System.out.print("initial weights:");
			for (int i = 0; i < w.length; i++) {
				System.out.print(" " + w[i]);
			}
			System.out.println();
			System.out.println("---------------------------------");
		}

		double dispersion, dispersion1;

		/*
		 * compute the objective function value of P1 and assign every
		 * object to the nearest prototype
		 */
		dispersion = this.getP1Cost(0, numVec);

		/*
		 * The process of clustering. A positive maxNumberOfIterations caps
		 * the loop; zero or a negative value means "no limit", which keeps
		 * the run-until-convergence behaviour for callers that never set it.
		 */
		while (true) {
			for (int i = 0; i < numberOfClusters; i++) {
				this.changePrototype(i);
			}
			System.out.println("new prototypes:");
			for (int i = 0; i < numberOfClusters; i++) {
				System.out.println(" Cluster[" + i + "]:"
						+ clusters[i].getCenterVec());
			}

			/* update attribute weights */
			if (weight) {
				this.changeWeights(numAtt, numVec);
			}
			dispersion1 = this.getP2Cost(0, numVec);
			if (weight) {
				double[] w1 = distance.getFieldWeights();
				System.out.print("new weights:");
				for (int i = 0; i < w1.length; i++) {
					System.out.print(" " + w1[i]);
				}
				System.out.println();
				System.out.println("---------------------------------");
			}

			numberOfIterations++;
			if (dispersion1 < 0.000001
					|| Math.abs(dispersion - dispersion1) / dispersion1 < 0.0001)
				break;
			if (maxNumberOfIterations > 0
					&& numberOfIterations >= maxNumberOfIterations)
				break;

			dispersion = this.getP1Cost(0, numVec);
			numberOfIterations++;
			if (dispersion < 0.000001
					|| Math.abs(dispersion - dispersion1) / dispersion1 < 0.0001)
				break;
			if (maxNumberOfIterations > 0
					&& numberOfIterations >= maxNumberOfIterations)
				break;
		}

		System.out.println("numberOfIterations=" + numberOfIterations);

		/*
		 * Assign the containedVectors variable of every Cluster. This value
		 * can be used to transform to WEKA's Instances later for easy data
		 * visualization. Similar code can be found in
		 * KLinkage.runAlgorithm().
		 */
		Vector[] allContainedVectors = new Vector[numberOfClusters];
		for (int i = 0; i < numberOfClusters; i++) {
			allContainedVectors[i] = new Vector();
		}
		for (int i = 0; i < numVec; i++) {
			MiningVector mingVec = miningInputStream.read(i);
			allContainedVectors[objClusterID[i]].addElement(mingVec);
		}
		/* set the containedVectors variable */
		for (int i = 0; i < numberOfClusters; i++) {
			clusters[i].setContainedVectors(allContainedVectors[i]);
		}
	}

	/**
	 * Returns the configured maximum number of iterations.
	 *
	 * @return the current value of the {@code maxNumberOfIterations} field.
	 * @uml.property   name="maxNumberOfIterations"
	 */
	public int getMaxNumberOfIterations() {
		return this.maxNumberOfIterations;
	}

	/**
	 * Sets the maximum number of iterations.
	 *
	 * @param maxNumberOfIterations   new maximum number of iterations; stored
	 *            as given, no validation is performed here.
	 * @uml.property   name="maxNumberOfIterations"
	 */
	public void setMaxNumberOfIterations(int maxNumberOfIterations) {
		this.maxNumberOfIterations = maxNumberOfIterations;
	}

	/**
	 * Returns the number of clusters the algorithm builds.
	 *
	 * @return the current value of the {@code numberOfClusters} field.
	 * @uml.property   name="numberOfClusters"
	 */
	public int getNumberOfClusters() {
		return this.numberOfClusters;
	}

	/**
	 * Sets the number of clusters the algorithm builds.
	 *
	 * @param numberOfClusters   new number of clusters; stored as given, no
	 *            validation is performed here.
	 * @uml.property   name="numberOfClusters"
	 */

	public void setNumberOfClusters(int numberOfClusters) {
		this.numberOfClusters = numberOfClusters;
	}

	/**
	 * Tells whether attribute weighting is switched on. When it is, the
	 * algorithm initialises and updates the attribute weights while running.
	 *
	 * @return the current value of the {@code weight} flag.
	 * @uml.property   name="weight"
	 */
	public boolean isWeight() {
		return this.weight;
	}

	/**
	 * Switches attribute weighting on or off.
	 *
	 * @param weight   {@code true} to have the algorithm initialise and update
	 *            attribute weights, {@code false} to skip those steps.
	 * @uml.property   name="weight"
	 */
	public void setWeight(boolean weight) {
		this.weight = weight;
	}

	/**
	 * Gets the index array of the categorical attributes in a MiningVector.
	 * <p>
	 * Attributes {@code 0 .. numAtt - 1} of {@code metaData} are inspected in
	 * order and the positions of those that are instances of
	 * {@code CategoricalAttribute} are collected.
	 *
	 * @param numAtt
	 *            the number of all attributes; a value of zero or less
	 *            yields an empty array.
	 * @return the indices of the categorical attributes, in ascending order.
	 */
	public int[] getIndexOfCate(int numAtt) {
		/* worst case every attribute matches, so numAtt slots are enough */
		int[] buffer = new int[Math.max(numAtt, 0)];
		int count = 0;
		for (int i = 0; i < numAtt; i++) {
			if (metaData.getMiningAttribute(i) instanceof CategoricalAttribute) {
				buffer[count++] = i;
			}
		}
		/* trim to the number of matches actually found */
		int[] result = new int[count];
		System.arraycopy(buffer, 0, result, 0, count);
		return result;
	}

	/**
	 * Gets the index array of the numeric attributes in a MiningVector.
	 * <p>
	 * Attributes {@code 0 .. numAtt - 1} of {@code metaData} are inspected in
	 * order and the positions of those that are instances of
	 * {@code NumericAttribute} are collected.
	 *
	 * @param numAtt
	 *            the number of all attributes; a value of zero or less
	 *            yields an empty array.
	 * @return the indices of the numeric attributes, in ascending order.
	 */
	public int[] getIndexOfNum(int numAtt) {
		/* worst case every attribute matches, so numAtt slots are enough */
		int[] buffer = new int[Math.max(numAtt, 0)];
		int count = 0;
		for (int i = 0; i < numAtt; i++) {
			if (metaData.getMiningAttribute(i) instanceof NumericAttribute) {
				buffer[count++] = i;
			}
		}
		/* trim to the number of matches actually found */
		int[] result = new int[count];
		System.arraycopy(buffer, 0, result, 0, count);
		return result;
	}

	/*
	 * Debug helper: dumps the attribute/vector counts and the numeric and
	 * categorical index arrays to System.out.
	 */
	public void println(int numAtt, int numVec) {
		System.out.println("numAtt=" + numAtt);
		System.out.println("numVec=" + numVec);
		System.out.println("numOfCat=" + numOfCat);
		System.out.println("numOfNumeric=" + numOfNumeric);

		/* one line per index array, every entry preceded by a blank */
		StringBuilder numericLine = new StringBuilder("indexOfNum=");
		for (int k = 0; k < numOfNumeric; k++) {
			numericLine.append(" ").append(indexOfNum[k]);
		}
		System.out.println(numericLine.toString());

		StringBuilder categoricalLine = new StringBuilder("indexOfCate=");
		for (int k = 0; k < numOfCat; k++) {
			categoricalLine.append(" ").append(indexOfCate[k]);
		}
		System.out.println(categoricalLine.toString());

		System.out.println("------------------------------------");
	}
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -