distance.java

来自「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」· Java 代码 · 共 913 行 · 第 1/2 页
JAVA
913 行
					if (diff > dist)
						dist = diff;
				}
				;

				if (measureType == MEASURE_TYPE_SIMILARITY)
					dist = 1.0 / (1.0 + dist / simMeasNormConst);

				break;
			case TYPE_CITY_BLOCK :
				for (int i = 0; i < numbAtt; i++) {
					double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
					dist = dist + weights[i] * diff;
				}
				;

				if (measureType == MEASURE_TYPE_SIMILARITY)
					dist = 1.0 / (1.0 + dist / simMeasNormConst);

				break;
			case TYPE_MINKOVSKI :
				for (int i = 0; i < numbAtt; i++) {
					double diff = Math.abs(vec1.getValue(i) - vec2.getValue(i));
					dist = dist + weights[i] * Math.pow(diff, minkPar);
				}
				;
				dist = Math.pow(dist, (1.0 / minkPar));

				if (measureType == MEASURE_TYPE_SIMILARITY)
					dist = 1.0 / (1.0 + dist / simMeasNormConst);

				break;
			case TYPE_SIMPLE_MATCHING :
				double[] counts = getDistCounts(vec1, vec2);
				dist = (counts[0] + counts[3]) / (counts[0] + counts[1] + counts[2] + counts[3]);

				break;
			case TYPE_JACCARD :
				counts = getDistCounts(vec1, vec2);
				dist = counts[0] / (counts[0] + counts[1] + counts[2]);

				break;
			case TYPE_TANIMOTO :
				counts = getDistCounts(vec1, vec2);
				dist = (counts[0] + counts[3]) / (counts[0] + 2.0 * (counts[1] + counts[2]) + counts[3]);

				break;
			case TYPE_BINARY_SIMILARITY :
				throw new MiningException("Binary similarity not supported");

			default :
				throw new MiningException("Unknown distance type specified.");
		};

		return dist;
	}

	/**
	 * Computes the comparison value of two points of one attribute, according to the configured compare function.
	 * 
	 * ABS_DIFF returns the absolute difference of the two values after each has been passed through normValue. DELTA
	 * returns 0.0 when the values differ by less than SIMILARITY_EPSILON and 1.0 otherwise. EQUAL is the inverse of
	 * DELTA: 1.0 when the values differ by less than SIMILARITY_EPSILON and 0.0 otherwise. GAUSS_SIM and TABLE are not
	 * implemented and return Category.MISSING_VALUE.
	 * 
	 * @param metaData
	 *            meta data of vectors (not read by this method)
	 * @param index
	 *            index of current attribute; only used for normalization in the ABS_DIFF case
	 * @param p1
	 *            point 1
	 * @param p2
	 *            point 2
	 * @return comparison value of p1 and p2 on the given attribute
	 * @exception MiningException
	 *                unknown compare function, or normalization failed
	 */
	private double AttDist(MiningDataSpecification metaData, int index, double p1, double p2) throws MiningException {

		switch (compareFunction) {
			case COMPARISON_FUNCTION_ABS_DIFF :
				return Math.abs(normValue(index, p1) - normValue(index, p2));

			case COMPARISON_FUNCTION_DELTA :
				// 0 for (almost) equal values, 1 for different values
				return (Math.abs(p1 - p2) < SIMILARITY_EPSILON) ? 0.0 : 1.0;

			case COMPARISON_FUNCTION_EQUAL :
				// 1 for (almost) equal values, 0 for different values
				return (Math.abs(p1 - p2) < SIMILARITY_EPSILON) ? 1.0 : 0.0;

			case COMPARISON_FUNCTION_GAUSS_SIM :
			case COMPARISON_FUNCTION_TABLE :
				// Not implemented: signal with the missing value marker
				return Category.MISSING_VALUE;

			default :
				throw new MiningException("Unknown comparison function specified.");
		}
	}
	// -----------------------------------------------------------------------
	// newly added methods of distance calculation for the k-prototypes algorithm
	// added by XiaoMing Li 2006/03/29
	// -----------------------------------------------------------------------
	/**
	 * Calculates a weighted (squared) Euclidean distance between two vectors, restricted to a subset of attributes.
	 * 
	 * Each selected attribute contributes weight^beta * diff^2, where diff comes from numAttDist for
	 * AttributeType.NUMERICAL and from catAttDist for AttributeType.CATEGORICAL. For TYPE_EUCLIDEAN the square root
	 * of the sum is returned, for TYPE_SQUARED_EUCLIDEAN the sum itself. Any other distance type, and any other
	 * attribute type, yields 0.0.
	 * 
	 * @param vec1
	 *            first mining vector
	 * @param vec2
	 *            second mining vector
	 * @param indexArray
	 *            indexes of the attributes to include in the distance
	 * @param attributeType
	 *            AttributeType.NUMERICAL or AttributeType.CATEGORICAL
	 * @return distance over the selected attributes
	 * @throws MiningException
	 *             declared by the per-attribute distance helpers
	 */
	public double distance(MiningVector vec1, MiningVector vec2, int[] indexArray, int attributeType)
			throws MiningException {
		// Attribute weights: all ones unless field weights were specified
		final int numbAtt = vec1.getValues().length;
		double[] weights;
		if (fieldWeights != null) {
			weights = fieldWeights;
		} else {
			weights = new double[numbAtt];
			for (int k = 0; k < numbAtt; k++)
				weights[k] = 1.0;
		}

		// Only the two Euclidean variants are supported here; everything else gives 0.0
		final boolean takeRoot;
		switch (type) {
			case TYPE_EUCLIDEAN :
				takeRoot = true;
				break;
			case TYPE_SQUARED_EUCLIDEAN :
				takeRoot = false;
				break;
			default :
				return 0.0;
		}

		// NOTE(review): the weight is looked up by position in indexArray, not by the attribute index itself -
		// confirm against the callers that this is the intended layout of the weights.
		double sum = 0.0;
		if (attributeType == AttributeType.NUMERICAL) {
			for (int pos = 0; pos < indexArray.length; pos++) {
				final int att = indexArray[pos];
				final double d = numAttDist(vec1.getMetaData(), att, vec1.getValue(att), vec2.getValue(att));
				sum += Math.pow(weights[pos], beta) * d * d;
			}
		}
		if (attributeType == AttributeType.CATEGORICAL) {
			for (int pos = 0; pos < indexArray.length; pos++) {
				final int att = indexArray[pos];
				final double d = catAttDist(vec1.getMetaData(), att, vec1.getValue(att), vec2.getValue(att));
				sum += Math.pow(weights[pos], beta) * d * d;
			}
		}

		return takeRoot ? Math.sqrt(sum) : sum;
	}
	// -----------------------------------------------------------------------
	// new added Methods of partition percete calculation for F W Kmeans algorithm
	// added by XiaoGuang Xu 2006/05/17
	// -----------------------------------------------------------------------
	/**
	 * 
	 * @param vec1
	 * @param vec2
	 * @param indexArray
	 * @param attributeType
	 * @return
	 * @throws MiningException
	 */
	/*
	 * public double distanceD(MiningVector vec1, MiningVector vec2,int[]indexArray,int attributeType) throws
	 * MiningException {
	 * 
	 * int numbAtt = vec1.getValues().length; double[] weights = new double[numbAtt]; if (fieldWeights == null) { for
	 * (int i = 0; i < numbAtt; i++) weights[i] = 1.0; } else weights = fieldWeights; double dist = 0.0; switch(type) {
	 * case TYPE_EUCLIDEAN: if(attributeType==AttributeType.NUMERICAL) { for (int i = 0; i < indexArray.length; i++) {
	 * int index=indexArray[i]; double diff = numAttDist( vec1.getMetaData(), index, vec1.getValue(index),
	 * vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; }
	 * if(attributeType==AttributeType.CATEGORICAL) { for (int i = 0; i < indexArray.length; i++) { int
	 * index=indexArray[i]; double diff = catAttDist( vec1.getMetaData(), index, vec1.getValue(index),
	 * vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; } dist = Math.sqrt( dist ); break;
	 * case TYPE_SQUARED_EUCLIDEAN: if(attributeType==AttributeType.NUMERICAL) { for (int i = 0; i < indexArray.length;
	 * i++) { int index=indexArray[i]; double diff = numAttDist( vec1.getMetaData(), index, vec1.getValue(index),
	 * vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; }
	 * if(attributeType==AttributeType.CATEGORICAL) { for (int i = 0; i < indexArray.length; i++) { int
	 * index=indexArray[i]; double diff = catAttDist( vec1.getMetaData(), index, vec1.getValue(index),
	 * vec2.getValue(index) ); dist = dist + Math.pow(weights[i],alpha)*diff*diff; }; } break; } return dist; }
	 */
	// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// /End XiaoguangXu
	// /
	// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/**
	 * Distance of two categorical values: 0.0 if their difference is exactly zero, 1.0 otherwise.
	 * 
	 * The comparison is exact (no epsilon). If the difference is NaN the result is 1.0.
	 * 
	 * @param metaData
	 *            meta data of vectors (not read by this method)
	 * @param index
	 *            index of current attribute (not read by this method)
	 * @param p1
	 *            value of the first vector
	 * @param p2
	 *            value of the second vector
	 * @return 0.0 for identical values, 1.0 otherwise
	 * @throws MiningException
	 *             declared for symmetry with the other attribute distances; not thrown here
	 */
	private double catAttDist(MiningDataSpecification metaData, int index, double p1, double p2) throws MiningException {
		final double gap = Math.abs(p1 - p2);
		return (gap == 0) ? 0.0 : 1.0;
	}
	/**
	 * Distance of two numerical values: the absolute difference, without normalization.
	 * 
	 * @param metaData
	 *            meta data of vectors (not read by this method)
	 * @param index
	 *            index of current attribute (not read by this method)
	 * @param p1
	 *            value of the first vector
	 * @param p2
	 *            value of the second vector
	 * @return |p1 - p2|
	 * @throws MiningException
	 *             declared for symmetry with the other attribute distances; not thrown here
	 */
	private double numAttDist(MiningDataSpecification metaData, int index, double p1, double p2) throws MiningException {
		return Math.abs(p1 - p2);
	}

	/**
	 * Maps a value onto the min/max range of its attribute when normalization is switched on.
	 * 
	 * Without normalization the value is returned unchanged. With normalization the result is
	 * (value - min) / (max - min) for the attribute; it is 0.0 if the attribute's minimum is NaN or if minimum and
	 * maximum are closer together than SIMILARITY_EPSILON.
	 * 
	 * @param index
	 *            attribute index
	 * @param value
	 *            value to be normalized
	 * @return normalized value, or the value itself if no normalization is specified
	 * @exception MiningException
	 *                normalization is required but minAtt or maxAtt is not set
	 */
	private double normValue(int index, double value) throws MiningException {

		// No normalization requested: pass the value through
		if (!normalized)
			return value;

		if (minAtt == null || maxAtt == null)
			throw new MiningException("Normalization required but min/maxAtt not specified.");

		final double lower = minAtt[index];
		if (Double.isNaN(lower))
			return 0.0;

		// Degenerate range: avoid dividing by (almost) zero
		final double span = maxAtt[index] - lower;
		if (Math.abs(span) < SIMILARITY_EPSILON)
			return 0.0;

		return (value - lower) / span;
	}

	/**
	 * Counts the co-occurrences of 1 and 0 over all attributes of two vectors that are treated as binary.
	 * 
	 * A value counts as 1 (or 0) if it lies within SIMILARITY_EPSILON of 1.0 (or 0.0). Attributes where either
	 * value is neither 0 nor 1 do not contribute to any count. The result holds, in this order: a11 (both 1),
	 * a10 (vec1 is 1, vec2 is 0), a01 (vec1 is 0, vec2 is 1) and a00 (both 0).
	 * 
	 * @param vec1
	 *            mining vector 1
	 * @param vec2
	 *            mining vector 2
	 * @return array of a11, a10, a01, a00 (in this order)
	 */
	private double[] getDistCounts(MiningVector vec1, MiningVector vec2) {

		final double[] tally = new double[4];
		final int length = vec1.getValues().length;
		for (int pos = 0; pos < length; pos++) {
			final double first = vec1.getValue(pos);
			final double second = vec2.getValue(pos);

			final boolean secondIsOne = Math.abs(second - 1.0) < SIMILARITY_EPSILON;
			final boolean secondIsZero = Math.abs(second - 0.0) < SIMILARITY_EPSILON;

			if (Math.abs(first - 1.0) < SIMILARITY_EPSILON) {
				if (secondIsOne)
					tally[0] += 1;
				if (secondIsZero)
					tally[1] += 1;
			}
			if (Math.abs(first - 0.0) < SIMILARITY_EPSILON) {
				if (secondIsOne)
					tally[2] += 1;
				if (secondIsZero)
					tally[3] += 1;
			}
		}

		return tally;
	}

	// -----------------------------------------------------------------------
	// Methods of PMML handling
	// -----------------------------------------------------------------------
	/**
	 * Builds the PMML ComparisonMeasure object that describes this distance.
	 * 
	 * Written are the kind (distance or similarity), the compare function with its minimum and maximum, and the
	 * distance type element (including the p parameter for Minkowski). Attention: the normalization flag is not
	 * written by this method. For normalization, use corresponding transformations before clustering!
	 * 
	 * @return PMML object of Distance
	 * @see com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
	 * @exception MiningException
	 *                unknown compare function or unknown distance type
	 */
	public Object createPmmlObject() throws MiningException {

		final com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure measure = new com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure();

		// Kind of measure:
		measure.setKind(measureType == MEASURE_TYPE_DISTANCE ? "distance" : "similarity");

		// Compare function and its bounds:
		final String compareFunctionName;
		switch (compareFunction) {
			case COMPARISON_FUNCTION_ABS_DIFF :
				compareFunctionName = "absDiff";
				break;
			case COMPARISON_FUNCTION_GAUSS_SIM :
				compareFunctionName = "gaussSim";
				break;
			case COMPARISON_FUNCTION_DELTA :
				compareFunctionName = "delta";
				break;
			case COMPARISON_FUNCTION_EQUAL :
				compareFunctionName = "equal";
				break;
			case COMPARISON_FUNCTION_TABLE :
				compareFunctionName = "table";
				break;
			default :
				throw new MiningException("Unknown comparison function specified.");
		}
		measure.setCompareFunction(compareFunctionName);
		measure.setMinimum(String.valueOf(minCompareFunction));
		measure.setMaximum(String.valueOf(maxCompareFunction));

		// Distance type element:
		switch (type) {
			case TYPE_EUCLIDEAN :
				measure.setEuclidean(new com.prudsys.pdm.Adapters.PmmlVersion20.Euclidean());
				break;
			case TYPE_SQUARED_EUCLIDEAN :
				measure.setSquaredEuclidean(new com.prudsys.pdm.Adapters.PmmlVersion20.SquaredEuclidean());
				break;
			case TYPE_CHEBYCHEV :
				measure.setChebychev(new com.prudsys.pdm.Adapters.PmmlVersion20.Chebychev());
				break;
			case TYPE_CITY_BLOCK :
				measure.setCityBlock(new com.prudsys.pdm.Adapters.PmmlVersion20.CityBlock());
				break;
			case TYPE_MINKOVSKI :
				// Minkowski is the only type that carries a parameter
				final com.prudsys.pdm.Adapters.PmmlVersion20.Minkowski minkowskiElement = new com.prudsys.pdm.Adapters.PmmlVersion20.Minkowski();
				minkowskiElement.setPParameter(String.valueOf(minkPar));
				measure.setMinkowski(minkowskiElement);
				break;
			case TYPE_SIMPLE_MATCHING :
				measure.setSimpleMatching(new com.prudsys.pdm.Adapters.PmmlVersion20.SimpleMatching());
				break;
			case TYPE_JACCARD :
				measure.setJaccard(new com.prudsys.pdm.Adapters.PmmlVersion20.Jaccard());
				break;
			case TYPE_TANIMOTO :
				measure.setTanimoto(new com.prudsys.pdm.Adapters.PmmlVersion20.Tanimoto());
				break;
			case TYPE_BINARY_SIMILARITY :
				measure.setBinarySimilarity(new com.prudsys.pdm.Adapters.PmmlVersion20.BinarySimilarity());
				break;
			default :
				throw new MiningException("Unknown distance type specified.");
		}

		return measure;
	}

	/**
	 * Reads the settings of this distance from a PMML ComparisonMeasure object.
	 * 
	 * Read are the kind (distance or similarity), the compare function with its minimum and maximum, and the
	 * distance type (including the p parameter for Minkowski). The compare function names are the same ones that
	 * createPmmlObject writes: absDiff, gaussSim, delta, equal and table. A kind or compare function that is null
	 * or not one of the known names leaves the corresponding setting unchanged. If several distance type elements
	 * are present, the last one in the order checked below wins.
	 * 
	 * @param pmmlObject
	 *            PMML object of Distance; must be a ComparisonMeasure
	 * @see com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
	 * @exception MiningException
	 *                cannot read PMML object
	 */
	public void parsePmmlObject(Object pmmlObject) throws MiningException {

		// Get PMMLs ComparisonMeasure object:
		com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure cm = (com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure) pmmlObject;

		// Get distance type (literal-first equals keeps a null kind from raising a NullPointerException):
		final String kind = cm.getKind();
		if ("distance".equals(kind))
			measureType = MEASURE_TYPE_DISTANCE;
		else if ("similarity".equals(kind))
			measureType = MEASURE_TYPE_SIMILARITY;

		// Get compare function:
		final String function = cm.getCompareFunction();
		if ("absDiff".equals(function))
			compareFunction = COMPARISON_FUNCTION_ABS_DIFF;
		else if ("gaussSim".equals(function))
			// was compared against "gaussSime", which never matched the name written by createPmmlObject
			compareFunction = COMPARISON_FUNCTION_GAUSS_SIM;
		else if ("delta".equals(function))
			compareFunction = COMPARISON_FUNCTION_DELTA;
		else if ("equal".equals(function))
			compareFunction = COMPARISON_FUNCTION_EQUAL;
		else if ("table".equals(function))
			compareFunction = COMPARISON_FUNCTION_TABLE;
		minCompareFunction = Double.parseDouble(cm.getMinimum());
		maxCompareFunction = Double.parseDouble(cm.getMaximum());

		// Get type (independent checks: a later element overrides an earlier one):
		if (cm.getEuclidean() != null)
			type = TYPE_EUCLIDEAN;
		if (cm.getSquaredEuclidean() != null)
			type = TYPE_SQUARED_EUCLIDEAN;
		if (cm.getChebychev() != null)
			type = TYPE_CHEBYCHEV;
		if (cm.getCityBlock() != null)
			type = TYPE_CITY_BLOCK;
		if (cm.getMinkowski() != null) {
			type = TYPE_MINKOVSKI;
			com.prudsys.pdm.Adapters.PmmlVersion20.Minkowski minkowski = cm.getMinkowski();
			minkPar = Double.parseDouble(minkowski.getPParameter());
		}
		if (cm.getSimpleMatching() != null)
			type = TYPE_SIMPLE_MATCHING;
		if (cm.getJaccard() != null)
			type = TYPE_JACCARD;
		if (cm.getTanimoto() != null)
			type = TYPE_TANIMOTO;
		if (cm.getBinarySimilarity() != null) {
			// No parameters of the binary similarity element are read
			type = TYPE_BINARY_SIMILARITY;
		}
	}

}
distance.java - 源码说明

本页面展示了「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」中的 distance.java 源码文件，采用 Java 编程语言编写，共 913 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ALPHAMINERR相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?