⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 distance.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Title: XELOPES Data Mining Library
 * Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
 * Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
 * Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
 * @author Michael Thess
 * @version 1.0
 */

package com.prudsys.pdm.Models.Clustering;

import com.prudsys.pdm.Core.AttributeType;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.PmmlPresentable;
import com.prudsys.pdm.Input.MiningVector;

/**
 * Class for calculating distance (or similarity) between 2 vectors in attribute space.
 * <p>
 * 
 * From PMML.
 * <p>
 * 
 * Corresponds to the PMML ComparisonMeasure element including all its elements inside.
 * 
 * @see com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
 */
public class Distance extends com.prudsys.pdm.Cwm.Core.Expression implements PmmlPresentable {
	// -----------------------------------------------------------------------
	// Constants of distance and comparison functions
	// -----------------------------------------------------------------------
	// Codes for the vector distance type stored in the 'type' field (see setType/getType).
	public static final int TYPE_EUCLIDEAN = 1;

	public static final int TYPE_SQUARED_EUCLIDEAN = 2;

	public static final int TYPE_CHEBYCHEV = 3;

	public static final int TYPE_CITY_BLOCK = 4;

	public static final int TYPE_MINKOVSKI = 5;

	public static final int TYPE_SIMPLE_MATCHING = 6;

	public static final int TYPE_JACCARD = 7;

	public static final int TYPE_TANIMOTO = 8;

	public static final int TYPE_BINARY_SIMILARITY = 9;

	// Codes for the per-attribute comparison function stored in the 'compareFunction' field.
	public static final int COMPARISON_FUNCTION_ABS_DIFF = 101;

	public static final int COMPARISON_FUNCTION_GAUSS_SIM = 102;

	public static final int COMPARISON_FUNCTION_DELTA = 103;

	public static final int COMPARISON_FUNCTION_EQUAL = 104;

	public static final int COMPARISON_FUNCTION_TABLE = 105;

	// Codes for the 'measureType' field: whether results are reported as a distance or as a similarity.
	public static final int MEASURE_TYPE_DISTANCE = 10001;

	public static final int MEASURE_TYPE_SIMILARITY = 10002;

	// NOTE(review): not referenced in the visible part of this file; presumably a tolerance used when
	// handling similarity values - confirm against its usages.
	public static final double SIMILARITY_EPSILON = 0.000001;

	// -----------------------------------------------------------------------
	// Variables declarations
	// -----------------------------------------------------------------------
	/**
	 * Distance between vectors to be clustered, also referred to as comparison measure (PMML) or aggregation function
	 * (JDM). Holds one of the TYPE_* codes.
	 */
	private int type = TYPE_EUCLIDEAN;

	/** Similarity type: distance (also dissimilarity) or similarity. Holds one of the MEASURE_TYPE_* codes. */
	private int measureType = MEASURE_TYPE_DISTANCE;

	/**
	 * Comparison function between attribute values, also referred to as similarity measure (PMML) or attribute
	 * comparison function (JDM). Holds one of the COMPARISON_FUNCTION_* codes.
	 */
	private int compareFunction = COMPARISON_FUNCTION_ABS_DIFF;

	/** Use [0,1] normalization for all attributes. */
	private boolean normalized = false;

	/** Norming constant if distance inversion is used as similarity measure. Used as a divisor in distance(). */
	private double simMeasNormConst = 1.0;

	/** Value of Minkowski parameter. Default: 2. */
	private double minkPar = 2.0;
	/** Value of modified Euclidean parameter. Default: 8.0. Added by Xiaoming Li 2006/03/29. */
	private double beta = 8.0;
	/* Disabled field: value of modified partition parameter. Default value: 8.0. Added by Xiaoguang Xu 2006/05/16. */
	// private double alpha = 8.0;
	/** Array of minimum values of all attributes. Required for normalization. Null until allocated or set. */
	private double[] minAtt;

	/** Array of maximum values of all attributes. Required for normalization. Null until allocated or set. */
	private double[] maxAtt;

	/**
	 * Array of attribute weights. The sized constructor fills it with 1; otherwise it stays null until set, and
	 * distance() then uses a weight of 1 for every attribute.
	 */
	private double[] fieldWeights;

	/** Can be used in PMML. */
	private double minCompareFunction;

	/** Can be used in PMML. */
	private double maxCompareFunction;

	// -----------------------------------------------------------------------
	// Constructors
	// -----------------------------------------------------------------------
	/**
	 * Empty constructor.
	 */
	public Distance() {
		// Nothing to set up: every field keeps its declared default, and the
		// fieldWeights, minAtt and maxAtt arrays remain null.
		super();
	}

	/**
	 * Constructor for a given number of attributes.
	 * 
	 * @param numbAttributes
	 *            number of attributes
	 */
	public Distance(int numbAttributes) {
		// One slot per attribute in each of the three per-attribute arrays.
		this.fieldWeights = new double[numbAttributes];
		this.minAtt = new double[numbAttributes];
		this.maxAtt = new double[numbAttributes];

		// Every attribute starts with weight 1; the min/max arrays stay at 0.0.
		java.util.Arrays.fill(this.fieldWeights, 1.0);
	}

	// -----------------------------------------------------------------------
	// Getter and setter methods
	// -----------------------------------------------------------------------
	/**
	 * Sets distance type (Euclidean, Squared Euclidean, City-Block, ...).
	 * 
	 * @param type
	 *            distance type
	 */
	public void setType(int type) {
		// Stored unchecked (no test against the TYPE_* codes); distance() switches on this value.
		this.type = type;
	}

	/**
	 * Returns distance type (Euclidean, Squared Euclidean, City-Block, ...).
	 * 
	 * @return distance type
	 */
	public int getType() {
		// TYPE_EUCLIDEAN unless changed through setType.
		return this.type;
	}

	/**
	 * Sets comparison function type (Abs Diff, Gauss-Sim, ...).
	 * 
	 * @param compareFunction
	 *            comparison function type
	 */
	public void setCompareFunction(int compareFunction) {
		// Stored unchecked; no test against the COMPARISON_FUNCTION_* codes is made here.
		this.compareFunction = compareFunction;
	}

	/**
	 * Returns comparison function type (Abs Diff, Gauss-Sim, ...).
	 * 
	 * @return comparison function type
	 */
	public int getCompareFunction() {
		// COMPARISON_FUNCTION_ABS_DIFF unless changed through setCompareFunction.
		return this.compareFunction;
	}

	/**
	 * Sets distance measure type (distance, similarity).
	 * 
	 * @param measureType
	 *            new measure type
	 */
	public void setMeasureType(int measureType) {
		// Stored unchecked; distance() compares this value against MEASURE_TYPE_SIMILARITY.
		this.measureType = measureType;
	}

	/**
	 * Returns distance measure type (distance, similarity).
	 * 
	 * @return measure type
	 */
	public int getMeasureType() {
		// MEASURE_TYPE_DISTANCE unless changed through setMeasureType.
		return this.measureType;
	}

	/**
	 * Sets array of field weights for all attributes.
	 * 
	 * @param fieldWeights
	 *            array of field weights
	 */
	public void setFieldWeights(double[] fieldWeights) {
		// Keeps the caller's array by reference (no copy). distance() indexes it once per vector
		// attribute, and falls back to weights of 1 when this is null.
		this.fieldWeights = fieldWeights;
	}

	/**
	 * Returns array of field weights for all attributes.
	 * 
	 * @return array of field weights
	 */
	public double[] getFieldWeights() {
		// Hands out the internal array itself, not a copy; null if it was never allocated or set.
		return this.fieldWeights;
	}

	/**
	 * Set normalization for distance calculation.
	 * 
	 * @param normalized
	 *            normalization or not
	 */
	public void setNormalized(boolean normalized) {
		// Only records the flag; nothing is recomputed here.
		this.normalized = normalized;
	}

	/**
	 * Returns whether normalization for distance calculation is used.
	 * 
	 * @return true if normalized, else false
	 */
	public boolean isNormalized() {
		// false unless switched on through setNormalized.
		return this.normalized;
	}

	/**
	 * Sets minimum for comparison function. Supplementary.
	 * 
	 * @param minCompareFunction
	 *            minimum for comparison function
	 */
	public void setMinCompareFunction(double minCompareFunction) {
		// Stored as given; not checked against maxCompareFunction.
		this.minCompareFunction = minCompareFunction;
	}

	/**
	 * Returns minimum for comparison function.
	 * 
	 * @return minimum for comparison function
	 */
	public double getMinCompareFunction() {
		// 0.0 until a value is supplied through setMinCompareFunction.
		return this.minCompareFunction;
	}

	/**
	 * Sets maximum for comparison function. Supplementary.
	 * 
	 * @param maxCompareFunction
	 *            maximum for comparison function
	 */
	public void setMaxCompareFunction(double maxCompareFunction) {
		// Stored as given; not checked against minCompareFunction.
		this.maxCompareFunction = maxCompareFunction;
	}

	/**
	 * Returns maximum for comparison function.
	 * 
	 * @return maximum for comparison function
	 */
	public double getMaxCompareFunction() {
		// 0.0 until a value is supplied through setMaxCompareFunction.
		return this.maxCompareFunction;
	}

	/**
	 * Sets value of Minkovski parameter.
	 * 
	 * @param minkPar
	 *            new value of Minkovski parameter
	 */
	public void setMinkPar(double minkPar) {
		// Stored as given; no range check is applied here.
		this.minkPar = minkPar;
	}
	/**
	 * gets value of Beta
	 * 
	 * @return value of Beta
	 */
	public double getBeta() {
		// 8.0 unless changed through setBeta.
		return this.beta;
	}
	/**
	 * sets value of Beta
	 * 
	 * @param beta
	 *            new value of Beta
	 */
	public void setBeta(double beta) {
		// Stored as given; no range check is applied here.
		this.beta = beta;
	}

	/**
	 * gets value of Alpha
	 * 
	 * @return value of Alpha
	 */
	//public double getAlpha() {
		//return alpha;
	//}
	/**
	 * sets value of Alpha
	 * 
	 * @param alpha
	 *            new value of Alpha
	 */
	//public void setAlpha(double alpha) {
		//this.alpha = alpha;
	//}
	/**
	 * Returns value of Minkovski parameter.
	 * 
	 * @return value of Minkovski parameter
	 */
	public double getMinkPar() {
		// 2.0 unless changed through setMinkPar.
		return this.minkPar;
	}

	/**
	 * Sets array of minimum values of all attributes. Required for normalization.
	 * 
	 * @param minAtt
	 *            array of minima of all attributes
	 */
	public void setMinAtt(double[] minAtt) {
		// Keeps the caller's array by reference (no copy); its length is not checked here.
		this.minAtt = minAtt;
	}

	/**
	 * Returns array of minimum values of all attributes. Required for normalization.
	 * 
	 * @return array of minima of all attributes
	 */
	public double[] getMinAtt() {
		// Hands out the internal array itself, not a copy; null if it was never allocated or set.
		return this.minAtt;
	}

	/**
	 * Sets array of maximum values of all attributes. Required for normalization.
	 * 
	 * @param maxAtt
	 *            array of maxima of all attributes
	 */
	public void setMaxAtt(double[] maxAtt) {
		// Keeps the caller's array by reference (no copy); its length is not checked here.
		this.maxAtt = maxAtt;
	}

	/**
	 * Returns array of maximum values of all attributes. Required for normalization.
	 * 
	 * @return array of maxima of all attributes
	 */
	public double[] getMaxAtt() {
		// Hands out the internal array itself, not a copy; null if it was never allocated or set.
		return this.maxAtt;
	}

	/**
	 * Return norming constant applied if distance inversion is used as similarity measure.
	 * 
	 * @return norming constant of similarity measure
	 */
	public double getSimMeasNormConst() {
		// 1.0 unless changed through setSimMeasNormConst.
		return this.simMeasNormConst;
	}

	/**
	 * Set norming constant applied if distance inversion is used as similarity measure (default: 1).
	 * 
	 * @param simMeasNormConst
	 *            new norm constant of similarity measure
	 */
	public void setSimMeasNormConst(double simMeasNormConst) {
		// Not validated here; distance() divides by this value when the measure type is similarity.
		this.simMeasNormConst = simMeasNormConst;
	}

	// -----------------------------------------------------------------------
	// Methods of distance calculation
	// -----------------------------------------------------------------------
	/**
	 * Calculates distance between two mining vectors. Both vectors must have the same meta data.
	 * 
	 * @param vec1
	 *            mining vector 1
	 * @param vec2
	 *            mining vector 2
	 * @return distance between the two vectors
	 * @exception MiningException
	 *                cannot calculate distance
	 */
	public double distance(MiningVector vec1, MiningVector vec2) throws MiningException {

		// Initializations:
		int numbAtt = vec1.getValues().length;
		double[] weights = new double[numbAtt];
		if (fieldWeights == null) {
			for (int i = 0; i < numbAtt; i++)
				weights[i] = 1.0;
		} else
			weights = fieldWeights;

		// Distance (or similarity):
		double dist = 0.0;

		// Add type:
		switch (type) {
			case TYPE_EUCLIDEAN :
				for (int i = 0; i < numbAtt; i++) {
					double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
					dist = dist + weights[i] * diff * diff;
				}
				;
				// if (normalized) dist = dist / numbAtt; // causes inconsistencies
				dist = Math.sqrt(dist);

				if (measureType == MEASURE_TYPE_SIMILARITY)
					dist = 1.0 / (1.0 + dist / simMeasNormConst);

				break;
			case TYPE_SQUARED_EUCLIDEAN :
				for (int i = 0; i < numbAtt; i++) {
					double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
					dist = dist + weights[i] * diff * diff;
				}
				;

				if (measureType == MEASURE_TYPE_SIMILARITY)
					dist = 1.0 / (1.0 + dist / simMeasNormConst);

				break;
			case TYPE_CHEBYCHEV :
				for (int i = 0; i < numbAtt; i++) {
					double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
					diff = weights[i] * diff;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -