📄 distance.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Models.Clustering;
import com.prudsys.pdm.Core.AttributeType;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.PmmlPresentable;
import com.prudsys.pdm.Input.MiningVector;
/**
* Class for calculating distance (or similarity) between 2 vectors in attribute space.
* <p>
*
* From PMML.
* <p>
*
* Corresponds to the PMML ComparisonMeasure element including all its elements inside.
*
* @see com.prudsys.pdm.Adapters.PmmlVersion20.ComparisonMeasure
*/
public class Distance extends com.prudsys.pdm.Cwm.Core.Expression implements PmmlPresentable {
// -----------------------------------------------------------------------
// Constants of distance and comparison functions
// -----------------------------------------------------------------------
// Distance types: values accepted by setType(int) and switched on in distance(...).
// NOTE: these names are public API, so their spelling (CHEBYCHEV, MINKOVSKI) must stay as is.
public static final int TYPE_EUCLIDEAN = 1;
public static final int TYPE_SQUARED_EUCLIDEAN = 2;
public static final int TYPE_CHEBYCHEV = 3;
public static final int TYPE_CITY_BLOCK = 4;
public static final int TYPE_MINKOVSKI = 5;
public static final int TYPE_SIMPLE_MATCHING = 6;
public static final int TYPE_JACCARD = 7;
public static final int TYPE_TANIMOTO = 8;
public static final int TYPE_BINARY_SIMILARITY = 9;
// Comparison functions between single attribute values: values for setCompareFunction(int).
public static final int COMPARISON_FUNCTION_ABS_DIFF = 101;
public static final int COMPARISON_FUNCTION_GAUSS_SIM = 102;
public static final int COMPARISON_FUNCTION_DELTA = 103;
public static final int COMPARISON_FUNCTION_EQUAL = 104;
public static final int COMPARISON_FUNCTION_TABLE = 105;
// Measure types: values for setMeasureType(int); selects distance (dissimilarity) or similarity.
public static final int MEASURE_TYPE_DISTANCE = 10001;
public static final int MEASURE_TYPE_SIMILARITY = 10002;
// Small tolerance constant; presumably used when comparing similarity values - TODO confirm against its usages.
public static final double SIMILARITY_EPSILON = 0.000001;
// -----------------------------------------------------------------------
// Variables declarations
// -----------------------------------------------------------------------
/**
 * Distance between vectors to be clustered, also referred to as comparison measure (PMML) or aggregation function
 * (JDM). Holds one of the TYPE_ constants; defaults to Euclidean.
 */
private int type = TYPE_EUCLIDEAN;
/** Measure type: distance (also dissimilarity) or similarity. Holds one of the MEASURE_TYPE_ constants. */
private int measureType = MEASURE_TYPE_DISTANCE;
/**
 * Comparison function between attribute values, also referred to as similarity measure (PMML) or attribute
 * comparison function (JDM). Holds one of the COMPARISON_FUNCTION_ constants.
 */
private int compareFunction = COMPARISON_FUNCTION_ABS_DIFF;
/** Use [0,1] normalization for all attributes. */
private boolean normalized = false;
/**
 * Norming constant applied when the inverted distance is used as similarity measure, i.e.
 * similarity = 1 / (1 + dist / simMeasNormConst).
 */
private double simMeasNormConst = 1.0;
/** Value of Minkowski parameter. Default: 2. */
private double minkPar = 2.0;
/** Value of the modified Euclidean parameter. Default: 8.0. Added by Xiaoming Li 2006/03/29. */
private double beta = 8.0;
// Value of the modified partition parameter (default 8.0), added by Xiaoguang Xu 2006/05/16. Currently disabled:
// private double alpha = 8.0;
/** Array of minimum values of all attributes. Required for normalization. */
private double[] minAtt;
/** Array of maximum values of all attributes. Required for normalization. */
private double[] maxAtt;
/** Array of attribute weights. When this is null, distance(...) uses a weight of 1 for every attribute. */
private double[] fieldWeights;
/** Minimum for the comparison function. Can be used in PMML. */
private double minCompareFunction;
/** Maximum for the comparison function. Can be used in PMML. */
private double maxCompareFunction;
// -----------------------------------------------------------------------
// Constructors
// -----------------------------------------------------------------------
/**
 * Creates a distance object that keeps all declared field defaults.
 * <p>
 * No weight, minimum or maximum arrays are allocated by this constructor.
 */
public Distance() {
    super();
}
/**
 * Creates a distance object sized for a given number of attributes.
 * <p>
 * All field weights start at 1.0. The minimum and maximum arrays are allocated with the same length and keep
 * their initial zero values.
 *
 * @param numbAttributes number of attributes
 */
public Distance(int numbAttributes) {
    double[] unitWeights = new double[numbAttributes];
    java.util.Arrays.fill(unitWeights, 1.0);
    fieldWeights = unitWeights;
    minAtt = new double[numbAttributes];
    maxAtt = new double[numbAttributes];
}
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
/**
 * Selects the distance type (Euclidean, squared Euclidean, city-block, ...).
 * <p>
 * The value is stored without validation; it is expected to be one of the {@code TYPE_} constants.
 *
 * @param type distance type
 */
public void setType(int type) {
    this.type = type;
}
/**
 * Reports the currently selected distance type (Euclidean, squared Euclidean, city-block, ...).
 *
 * @return distance type
 */
public int getType() {
    return this.type;
}
/**
 * Selects the comparison function applied to attribute values (absolute difference, Gauss similarity, ...).
 * <p>
 * The value is stored without validation; it is expected to be one of the {@code COMPARISON_FUNCTION_}
 * constants.
 *
 * @param compareFunction comparison function type
 */
public void setCompareFunction(int compareFunction) {
    this.compareFunction = compareFunction;
}
/**
 * Reports the comparison function applied to attribute values (absolute difference, Gauss similarity, ...).
 *
 * @return comparison function type
 */
public int getCompareFunction() {
    return this.compareFunction;
}
/**
 * Chooses whether this object measures distance or similarity.
 * <p>
 * The value is stored without validation; it is expected to be {@code MEASURE_TYPE_DISTANCE} or
 * {@code MEASURE_TYPE_SIMILARITY}.
 *
 * @param measureType new measure type
 */
public void setMeasureType(int measureType) {
    this.measureType = measureType;
}
/**
 * Reports whether this object measures distance or similarity.
 *
 * @return measure type
 */
public int getMeasureType() {
    return this.measureType;
}
/**
 * Replaces the per-attribute weights.
 * <p>
 * The array reference is stored as is (no copy is made), so later changes to the passed array are visible to
 * this object.
 *
 * @param fieldWeights array of field weights, one entry per attribute
 */
public void setFieldWeights(double[] fieldWeights) {
    this.fieldWeights = fieldWeights;
}
/**
 * Gives access to the per-attribute weights.
 * <p>
 * The internal array itself is returned, not a copy. The result is {@code null} if no weights were ever
 * assigned.
 *
 * @return array of field weights
 */
public double[] getFieldWeights() {
    return this.fieldWeights;
}
/**
 * Switches normalization for the distance calculation on or off.
 *
 * @param normalized {@code true} to use normalization, {@code false} otherwise
 */
public void setNormalized(boolean normalized) {
    this.normalized = normalized;
}
/**
 * Tells whether normalization is used for the distance calculation.
 *
 * @return {@code true} if normalized, else {@code false}
 */
public boolean isNormalized() {
    return this.normalized;
}
/**
 * Stores the minimum for the comparison function. Supplementary value.
 *
 * @param minCompareFunction minimum for comparison function
 */
public void setMinCompareFunction(double minCompareFunction) {
    this.minCompareFunction = minCompareFunction;
}
/**
 * Reports the minimum for the comparison function.
 *
 * @return minimum for comparison function
 */
public double getMinCompareFunction() {
    return this.minCompareFunction;
}
/**
 * Stores the maximum for the comparison function. Supplementary value.
 *
 * @param maxCompareFunction maximum for comparison function
 */
public void setMaxCompareFunction(double maxCompareFunction) {
    this.maxCompareFunction = maxCompareFunction;
}
/**
 * Reports the maximum for the comparison function.
 *
 * @return maximum for comparison function
 */
public double getMaxCompareFunction() {
    return this.maxCompareFunction;
}
/**
 * Assigns the Minkowski parameter (initially 2.0).
 *
 * @param minkPar new value of the Minkowski parameter
 */
public void setMinkPar(double minkPar) {
    this.minkPar = minkPar;
}
/**
 * Reports the beta parameter of the modified Euclidean distance (initially 8.0).
 *
 * @return value of beta
 */
public double getBeta() {
    return this.beta;
}
/**
 * Assigns the beta parameter of the modified Euclidean distance.
 *
 * @param beta new value of beta
 */
public void setBeta(double beta) {
    this.beta = beta;
}
/**
* gets value of Alpha
*
* @return value of Alpha
*/
//public double getAlpha() {
//return alpha;
//}
/**
* sets value of Alpha
*
* @param alpha
* new value of Alpha
*/
//public void setAlpha(double alpha) {
//this.alpha = alpha;
//}
/**
 * Reports the Minkowski parameter.
 *
 * @return value of the Minkowski parameter
 */
public double getMinkPar() {
    return this.minkPar;
}
/**
 * Replaces the per-attribute minimum values, which are required for normalization.
 * <p>
 * The array reference is stored as is; no copy is made.
 *
 * @param minAtt array of minima of all attributes
 */
public void setMinAtt(double[] minAtt) {
    this.minAtt = minAtt;
}
/**
 * Gives access to the per-attribute minimum values, which are required for normalization.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return array of minima of all attributes
 */
public double[] getMinAtt() {
    return this.minAtt;
}
/**
 * Replaces the per-attribute maximum values, which are required for normalization.
 * <p>
 * The array reference is stored as is; no copy is made.
 *
 * @param maxAtt array of maxima of all attributes
 */
public void setMaxAtt(double[] maxAtt) {
    this.maxAtt = maxAtt;
}
/**
 * Gives access to the per-attribute maximum values, which are required for normalization.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return array of maxima of all attributes
 */
public double[] getMaxAtt() {
    return this.maxAtt;
}
/**
 * Reports the norming constant applied when the inverted distance is used as similarity measure.
 *
 * @return norming constant of the similarity measure
 */
public double getSimMeasNormConst() {
    return this.simMeasNormConst;
}
/**
 * Assigns the norming constant applied when the inverted distance is used as similarity measure (default: 1).
 *
 * @param simMeasNormConst new norming constant of the similarity measure
 */
public void setSimMeasNormConst(double simMeasNormConst) {
    this.simMeasNormConst = simMeasNormConst;
}
// -----------------------------------------------------------------------
// Methods of distance calculation
// -----------------------------------------------------------------------
/**
* Calculates distance between two mining vectors. Both vectors must have the same meta data.
*
* @param vec1
* mining vector 1
* @param vec2
* mining vector 2
* @return distance between the two vectors
* @exception MiningException
* cannot calculate distance
*/
public double distance(MiningVector vec1, MiningVector vec2) throws MiningException {
// Initializations:
int numbAtt = vec1.getValues().length;
double[] weights = new double[numbAtt];
if (fieldWeights == null) {
for (int i = 0; i < numbAtt; i++)
weights[i] = 1.0;
} else
weights = fieldWeights;
// Distance (or similarity):
double dist = 0.0;
// Add type:
switch (type) {
case TYPE_EUCLIDEAN :
for (int i = 0; i < numbAtt; i++) {
double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
dist = dist + weights[i] * diff * diff;
}
;
// if (normalized) dist = dist / numbAtt; // causes inconsistencies
dist = Math.sqrt(dist);
if (measureType == MEASURE_TYPE_SIMILARITY)
dist = 1.0 / (1.0 + dist / simMeasNormConst);
break;
case TYPE_SQUARED_EUCLIDEAN :
for (int i = 0; i < numbAtt; i++) {
double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
dist = dist + weights[i] * diff * diff;
}
;
if (measureType == MEASURE_TYPE_SIMILARITY)
dist = 1.0 / (1.0 + dist / simMeasNormConst);
break;
case TYPE_CHEBYCHEV :
for (int i = 0; i < numbAtt; i++) {
double diff = AttDist(vec1.getMetaData(), i, vec1.getValue(i), vec2.getValue(i));
diff = weights[i] * diff;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -