📄 euclideandistance.java
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    EuclideanDistance.java
 *    Copyright (C) 1999-2005 University of Waikato
 *
 */

package weka.core;

import java.util.Enumeration;
import java.util.Vector;
import java.io.Serializable;
import java.io.*;

/**
 * Implements the Euclidean distance (or similarity) function.
 *
 * One object defines not one distance but the data model in which
 * the distances between objects of that data model can be computed.
 *
 * Attention: For efficiency reasons only few consistency checks are made
 * (for example, whether the data models of the two instances are exactly
 * the same is not verified).
 *
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
 * @version $Revision: 1.7 $
 */
public class EuclideanDistance
  implements DistanceFunction, Cloneable, Serializable {

  /** The data the distance function works on. */
  protected Instances m_Data;

  /** True if normalization is turned off (default false). */
  protected boolean m_DontNormalize = false;

  /** The number of attributes that contribute to a prediction. */
  protected double m_NumAttributesUsed;

  /**
   * Constructs an Euclidean Distance object without attaching any
   * instances; the body is intentionally empty, so {@code m_Data} stays
   * unset until instances are supplied later.
   */
  public EuclideanDistance() {
  }

  /**
   * Constructs an Euclidean Distance object.
* @param data the instances the distance function should work on */ public EuclideanDistance(Instances data) { //super(data); m_Data = data; try { initializeRanges(); } catch(Exception ex) {} setNumAttributesUsed(); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(1); newVector.add(new Option("\tTurns off the normalization of attribute "+ "values in distance calculation.\n", "D", 0,"-D")); return newVector.elements(); } /** * Parses a given list of options. Valid options are:<p> * * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setDontNormalize(Utils.getFlag('D', options)); } /** * Gets the current settings of IBk. * * @return an array of strings suitable for passing to setOptions() */ public String [] getOptions() { String [] options = new String[1]; if(getDontNormalize() == true) options[0] = "-D"; else options[0] = ""; return options; } /** Sets the instances */ public void setInstances(Instances insts) { m_Data = insts; try { initializeRanges(); } catch(Exception ex) {} setNumAttributesUsed(); } /** returns the instances currently set */ public Instances getInstances() { return m_Data; } /** Tip text for the property */ public String dontNormalizeTipText() { return "Whether if the normalization of attributes should be turned off " + "for distance calculation (Default: false i.e. attribute values " + "are normalized). "; } /** Sets whether if the attribute values are to be normalized in distance * calculation. */ public void setDontNormalize(boolean dontNormalize) { m_DontNormalize = dontNormalize; } /** Gets whether if the attribute values are to be normazlied in distance * calculation. (default false i.e. attribute values are normalized.) 
*/
  public boolean getDontNormalize() {
    return m_DontNormalize;
  }

  /**
   * Hands the given instance to {@code updateRanges}.
   *
   * @param ins the instance to pass on
   * @throws Exception if {@code updateRanges} fails
   */
  public void update(Instance ins) throws Exception {
    updateRanges(ins);
  }

  /**
   * Calculates the distance (or similarity) between two instances. The
   * result is already on the final scale: the square root is taken of the
   * value delivered by the cut-off variant of this method, called with
   * Double.MAX_VALUE as the cut-off.
   *
   * @param first the first instance
   * @param second the second instance
   * @return the distance between the two given instances.
   * @throws Exception declared for callers; none is raised by this method's
   * own statements
   */
  public double distance(Instance first, Instance second) throws Exception {
    double squaredDistance = distance(first, second, Double.MAX_VALUE);
    return Math.sqrt(squaredDistance);
  }

  /**
   * Calculates the distance (or similarity) between two instances. The
   * returned value has to be passed to the postprocess method later on to
   * put it on the correct scale. <br>
   * P.S.: Please don't mix the use of this function with
   * distance(Instance first, Instance second), as that already does the
   * post processing. Please consider passing Double.MAX_VALUE as the
   * cutOffValue to this function and then do the post processing on all
   * the distances afterwards.
   *
   * @param first the first instance
   * @param second the second instance
   * @param cutOffValue if the distance being calculated becomes larger than
   * cutOffValue then the rest of the calculation is skipped and
   * Double.MAX_VALUE is returned. Otherwise the correct distance is
   * returned.
   * @return the distance between the two given instances or
   * Double.MAX_VALUE.
*/ public double distance(Instance first, Instance second, double cutOffValue) { //debug method pls remove after use return distance(first, second, cutOffValue, false); } public double distance(Instance first, Instance second, double cutOffValue, boolean print) {// //if (!inRanges(first,m_Ranges))// // throw new Exception("First instance is not in ranges.\n"+"First: "+first); //OOPS("Not in ranges");// //OOPS(" dist first "+ first);// //if (!inRanges(second,m_Ranges))// // throw new Exception("Second instance is not in ranges.\n"+"Second: "+second); //OOPS("Not in ranges");// //OOPS(" dist second "+ second); double distance = 0; int firstI, secondI; if(print==true) { OOPS("Instance1: "+first); OOPS("Instance2: "+second); OOPS("cutOffValue: "+cutOffValue); } for (int p1 = 0, p2 = 0; p1 < first.numValues() || p2 < second.numValues();) { if (p1 >= first.numValues()) { firstI = m_Data.numAttributes(); } else { firstI = first.index(p1); } if (p2 >= second.numValues()) { secondI = m_Data.numAttributes(); } else { secondI = second.index(p2); } if (firstI == m_Data.classIndex()) { p1++; continue; } if (secondI == m_Data.classIndex()) { p2++; continue; } double diff; if(print==true) System.out.println("valueSparse(p1): "+first.valueSparse(p1)+" valueSparse(p2): "+second.valueSparse(p2)); if (firstI == secondI) { diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2)); p1++; p2++; } else if (firstI > secondI) { diff = difference(secondI, 0, second.valueSparse(p2)); p2++; } else { diff = difference(firstI, first.valueSparse(p1), 0); p1++; } if(print==true) System.out.println("diff: "+diff); distance += diff * diff; if(distance > cutOffValue) //Utils.gr(distance, cutOffValue)) return Double.MAX_VALUE; if(print==true) System.out.println("distance: "+distance); } if(print==true) { OOPS("Instance 1: "+first); OOPS("Instance 2: "+second); OOPS("distance: "+distance); OOPS("AttribsUsed: "+m_NumAttributesUsed); OOPS("distance/AttribsUsed: "+Math.sqrt(distance / 
m_NumAttributesUsed)); } //distance = Math.sqrt(distance); return distance; } /** * Does post processing of the distances (if necessary) returned by * distance(distance(Instance first, Instance second, double cutOffValue). It * is necessary to do so to get the correct distances if * distance(distance(Instance first, Instance second, double cutOffValue) is * used. This is because that function actually returns the squared distance * to avoid inaccuracies arising from floating point comparison. */ public void postProcessDistances(double distances[]) { for(int i=0; i<distances.length; i++) { distances[i] = Math.sqrt(distances[i]); } } /** * Computes the difference between two given attribute * values. */ private double difference(int index, double val1, double val2) { switch (m_Data.attribute(index).type()) { case Attribute.NOMINAL: // If attribute is nominal if(Instance.isMissingValue(val1) || Instance.isMissingValue(val2) || ((int)val1 != (int)val2)) { return 1; } else { return 0; } case Attribute.NUMERIC: // If attribute is numeric if (Instance.isMissingValue(val1) || Instance.isMissingValue(val2)) { if(Instance.isMissingValue(val1) && Instance.isMissingValue(val2)) { if(m_DontNormalize==false) //We are doing normalization return 1; else return (m_Ranges[index][R_MAX] - m_Ranges[index][R_MIN]); } else { double diff; if (Instance.isMissingValue(val2)) { diff = (m_DontNormalize==false) ? norm(val1, index) : val1; } else { diff = (m_DontNormalize==false) ? norm(val2, index) : val2; } if (m_DontNormalize==false && diff < 0.5) { diff = 1.0 - diff; } else if (m_DontNormalize==true) { if((m_Ranges[index][R_MAX]-diff) > (diff-m_Ranges[index][R_MIN])) return m_Ranges[index][R_MAX]-diff; else return diff-m_Ranges[index][R_MIN]; } return diff; } } else { return (m_DontNormalize==false) ? (norm(val1, index) - norm(val2, index)) : (val1 - val2); } default: return 0; } } /** * Normalizes a given value of a numeric attribute. 
*
   * Returns 0 if the attribute's stored minimum is NaN or if its stored
   * minimum and maximum are equal; otherwise the value's offset from the
   * minimum divided by the stored range width.
   *
   * @param x the value to be normalized
   * @param i the attribute's index
   * @return the normalized value
   */
  private double norm(double x,int i) {

    if (Double.isNaN(m_Ranges[i][R_MIN]) || m_Ranges[i][R_MAX]==m_Ranges[i][R_MIN]) { //Utils.eq(m_Ranges[i][R_MAX], m_Ranges[i][R_MIN])) {
      return 0;
    } else {
      return (x - m_Ranges[i][R_MIN]) / (m_Ranges[i][R_WIDTH]);
    }
  }

  /**
   * Returns the value in the middle of the given range, computed as the
   * range's minimum plus half of its width.
   *
   * @param ranges the range of one dimension, read at R_MIN and R_WIDTH
   * @return the middle value
   */
  public double getMiddle(double[] ranges) {

    double middle = ranges[R_MIN] + ranges[R_WIDTH] * 0.5;
    return middle;
  }

  /**
   * Returns the index of the closest point to the current instance.
   * Index is index in Instances object that is the second parameter.
   *
   * @param instance the instance to find the closest point for
   * @param allPoints all points
   * @param pointList indices into allPoints of the candidate points
   * @return the index of the closest point
   */
  public int closestPoint(Instance instance,
                          Instances allPoints,
                          int [] pointList) throws Exception {
    // NOTE(review): the running minimum is a double but starts at
    // Integer.MAX_VALUE; a candidate whose (squared) distance is not below
    // that value can never be selected. Double.MAX_VALUE looks like the
    // intended start value -- confirm.
    double minDist = Integer.MAX_VALUE;
    int bestPoint = 0;
    for (int i = 0; i < pointList.length; i++) {
      // Full distance computation: the cut-off is set to Double.MAX_VALUE.
      double dist = distance(instance, allPoints.instance(pointList[i]), Double.MAX_VALUE);
      if (dist < minDist) {
        minDist = dist;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -