⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 euclideandistance.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    EuclideanDistance.java
 *    Copyright (C) 1999-2005 University of Waikato
 *
 */

package weka.core;

import java.util.Enumeration;
import java.util.Vector;
import java.io.Serializable;
import java.io.*;

/**
 * Implements the Euclidean distance (or similarity) function.
 *
 * One object defines not one distance but the data model in which
 * the distances between objects of that data model can be computed.
 *
 * Attention: For efficiency reasons, consistency checks (such as whether
 * the data models of the two instances are exactly the same) are kept to
 * a minimum.
 *
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
 * @version $Revision: 1.7 $
 */
public class EuclideanDistance implements DistanceFunction, Cloneable,
                                          Serializable {

  /** The data (instances) this distance function works on. */
  protected Instances m_Data;

  /** True if normalization is turned off (default false). */
  protected boolean m_DontNormalize = false;

  /** The number of attributes that contribute to a prediction. */
  protected double m_NumAttributesUsed;

  /**
   * Constructs an Euclidean Distance object without any data. No field is
   * touched here, so the fields keep their declared defaults.
   */
  public EuclideanDistance() {  }

  /**
   * Constructs an Euclidean Distance object.
* @param data the instances the distance function should work on   */  public EuclideanDistance(Instances data) {    //super(data);    m_Data = data;    try { initializeRanges(); }    catch(Exception ex) {}    setNumAttributesUsed();  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector newVector = new Vector(1);    newVector.add(new Option("\tTurns off the normalization of attribute "+                             "values in distance calculation.\n",                             "D", 0,"-D"));    return newVector.elements();  }    /**   * Parses a given list of options. Valid options are:<p>   *   * @param options the list of options as an array of strings   * @exception Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    setDontNormalize(Utils.getFlag('D', options));  }  /**   * Gets the current settings of IBk.   *   * @return an array of strings suitable for passing to setOptions()   */  public String [] getOptions() {    String [] options = new String[1];        if(getDontNormalize() == true)       options[0] = "-D";    else      options[0] = "";        return options;  }    /** Sets the instances */  public void setInstances(Instances insts) {    m_Data = insts;    try { initializeRanges(); }    catch(Exception ex) {}    setNumAttributesUsed();  }    /** returns the instances currently set */  public Instances getInstances() {    return m_Data;  }    /**  Tip text for the property  */  public String dontNormalizeTipText() {    return "Whether if the normalization of attributes should be turned off " +           "for distance calculation (Default: false i.e. attribute values " +           "are normalized). ";  }    /** Sets whether if the attribute values are to be normalized in distance   *  calculation.   
*/
  public void setDontNormalize(boolean dontNormalize) {
    this.m_DontNormalize = dontNormalize;
  }

  /**
   * Reports whether normalization of the attribute values is turned off in
   * distance calculation (default false, i.e. attribute values are
   * normalized).
   *
   * @return true if normalization is turned off
   */
  public boolean getDontNormalize() {
    return this.m_DontNormalize;
  }

  /**
   * Hands the given instance on to updateRanges.
   *
   * @param ins the instance passed to updateRanges
   * @throws Exception declared by this method; whatever updateRanges raises
   *         is passed through unchanged
   */
  public void update(Instance ins) throws Exception {
    updateRanges(ins);
  }

  /**
   * Calculates the distance (or similarity) between two instances. The
   * result is the square root of what the three-argument overload returns
   * when called with Double.MAX_VALUE as the cut-off.
   *
   * @param first the first instance
   * @param second the second instance
   *
   * @return the distance between the two given instances.
   */
  public double distance(Instance first, Instance second) throws Exception {
    final double uncutResult = distance(first, second, Double.MAX_VALUE);
    return Math.sqrt(uncutResult);
  }

  /**
   * Calculates the distance (or similarity) between two instances. The
   * returned value needs to be passed to the post-processing method later on
   * to put it on the correct scale. <br>
   * P.S.: Please don't mix the use of this function with
   * distance(Instance first, Instance second), as that already does the
   * post-processing. Please consider passing Double.MAX_VALUE as the
   * cutOffValue to this function and then later on do the post-processing on
   * all the distances.
   *
   * @param first the first instance
   * @param second the second instance
   * @param cutOffValue If the distance being calculated becomes larger than
   *        cutOffValue then the rest of the calculation is skipped and
   *        Double.MAX_VALUE is returned. Otherwise the correct distance is
   *        returned.
   * @return the distance between the two given instances or Double.MAX_VALUE.
*/  public double distance(Instance first, Instance second, double cutOffValue) { //debug method pls remove after use    return distance(first, second, cutOffValue, false);  }  public double distance(Instance first, Instance second, double cutOffValue, boolean print) {//    //if (!inRanges(first,m_Ranges))//    //	throw new Exception("First instance is not in ranges.\n"+"First: "+first); //OOPS("Not in ranges");//    //OOPS(" dist first  "+ first);//    //if (!inRanges(second,m_Ranges))//    //	throw new Exception("Second instance is not in ranges.\n"+"Second: "+second); //OOPS("Not in ranges");//    //OOPS(" dist second "+ second);    double distance = 0;    int firstI, secondI;        if(print==true) {      OOPS("Instance1: "+first);      OOPS("Instance2: "+second);      OOPS("cutOffValue: "+cutOffValue);    }        for (int p1 = 0, p2 = 0; 	 p1 < first.numValues() || p2 < second.numValues();) {      if (p1 >= first.numValues()) {	firstI = m_Data.numAttributes();      } else {	firstI = first.index(p1);       }      if (p2 >= second.numValues()) {	secondI = m_Data.numAttributes();      } else {	secondI = second.index(p2);      }      if (firstI == m_Data.classIndex()) {	p1++; continue;      }       if (secondI == m_Data.classIndex()) {	p2++; continue;      }       double diff;      if(print==true)         System.out.println("valueSparse(p1): "+first.valueSparse(p1)+" valueSparse(p2): "+second.valueSparse(p2));            if (firstI == secondI) {	diff = difference(firstI, 			  first.valueSparse(p1),			  second.valueSparse(p2));	p1++; p2++;      } else if (firstI > secondI) {	diff = difference(secondI, 			  0, second.valueSparse(p2));	p2++;      } else {	diff = difference(firstI, 			  first.valueSparse(p1), 0);	p1++;      }      if(print==true)         System.out.println("diff: "+diff);            distance += diff * diff;      if(distance > cutOffValue) //Utils.gr(distance, cutOffValue))        return Double.MAX_VALUE;      if(print==true)        
System.out.println("distance: "+distance);    }    if(print==true) {      OOPS("Instance 1: "+first);      OOPS("Instance 2: "+second);      OOPS("distance: "+distance);      OOPS("AttribsUsed: "+m_NumAttributesUsed);      OOPS("distance/AttribsUsed: "+Math.sqrt(distance / m_NumAttributesUsed));    }    //distance = Math.sqrt(distance);    return distance;  }    /**   * Does post processing of the distances (if necessary) returned by   * distance(distance(Instance first, Instance second, double cutOffValue). It   * is necessary to do so to get the correct distances if    * distance(distance(Instance first, Instance second, double cutOffValue) is    * used. This is because that function actually returns the squared distance   * to avoid inaccuracies arising from floating point comparison.   */  public void postProcessDistances(double distances[]) {    for(int i=0; i<distances.length; i++) {      distances[i] = Math.sqrt(distances[i]);    }  }   /**   * Computes the difference between two given attribute   * values.   */  private double difference(int index, double val1, double val2) {        switch (m_Data.attribute(index).type()) {      case Attribute.NOMINAL:                // If attribute is nominal        if(Instance.isMissingValue(val1) ||           Instance.isMissingValue(val2) ||           ((int)val1 != (int)val2)) {          return 1;        } else {          return 0;        }      case Attribute.NUMERIC:        // If attribute is numeric        if (Instance.isMissingValue(val1) ||        Instance.isMissingValue(val2)) {          if(Instance.isMissingValue(val1) &&             Instance.isMissingValue(val2)) {            if(m_DontNormalize==false)  //We are doing normalization              return 1;            else              return (m_Ranges[index][R_MAX] - m_Ranges[index][R_MIN]);          } else {            double diff;            if (Instance.isMissingValue(val2)) {              diff = (m_DontNormalize==false) ? 
norm(val1, index) : val1;            } else {              diff = (m_DontNormalize==false) ? norm(val2, index) : val2;            }            if (m_DontNormalize==false && diff < 0.5) {              diff = 1.0 - diff;            }            else if (m_DontNormalize==true) {              if((m_Ranges[index][R_MAX]-diff) > (diff-m_Ranges[index][R_MIN]))                return m_Ranges[index][R_MAX]-diff;              else                return diff-m_Ranges[index][R_MIN];            }            return diff;          }        } else {          return (m_DontNormalize==false) ?                                   (norm(val1, index) - norm(val2, index)) :                                  (val1 - val2);        }      default:        return 0;    }  }  /**   * Normalizes a given value of a numeric attribute.   *   * @param x the value to be normalized   * @param i the attribute's index   */  private double norm(double x,int i) {    if (Double.isNaN(m_Ranges[i][R_MIN]) || m_Ranges[i][R_MAX]==m_Ranges[i][R_MIN]) { //Utils.eq(m_Ranges[i][R_MAX], m_Ranges[i][R_MIN])) {      return 0;    } else {      return (x - m_Ranges[i][R_MIN]) / (m_Ranges[i][R_WIDTH]);    }  }    /**   * Returns value in the middle of the two parameter values.   * @param range the ranges to this dimension   * @return the middle value   */  public double getMiddle(double[] ranges) {    double middle = ranges[R_MIN] + ranges[R_WIDTH] * 0.5;    return middle;  }    /**   * Returns the index of the closest point to the current instance.   * Index is index in Instances object that is the second parameter.   
*   * @param instance the instance to assign a cluster to   * @param centers all centers   * @param centList the centers to cluster the instance to   * @return a cluster index   */  public int closestPoint(Instance instance, Instances allPoints,                           int [] pointList) throws Exception {    double minDist = Integer.MAX_VALUE;    int bestPoint = 0;    for (int i = 0; i < pointList.length; i++) {      double dist = distance(instance, allPoints.instance(pointList[i]), Double.MAX_VALUE);      if (dist < minDist) {        minDist = dist;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -