⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kdtree.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
  /** distance to current nearest neighbour */  private double [] m_DistanceList;  /**    * Returns the distances to the kNearest or 1 nearest neighbour currently    *  found with either the kNearestNeighbours or the nearestNeighbour method.   *   * @return distances[] array containing the distances of the    *         nearestNeighbours. The length and ordering of the array is the    *         same as that of the instances returned by nearestNeighbour    *         functions.   * @throws Exception Throws an exception if called before calling kNearestNeighbours   *         or nearestNeighbours.   */  public double[] getDistances() throws Exception {    if(m_Instances==null || m_DistanceList==null)      throw new Exception("The tree has not been supplied with a set of " +                          "instances or getDistances() has been called " +                          "before calling kNearestNeighbours().");    return m_DistanceList;  }    /** debug pls remove after use. */  private boolean print = false;   /**    * Returns the k nearest neighbours to the supplied instance.    *    * @param target The instance to find the nearest neighbours for.   * @param k The number of neighbours to find.   * @return the neighbors   * @throws Exception Throws an exception if the nearest neighbour could not be    *              found.   */  public Instances kNearestNeighbours(Instance target, int k) throws Exception {    checkMissing(target);    if(m_Instances==null)       throw new Exception("No instances supplied yet. Have to call " +                          "setInstances(instances) with a set of Instances " +                          "first.");    m_kNN = k;    double maxDist;    m_NearestList  = new int[m_Instances.numInstances()];    m_DistanceList = new double[m_Instances.numInstances()];    m_NearestListLength = 0;    for(int i=0; i<m_DistanceList.length; i++) {      m_DistanceList[i] = Double.MAX_VALUE;    }    maxDist = m_Root.kNearestNeighbour(target);    combSort11(m_DistanceList, m_NearestList);    m_EuclideanDistance.postProcessDistances(m_DistanceList);        Instances nearest = new Instances(m_Instances, 0);    double [] newDistanceList = new double[m_NearestListLength];    for(int i=0; i<m_NearestListLength; i++) {      nearest.add(m_Instances.instance(m_NearestList[i]));      newDistanceList[i] = m_DistanceList[i];    }        m_DistanceList = newDistanceList;    return nearest;  }    /**    * Returns the nearest neighbour to the supplied instance.   *   * @param target The instance to find the nearest neighbour for.   * @return the nearest neighbor   * @throws Exception Throws an exception if the neighbours could not be found.   */  public Instance nearestNeighbour(Instance target) throws Exception {    return (kNearestNeighbours(target, 1)).instance(0);  }    /**   * Find k nearest neighbours to target. This is the main method.   *   * @param target the instance to find nearest neighbour for   * @param kNN the number of neighbors to find   * @param nearestList    * @param distanceList    * @return    * @throws Exception if something goes wrong   */ //redundant no longer needed  public int findKNearestNeighbour(Instance target, int kNN, int [] nearestList,                                   double [] distanceList) throws Exception {    m_kNN = kNN;    m_NearestList = nearestList;    m_DistanceList = distanceList;    m_NearestListLength = 0;    for(int i=0; i<distanceList.length; i++) {      distanceList[i] = Double.MAX_VALUE;    }    int[] num = new int[1];     num[0] = 0;    double minDist = m_Root.kNearestNeighbour(target);    return m_NearestListLength;  }      /**   * Get the distance of the furthest of the nearest neighbour    * returns the index of this instance    * in the index list.   *    * @return the index of the instance   */  private int checkFurthestNear() {    double max = 0.0;    int furthestNear = 0;    for (int i = 0; i < m_kNN; i++) {      if (m_DistanceList[i] > max) {	max = m_DistanceList[i];	furthestNear = i;      }     }    return furthestNear;  }  /**   * the GET and SET - functions ===============================================   **/  /**    * Tip text for this property   *    * @return the tip text for this property   */  public String minBoxRelWidthTipText() {    return "The minimum relative width of the box. A node is only made a leaf "+           "if the width of the split dimension of the instances in a node " +           "normalized over the width of the split dimension of all the " +           "instances is less than or equal to this minimum relative width.";  }    /**   * Sets the minimum relative box width.   *    * @param i the minimum relative box width   */  public void setMinBoxRelWidth(double i) {    m_MinBoxRelWidth = i;  }  /**   * Gets the minimum relative box width.   * @return the minimum relative box width   */  public double getMinBoxRelWidth() {    return m_MinBoxRelWidth;  }  /**    * Tip text for this property   *    * @return the tip text for this property   */  public String maxInstInLeafTipText() {    return "The max number of instances in a leaf.";  }    /**   * Sets the maximum number of instances in a leaf.   *    * @param i the maximum number of instances in a leaf   */  public void setMaxInstInLeaf(int i) {    m_MaxInstInLeaf = i;  }  /**   * Get the maximum number of instances in a leaf.   *    * @return the maximum number of instances in a leaf   */  public int getMaxInstInLeaf() {    return  m_MaxInstInLeaf;  }  /**    * Tip text for this property   *    * @return the tip text for this property   */  public String normalizeNodeWidthTipText() {    return "Whether if the widths of the KDTree node should be normalized " +           "by the width of the universe or not. "+           "Where, width of the node is the range of the split attribute "+           "based on the instances in that node, and width of the " +           "universe is the range of the split attribute based on all the " +           "instances (default: false).";  }      /**   * Sets the flag for normalizing the widths of a KDTree Node by the width   * of the dimension in the universe.    *    * @param n true to use normalizing.   */  public void setNormalizeNodeWidth(boolean n) {      m_NormalizeNodeWidth = n;  }  /**   * Gets the normalize flag.   *    * @return True if normalizing   */  public boolean getNormalizeNodeWidth() {    return m_NormalizeNodeWidth;  }  /**    * returns the distance function currently in use   *   * @return the distance function   */  public DistanceFunction getDistanceFunction() {    return (DistanceFunction) m_EuclideanDistance;  }    /**    * sets the distance function to use for nearest neighbour search   *    * @param df		the distance function to use   * @throws Exception	if not EuclideanDistance   */  public void setDistanceFunction(DistanceFunction df) throws Exception {    if(!(df instanceof EuclideanDistance))      throw new Exception("KDTree currently only works with " +                          "EuclideanDistanceFunction.");    m_DistanceFunction = m_EuclideanDistance = (EuclideanDistance) df;  }      /**   * Returns a string describing this nearest neighbour search algorithm.   *    * @return a description of the algorithm for displaying in the    * explorer/experimenter gui   */  public String globalInfo() {    return "Class implementing the KDTree search algorithm for nearest "+           "neighbour search.\n" +           "The connection to dataset is only a reference. For the tree " +           "structure the indexes are stored in an array. \n" +           "Building the tree:\n" +           "If a node has <maximal-inst-number> (option -L) instances no " +           "further splitting is done. Also if the split would leave one " +           "side empty, the branch is not split any further even if the " +           "instances in the resulting node are more than " +           "<maximal-inst-number> instances.\n" +           "**PLEASE NOTE:** The algorithm can not handle missing values, so it " +           "is advisable to run ReplaceMissingValues filter if there are any " +           "missing values in the dataset.";  }    /**   * Returns an enumeration describing the available options.   *    * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector newVector = new Vector();    newVector.addElement(new Option("\tSet minimal width of a box\n"+                                    "\t(default = 1.0E-2).",				    "W", 0,"-W <value>"));    newVector.addElement(new Option("\tMaximal number of instances in a leaf\n"+                                    "\t(default = 40).",				    "L", 0,"-L"));    newVector.addElement(new Option("\tNormalizing will be done\n"+                                    "\t(Select dimension for split, with "+                                    "normalising to universe).", "N", 0,"-N"));    return newVector.elements();  }  /**   * Parses a given list of options. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -W &lt;value&gt;   *  Set minimal width of a box   *  (default = 1.0E-2).</pre>   *    * <pre> -L   *  Maximal number of instances in a leaf   *  (default = 40).</pre>   *    * <pre> -N   *  Normalizing will be done   *  (Select dimension for split, with normalising to universe).</pre>   *    <!-- options-end -->   *    * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options)    throws Exception {    super.setOptions(options);    String optionString = Utils.getOption('W', options);    if (optionString.length() != 0) {      setMinBoxRelWidth(Double.parseDouble(optionString));    }    optionString = Utils.getOption('L', options);    if (optionString.length() != 0) {      setMaxInstInLeaf(Integer.parseInt(optionString));    }    if (Utils.getFlag('N', options)) {      setNormalizeNodeWidth(true);    } else {      setNormalizeNodeWidth(false);    }  }  /**   * Gets the current settings of KDtree.   *    * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    String[] superOptions = super.getOptions();    String[] options = new String[7+superOptions.length];    int current = 0;    System.arraycopy(superOptions, current, options, current, superOptions.length);    current = superOptions.length;        options[current++] = "-W";    options[current++] = "" + getMinBoxRelWidth();    options[current++] = "-L";    options[current++] = "" + getMaxInstInLeaf();    if (getNormalizeNodeWidth()) {      options[current++] = "-N";    }    while (current < options.length) {      options[current++] = "";    }    return  options;  }  /**   * Main method for testing this class   *    * @param args	the commandline parameters   */  public static void main(String [] args) {    try {      if (args.length < 1 ) {	System.err.println("Usage: " + KDTree.class.getName() + " <dataset>");	System.exit(1);      }      Instances insts = new Instances(new java.io.FileReader(args[0]));      KDTree tree = new KDTree();      DistanceFunction df = new EuclideanDistance();      df.setInstances(insts);      tree.setInstances(insts);      tree.setDistanceFunction(df);      System.out.println(tree.toString());    }    catch (Exception ex) {      ex.printStackTrace();      System.err.println(ex.getMessage());    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -