📄 kdtree.java

📁 矩阵的QR分解算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    KDTree.java *    Copyright (C) 2000-2007 University of Waikato *     */package weka.core.neighboursearch;import weka.core.DistanceFunction;import weka.core.EuclideanDistance;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.core.neighboursearch.kdtrees.KDTreeNode;import weka.core.neighboursearch.kdtrees.KDTreeNodeSplitter;import weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class implementing the KDTree search algorithm for nearest neighbour search.<br/> * The connection to dataset is only a reference. For the tree structure the indexes are stored in an array. <br/> * Building the tree:<br/> * If a node has &lt;maximal-inst-number&gt; (option -L) instances no further splitting is done. Also if the split would leave one side empty, the branch is not split any further even if the instances in the resulting node are more than &lt;maximal-inst-number&gt; instances.<br/> * **PLEASE NOTE:** The algorithm can not handle missing values, so it is advisable to run ReplaceMissingValues filter if there are any missing values in the dataset.<br/> * <br/> * For more information see:<br/> * <br/> * Jerome H. Friedman, Jon Luis Bentley, Raphael Ari Finkel (1977). An Algorithm for Finding Best Matches in Logarithmic Expected Time. ACM Transactions on Mathematics Software. 3(3):209-226.<br/> * <br/> * Andrew Moore (1991). A tutorial on kd-trees. * <p/> <!-- globalinfo-end --> *  <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;article{Friedman1977, *    author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel}, *    journal = {ACM Transactions on Mathematics Software}, *    month = {September}, *    number = {3}, *    pages = {209-226}, *    title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time}, *    volume = {3}, *    year = {1977} * } *  * &#64;techreport{Moore1991, *    author = {Andrew Moore}, *    booktitle = {University of Cambridge Computer Laboratory Technical Report No. 209}, *    howpublished = {Extract from PhD Thesis}, *    title = {A tutorial on kd-trees}, *    year = {1991}, *    HTTP = {Available from http://www.autonlab.org/autonweb/14665.html} * } * </pre> * <p/> <!-- technical-bibtex-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -S &lt;classname and options&gt; *  Node splitting method to use. *  (default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)</pre> *  * <pre> -W &lt;value&gt; *  Set minimal width of a box *  (default: 1.0E-2).</pre> *  * <pre> -L *  Maximal number of instances in a leaf *  (default: 40).</pre> *  * <pre> -N *  Normalizing will be done *  (Select dimension for split, with normalising to universe).</pre> *  <!-- options-end -->  *  * @author Gabi Schmidberger (gabi[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @author Malcolm Ware (mfw4[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.1 $ */public class KDTree  extends NearestNeighbourSearch  implements TechnicalInformationHandler {  /** For serialization. */  private static final long serialVersionUID = 1505717283763272533L;  /**   * Array holding the distances of the nearest neighbours. It is filled up both   * by nearestNeighbour() and kNearestNeighbours().   */  protected double[] m_DistanceList;  /**   * Indexlist of the instances of this kdtree. Instances get sorted according   * to the splits. the nodes of the KDTree just hold their start and end   * indices   */  protected int[] m_InstList;  /** The root node of the tree. */  protected KDTreeNode m_Root;  /** The node splitter. */  protected KDTreeNodeSplitter m_Splitter = new SlidingMidPointOfWidestSide();  /** Tree stats. */  protected int m_NumNodes, m_NumLeaves, m_MaxDepth;  /** Tree Stats variables. */  protected TreePerformanceStats m_TreeStats = null;  // Constants  /** The index of MIN value in attributes' range array. */  public static final int MIN = EuclideanDistance.R_MIN;    /** The index of MAX value in attributes' range array. */  public static final int MAX = EuclideanDistance.R_MAX;     /** The index of WIDTH (MAX-MIN) value in attributes' range array. */  public static final int WIDTH = EuclideanDistance.R_WIDTH;  /**   * Returns an instance of a TechnicalInformation object, containing detailed   * information about the technical background of this class, e.g., paper   * reference or book this class is based on.   *    * @return		the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation result;    TechnicalInformation additional;    result = new TechnicalInformation(Type.ARTICLE);    result.setValue(Field.AUTHOR, "Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel");    result.setValue(Field.YEAR, "1977");    result.setValue(Field.TITLE, "An Algorithm for Finding Best Matches in Logarithmic Expected Time");    result.setValue(Field.JOURNAL, "ACM Transactions on Mathematics Software");    result.setValue(Field.PAGES, "209-226");    result.setValue(Field.MONTH, "September");    result.setValue(Field.VOLUME, "3");    result.setValue(Field.NUMBER, "3");    additional = result.add(Type.TECHREPORT);    additional.setValue(Field.AUTHOR, "Andrew Moore");    additional.setValue(Field.YEAR, "1991");    additional.setValue(Field.TITLE, "A tutorial on kd-trees");    additional.setValue(Field.HOWPUBLISHED, "Extract from PhD Thesis");    additional.setValue(Field.BOOKTITLE, "University of Cambridge Computer Laboratory Technical Report No. 209");    additional.setValue(Field.HTTP, "Available from http://www.autonlab.org/autonweb/14665.html");    return result;  }  /**   * Creates a new instance of KDTree.   */  public KDTree() {    super();    if (getMeasurePerformance())      m_Stats = m_TreeStats = new TreePerformanceStats();  }  /**   * Creates a new instance of KDTree.    * It also builds the tree on supplied set of Instances.   * @param insts The instances/points on which the BallTree    * should be built on.   */  public KDTree(Instances insts) {    super(insts);    if (getMeasurePerformance())      m_Stats = m_TreeStats = new TreePerformanceStats();  }  /**   * Builds the KDTree on the supplied set of instances/points. It    * is adviseable to run the replace missing attributes filter    * on the passed instances first.   * NOTE: This method should not be called from outside this    * class. Outside classes should call setInstances(Instances)   * instead.   *    * @param instances	The instances to build the tree on   * @throws Exception	if something goes wrong   */  protected void buildKDTree(Instances instances) throws Exception {    checkMissing(instances);    if (m_EuclideanDistance == null)      m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(          instances);    else      m_EuclideanDistance.setInstances(instances);    m_Instances = instances;    int numInst = m_Instances.numInstances();    // Make the global index list    m_InstList = new int[numInst];    for (int i = 0; i < numInst; i++) {      m_InstList[i] = i;    }    double[][] universe = m_EuclideanDistance.getRanges();    // initializing internal fields of KDTreeSplitter    m_Splitter.setInstances(m_Instances);    m_Splitter.setInstanceList(m_InstList);    m_Splitter.setEuclideanDistanceFunction(m_EuclideanDistance);    m_Splitter.setNodeWidthNormalization(m_NormalizeNodeWidth);    // building tree    m_NumNodes = m_NumLeaves = 1;    m_MaxDepth = 0;    m_Root = new KDTreeNode(m_NumNodes, 0, m_Instances.numInstances() - 1,        universe);    splitNodes(m_Root, universe, m_MaxDepth + 1);  }  /**    * Recursively splits nodes of a tree starting from the supplied node.   * The splitting stops for any node for which the number of instances/points   * falls below a given threshold (given by m_MaxInstInLeaf), or if the    * maximum relative width/range of the instances/points    * (i.e. max_i(max(att_i) - min(att_i)) ) falls below a given threshold    * (given by m_MinBoxRelWidth).    *    * @param node The node to start splitting from.   * @param universe The attribute ranges of the whole dataset.   * @param depth The depth of the supplied node.     * @throws Exception If there is some problem    * splitting.   */  protected void splitNodes(KDTreeNode node, double[][] universe,      int depth) throws Exception {    double[][] nodeRanges = m_EuclideanDistance.initializeRanges(m_InstList,                                                 node.m_Start, node.m_End);    if (node.numInstances() <= m_MaxInstInLeaf        || getMaxRelativeNodeWidth(nodeRanges, universe) <= m_MinBoxRelWidth)      return;    // splitting a node so it is no longer a leaf    m_NumLeaves--;    if (depth > m_MaxDepth)      m_MaxDepth = depth;    m_Splitter.splitNode(node, m_NumNodes, nodeRanges, universe);    m_NumNodes += 2;    m_NumLeaves += 2;    splitNodes(node.m_Left, universe, depth + 1);    splitNodes(node.m_Right, universe, depth + 1);  }  /**   * Returns (in the supplied heap object) the k nearest    * neighbours of the given instance starting from the give    * tree node. &gt;k neighbours are returned if there are more than    * one neighbours at the kth boundary. NOTE: This method should    * not be used from outside this class. Outside classes should    * call kNearestNeighbours(Instance, int).   *    * @param target  The instance to find the nearest neighbours for.   * @param node The KDTreeNode to start the search from.   * @param k    The number of neighbours to find.   * @param heap The MyHeap object to store/update the kNNs found   * during the search.   * @param distanceToParents The distance of the supplied target    * to the parents of the supplied tree node.    * @throws Exception  if the nearest neighbour could not be found.   */  protected void findNearestNeighbours(Instance target, KDTreeNode node, int k,      MyHeap heap, double distanceToParents) throws Exception {    if (node.isALeaf()) {      if (m_TreeStats != null) {        m_TreeStats.updatePointCount(node.numInstances());        m_TreeStats.incrLeafCount();      }      double distance;      // look at all the instances in this leaf      for (int idx = node.m_Start; idx <= node.m_End; idx++) {        if (target == m_Instances.instance(m_InstList[idx])) // for                                                              // hold-one-out                                                              // cross-validation          continue;        if (heap.size() < k) {          distance = m_EuclideanDistance.distance(target, m_Instances              .instance(m_InstList[idx]), Double.POSITIVE_INFINITY, m_Stats);          heap.put(m_InstList[idx], distance);        } else {          MyHeapElement temp = heap.peek();          distance = m_EuclideanDistance.distance(target, m_Instances              .instance(m_InstList[idx]), temp.distance, m_Stats);          if (distance < temp.distance) {            heap.putBySubstitute(m_InstList[idx], distance);          } else if (distance == temp.distance) {            heap.putKthNearest(m_InstList[idx], distance);          }        }// end else heap.size==k      }// end for    } else {      if (m_TreeStats != null) {        m_TreeStats.incrIntNodeCount();      }      KDTreeNode nearer, further;      boolean targetInLeft = m_EuclideanDistance.valueIsSmallerEqual(target,          node.m_SplitDim, node.m_SplitValue);      if (targetInLeft) {        nearer = node.m_Left;        further = node.m_Right;      } else {        nearer = node.m_Right;        further = node.m_Left;      }      findNearestNeighbours(target, nearer, k, heap, distanceToParents);      // ... now look in further half if maxDist reaches into it      if (heap.size() < k) { // if haven't found the first k        double distanceToSplitPlane = distanceToParents            + m_EuclideanDistance.sqDifference(node.m_SplitDim, target                .value(node.m_SplitDim), node.m_SplitValue);        findNearestNeighbours(target, further, k, heap, distanceToSplitPlane);        return;      } else { // else see if ball centered at query intersects with the other                // side.        double distanceToSplitPlane = distanceToParents            + m_EuclideanDistance.sqDifference(node.m_SplitDim, target                .value(node.m_SplitDim), node.m_SplitValue);        if (heap.peek().distance >= distanceToSplitPlane) {          findNearestNeighbours(target, further, k, heap, distanceToSplitPlane);        }      }// end else    }// end else_if an internal node  }  /**   * Returns the k nearest neighbours of the supplied instance.   * &gt;k neighbours are returned if there are more than one    * neighbours at the kth boundary.    *    * @param target	The instance to find the nearest neighbours for.   * @param k 		The number of neighbours to find.   * @return The k nearest neighbours (or &gt;k if more there are than   * one neighbours at the kth boundary).    * @throws Exception 	if the nearest neighbour could not be found.   */  public Instances kNearestNeighbours(Instance target, int k) throws Exception {    checkMissing(target);    if (m_Stats != null)      m_Stats.searchStart();    MyHeap heap = new MyHeap(k);    findNearestNeighbours(target, m_Root, k, heap, 0.0);    if (m_Stats != null)      m_Stats.searchFinish();    Instances neighbours = new Instances(m_Instances, (heap.size() + heap        .noOfKthNearest()));    m_DistanceList = new double[heap.size() + heap.noOfKthNearest()];    int[] indices = new int[heap.size() + heap.noOfKthNearest()];    int i = indices.length - 1;    MyHeapElement h;    while (heap.noOfKthNearest() > 0) {      h = heap.getKthNearest();      indices[i] = h.index;      m_DistanceList[i] = h.distance;      i--;    }    while (heap.size() > 0) {      h = heap.get();      indices[i] = h.index;      m_DistanceList[i] = h.distance;      i--;    }    m_DistanceFunction.postProcessDistances(m_DistanceList);    for (int idx = 0; idx < indices.length; idx++) {      neighbours.add(m_Instances.instance(indices[idx]));    }    return neighbours;  }    /**   * Returns the nearest neighbour of the supplied target    * instance.    *     * @param target	The instance to find the nearest neighbour for.   * @return The nearest neighbour from among the previously    * supplied training instances.   * @throws Exception 	if the neighbours could not be found.   */  public Instance nearestNeighbour(Instance target) throws Exception {    return (kNearestNeighbours(target, 1)).instance(0);  }    /**   * Returns the distances to the kNearest or 1 nearest neighbour currently   * found with either the kNearestNeighbours or the nearestNeighbour method.   *    * @return array containing the distances of the   *         nearestNeighbours. The length and ordering of the array    *         is the same as that of the instances returned by
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -