📄 balltree.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * BallTree.java * Copyright (C) 2007 University of Waikato */package weka.core.neighboursearch;import weka.core.EuclideanDistance;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.core.neighboursearch.balltrees.BallNode;import weka.core.neighboursearch.balltrees.BallTreeConstructor;import weka.core.neighboursearch.balltrees.TopDownConstructor;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class implementing the BallTree/Metric Tree algorithm for nearest neighbour search.<br/> * The connection to dataset is only a reference. For the tree structure the indexes are stored in an array.<br/> * See the implementing classes of different construction methods of the trees for details on its construction.<br/> * <br/> * For more information see also:<br/> * <br/> * Stephen M. Omohundro (1989). Five Balltree Construction Algorithms.<br/> * <br/> * Jeffrey K. Uhlmann (1991). Satisfying general proximity/similarity queries with metric trees. Information Processing Letters. 40(4):175-179. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @techreport{Omohundro1989, * author = {Stephen M. Omohundro}, * institution = {International Computer Science Institute}, * month = {December}, * number = {TR-89-063}, * title = {Five Balltree Construction Algorithms}, * year = {1989} * } * * @article{Uhlmann1991, * author = {Jeffrey K. Uhlmann}, * journal = {Information Processing Letters}, * month = {November}, * number = {4}, * pages = {175-179}, * title = {Satisfying general proximity/similarity queries with metric trees}, * volume = {40}, * year = {1991} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -C <classname and options> * The construction method to employ. Either TopDown or BottomUp * (default: weka.core.TopDownConstructor)</pre> * <!-- options-end --> * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.1 $ */public class BallTree extends NearestNeighbourSearch implements TechnicalInformationHandler { /** for serialization. */ private static final long serialVersionUID = 728763855952698328L; /** * The instances indices sorted inorder of appearence in the tree from left * most leaf node to the right most leaf node. */ protected int[] m_InstList; /** * The maximum number of instances in a leaf. A node is made into a leaf if * the number of instances in it become less than or equal to this value. */ protected int m_MaxInstancesInLeaf = 40; /** Tree Stats variables. */ protected TreePerformanceStats m_TreeStats = null; /** The root node of the BallTree. */ protected BallNode m_Root; /** The constructor method to use to build the tree. */ protected BallTreeConstructor m_TreeConstructor = new TopDownConstructor(); /** Array holding the distances of the nearest neighbours. It is filled up * both by nearestNeighbour() and kNearestNeighbours(). */ protected double[] m_Distances; /** * Creates a new instance of BallTree. */ public BallTree() { super(); if(getMeasurePerformance()) m_Stats = m_TreeStats = new TreePerformanceStats(); } /** * Creates a new instance of BallTree. * It also builds the tree on supplied set of Instances. * @param insts The instances/points on which the BallTree * should be built on. */ public BallTree(Instances insts) { super(insts); if(getMeasurePerformance()) m_Stats = m_TreeStats = new TreePerformanceStats(); } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class implementing the BallTree/Metric Tree algorithm for " + "nearest neighbour search.\n" + "The connection to dataset is only a reference. For the tree " + "structure the indexes are stored in an array.\n" + "See the implementing classes of different construction methods of " + "the trees for details on its construction.\n\n" + "For more information see also:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.TECHREPORT); result.setValue(Field.AUTHOR, "Stephen M. Omohundro"); result.setValue(Field.YEAR, "1989"); result.setValue(Field.TITLE, "Five Balltree Construction Algorithms"); result.setValue(Field.MONTH, "December"); result.setValue(Field.NUMBER, "TR-89-063"); result.setValue(Field.INSTITUTION, "International Computer Science Institute"); additional = result.add(Type.ARTICLE); additional.setValue(Field.AUTHOR, "Jeffrey K. Uhlmann"); additional.setValue(Field.TITLE, "Satisfying general proximity/similarity queries with metric trees"); additional.setValue(Field.JOURNAL, "Information Processing Letters"); additional.setValue(Field.MONTH, "November"); additional.setValue(Field.YEAR, "1991"); additional.setValue(Field.NUMBER, "4"); additional.setValue(Field.VOLUME, "40"); additional.setValue(Field.PAGES, "175-179"); return result; } /** * Builds the BallTree on the supplied set of * instances/points (supplied with setInstances(Instances) * method and referenced by the m_Instances field). This * method should not be called by outside classes. They * should only use setInstances(Instances) method. * * @throws Exception If no instances are supplied * (m_Instances is null), or if some other error in the * supplied BallTreeConstructor occurs while building * the tree. */ protected void buildTree() throws Exception { if(m_Instances==null) throw new Exception("No instances supplied yet. Have to call " + "setInstances(instances) with a set of Instances " + "first."); m_InstList = new int[m_Instances.numInstances()]; for(int i=0; i<m_InstList.length; i++) { m_InstList[i] = i; } //end for m_DistanceFunction.setInstances(m_Instances); m_TreeConstructor.setInstances(m_Instances); m_TreeConstructor.setInstanceList(m_InstList); m_TreeConstructor.setEuclideanDistanceFunction( (EuclideanDistance)m_DistanceFunction); m_Root = m_TreeConstructor.buildTree(); } /** * Returns k nearest instances in the current neighbourhood to the supplied * instance. >k instances can be returned if there is more than one instance * at the kth boundary (i.e. if there are more than 1 instance with the * same distance as the kth nearest neighbour). * * @param target The instance to find the k nearest neighbours for. * @param k The number of nearest neighbours to find. * @throws Exception If the neighbours could not be found. * @return The k nearest neighbours of the given target instance * (>k nearest neighbours, if there are more instances that have same * distance as the kth nearest neighbour). */ public Instances kNearestNeighbours(Instance target, int k) throws Exception { MyHeap heap = new MyHeap(k); if(m_Stats!=null) m_Stats.searchStart(); nearestNeighbours(heap, m_Root, target, k); if(m_Stats!=null) m_Stats.searchFinish(); Instances neighbours = new Instances(m_Instances, heap.totalSize()); m_Distances = new double[heap.totalSize()]; int [] indices = new int[heap.totalSize()]; int i=1; MyHeapElement h; while(heap.noOfKthNearest()>0) { h = heap.getKthNearest(); indices[indices.length-i] = h.index; m_Distances[indices.length-i] = h.distance; i++; } while(heap.size()>0) { h = heap.get(); indices[indices.length-i] = h.index; m_Distances[indices.length-i] = h.distance; i++; } m_DistanceFunction.postProcessDistances(m_Distances); for(i=0; i<indices.length; i++) neighbours.add(m_Instances.instance(indices[i])); return neighbours; // <---Check this statement } /** * Does NN search according to Moore's method. * Should not be used by outside classes. They should instead * use kNearestNeighbours(Instance, int). * P.S.: The distance returned are squared. Need to post process the * distances. * @param heap MyHeap object to store/update NNs found during the search. * @param node The BallNode to do the NN search on. * @param target The target instance for which the NNs are required. * @param k The number of NNs to find. * @throws Exception If the structure of the BallTree is not correct, * or if there is some problem putting NNs in the heap. */ protected void nearestNeighbours(MyHeap heap, BallNode node, Instance target, int k) throws Exception{ double distance = Double.NEGATIVE_INFINITY; if (heap.totalSize() >= k) distance = m_DistanceFunction.distance(target, node.getPivot()); // The radius is not squared so need to take sqrt before comparison if (distance > -0.000001 && Math.sqrt(heap.peek().distance) < distance - node.getRadius()) { return; } else if (node.m_Left != null && node.m_Right != null) { // if node is not // a leaf if (m_TreeStats != null) { m_TreeStats.incrIntNodeCount(); } double leftPivotDist = Math.sqrt(m_DistanceFunction.distance(target, node.m_Left.getPivot(), Double.POSITIVE_INFINITY)); double rightPivotDist = Math.sqrt(m_DistanceFunction.distance(target, node.m_Right.getPivot(), Double.POSITIVE_INFINITY)); double leftBallDist = leftPivotDist - node.m_Left.getRadius(); double rightBallDist = rightPivotDist - node.m_Right.getRadius(); // if target is inside both balls then see which center is closer if (leftBallDist < 0 && rightBallDist < 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -