📄 covertree.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * CoverTree.java * Copyright (C) 2006 Alina Beygelzimer and Sham Kakade and John Langford */package weka.core.neighboursearch;import weka.core.DistanceFunction;import weka.core.EuclideanDistance;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.core.converters.CSVLoader;import weka.core.neighboursearch.covertrees.Stack;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.io.Serializable;import java.util.Enumeration;import java.util.List;import java.util.Vector;/** <!-- globalinfo-start --> * Class implementing the CoverTree datastructure.<br/> * The class is very much a translation of the c source code made available by the authors.<br/> * <br/> * For more information and original source code see:<br/> * <br/> * Alina Beygelzimer, Sham Kakade, John Langford: Cover trees for nearest neighbor. In: ICML'06: Proceedings of the 23rd international conference on Machine learning, New York, NY, USA, 97-104, 2006. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Beygelzimer2006, * address = {New York, NY, USA}, * author = {Alina Beygelzimer and Sham Kakade and John Langford}, * booktitle = {ICML'06: Proceedings of the 23rd international conference on Machine learning}, * pages = {97-104}, * publisher = {ACM Press}, * title = {Cover trees for nearest neighbor}, * year = {2006}, * location = {Pittsburgh, Pennsylvania}, * HTTP = {http://hunch.net/\~jl/projects/cover_tree/cover_tree.html} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -B <value> * Set base of the expansion constant * (default = 1.3).</pre> * <!-- options-end --> * * @author Alina Beygelzimer (original C++ code) * @author Sham Kakade (original C++ code) * @author John Langford (original C++ code) * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) (Java port) * @version $Revision: 1.3 $ */public class CoverTree extends NearestNeighbourSearch implements TechnicalInformationHandler { /** for serialization. */ private static final long serialVersionUID = 7617412821497807586L; /** * class representing a node of the cover tree. * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.3 $ */ public class CoverTreeNode implements Serializable { /** for serialization. */ private static final long serialVersionUID = 1808760031169036512L; /** ID for the node. */ private int nodeid; /** Index of the instance represented by this node in the index array. */ private Integer idx; /** The distance of the furthest descendant of the node. */ private double max_dist; // The maximum distance to any grandchild. /** The distance to the nodes parent. */ private double parent_dist; // The distance to the parent. /** The children of the node. */ private Stack<CoverTreeNode> children; /** The number of children node has. */ private int num_children; // The number of children. /** The min i that makes base^i <= max_dist. */ private int scale; // Essentially, an upper bound on the distance to any child. /** Constructor for the class. */ public CoverTreeNode() { } /** * Constructor. * @param i The index of the Instance this node is * associated with. * @param md The distance of the furthest descendant. * @param pd The distance of the node to its parent. * @param childs Children of the node in a stack. * @param numchilds The number of children of the * node. * @param s The scale/level of the node in the tree. */ public CoverTreeNode(Integer i, double md, double pd, Stack<CoverTreeNode> childs, int numchilds, int s) { idx = i; max_dist = md; parent_dist = pd; children = childs; num_children = numchilds; scale = s; } /** Returns the instance represented by the node. * @return The instance represented by the node. */ public Instance p() { return m_Instances.instance(idx); } /** Returns whether if the node is a leaf or not. * @return true if the node is a leaf node. */ public boolean isALeaf() { return num_children==0; } } /** * Private class holding a point's distance to the current reference * point p. * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.3 $ */ private class DistanceNode { /** * The last distance is to the current reference point * (potential current parent). The previous ones are * to reference points that were previously looked at * (all potential ancestors). */ Stack<Double> dist; /** The index of the instance represented by this node. */ Integer idx; /** * Returns the instance represent by this DistanceNode. * @return The instance represented by this node. */ public Instance q() { return m_Instances.instance(idx); } } /** The euclidean distance function to use. */ protected EuclideanDistance m_EuclideanDistance; { // to make sure we have only one object of EuclideanDistance if (m_DistanceFunction instanceof EuclideanDistance) m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction; else m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(); } /** The root node. */ protected CoverTreeNode m_Root; /** * Array holding the distances of the nearest neighbours. It is filled up * both by nearestNeighbour() and kNearestNeighbours(). */ protected double [] m_DistanceList; /** Number of nodes in the tree. */ protected int m_NumNodes, m_NumLeaves, m_MaxDepth; /** Tree Stats variables. */ protected TreePerformanceStats m_TreeStats = null; /** * The base of our expansion constant. In other words the 2 in 2^i used * in covering tree and separation invariants of a cover tree. P.S.: In * paper it's suggested the separation invariant is relaxed in batch * construction. */ protected double m_Base = 1.3; /** * if we have base 2 then this can be viewed as 1/ln(2), which can be * used later on to do il2*ln(d) instead of ln(d)/ln(2), to get log2(d), * in get_scale method. */ protected double il2 = 1.0 / Math.log(m_Base); /** * default constructor. */ public CoverTree() { super(); if(getMeasurePerformance()) m_Stats = m_TreeStats = new TreePerformanceStats(); } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class implementing the CoverTree datastructure.\n" + "The class is very much a translation of the c source code made " + "available by the authors.\n\n" + "For more information and original source code see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Alina Beygelzimer and Sham Kakade and John Langford"); result.setValue(Field.TITLE, "Cover trees for nearest neighbor"); result.setValue(Field.BOOKTITLE, "ICML'06: Proceedings of the 23rd international conference on Machine learning"); result.setValue(Field.PAGES, "97-104"); result.setValue(Field.YEAR, "2006"); result.setValue(Field.PUBLISHER, "ACM Press"); result.setValue(Field.ADDRESS, "New York, NY, USA"); result.setValue(Field.LOCATION, "Pittsburgh, Pennsylvania"); result.setValue(Field.HTTP, "http://hunch.net/~jl/projects/cover_tree/cover_tree.html"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(); newVector.addElement(new Option( "\tSet base of the expansion constant\n" + "\t(default = 1.3).", "B", 1, "-B <value>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -B <value> * Set base of the expansion constant * (default = 1.3).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { super.setOptions(options); String optionString = Utils.getOption('B', options); if (optionString.length() != 0) setBase(Double.parseDouble(optionString)); else setBase(1.3); } /** * Gets the current settings of KDtree. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector<String> result; String[] options; int i; result = new Vector<String>(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); result.add("-B"); result.add("" + getBase()); return result.toArray(new String[result.size()]); } /** * Returns the distance/value of a given scale/level. I.e. the value of * base^i (e.g. 2^i). * * @param s the level/scale * @return base^s */ protected double dist_of_scale(int s) { return Math.pow(m_Base, s); } /** * Finds the scale/level of a given value. I.e. the "i" in base^i. * * @param d the value whose scale/level is to be determined. * @return the scale/level of the given value. */ protected int get_scale(double d) { return (int) Math.ceil(il2 * Math.log(d)); } /** * Creates a new internal node for a given Instance/point p. * @param idx The index of the instance the node represents. * @return Newly created CoverTreeNode. */ protected CoverTreeNode new_node(Integer idx) { // const point &p) CoverTreeNode new_node = new CoverTreeNode();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -