📄 kdtree.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * KDTree.java * Copyright (C) 2000 University of Waikato * */package weka.core;import weka.core.*;import java.io.Serializable;import java.util.*;import java.lang.Math;/** * This is a KD-Tree structure that stores instances using a divide and conquer * method. * The connection to dataset is only a reference. For the tree structure the * indexes are stored in a dynamic array * Building the tree: * If a node has <maxleaf> (option -L) instances no further splitting is done. * Also if the split would leave one side empty, the branch is not split any * further even if the instances in the resulting node are more than <maxleaf> * instances. * * -P <br> * Pruning flag. <p> * * -W <minimal-box-relative-width> <br> * minimal width of a box * * -L <maximal-inst-number> <br> * maximal instance number in a leaf * * -D <distance function> * Distance function to be used (default = Euclidean distance) * * -N <br> * Set Normalization. Used when building the tree and selecting the * widest dimension, each dimension is 'normalized' to the universe range. * * -U <debuglevel> <br> * Set debuglevel. <p> * * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @author Malcolm Ware (mfw4@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class KDTree implements OptionHandler, Serializable{ /** flag for pruning */ boolean m_Prune = false; /** flag for normalizing */ boolean m_Normalize = false; /** for debugging: debug level */ private int m_DebugLevel = 0; /* * Ranges of the whole KDTree. * lowest and highest value and width (= high - low) for each * dimension */ private double[][] m_Universe; /** the distance function used */ private DistanceFunction m_DistanceFunction = null; /** value to split on. */ private double m_SplitValue; /** attribute to split on. */ private int m_SplitDim; /** root node */ private KDTreeNode m_Root = null; /** * Indexlist of the instances of this kdtree. * Instances get sorted according to the splits. * the nodes of the KDTree just hold their start and end indices */ private DynamicArrayOfPosInt m_InstList; /** Reference to the instances of the universe. */ private Instances m_Instances; /** * Flag that can be set to signalize that the KDTree is not valid anymore for the * dataset given, * the flag is false after the tree is newly * initialized and therefore empty * or if the dataset has changed in a way that couldn't be followed up for * the KDTree, for instance if an instance was deleted. **/ private boolean m_Valid = false; /* minimal relative width of a KDTree rectangle */ double m_MinBoxRelWidth = 1.0E-2; /** maximal number of leaves in a KDTree */ // int m_MaxLeafNumber = 100; not yet implemented /** maximal number of instances in a leaf */ int m_MaxInstInLeaf = 40; /** This will cache the pruning value for future use. */ private double m_pruneValue = Double.NaN; //todo not used /** * Index in ranges for LOW and HIGH and WIDTH */ public static int R_MIN = 0; public static int R_MAX = 1; public static int R_WIDTH = 2; /** * Default Constructor */ public KDTree() { } /** * Constructor, copies all options from an existing KDTree. * @param tree the KDTree to copy from */ public KDTree(KDTree tree) { m_Universe = tree.m_Universe; m_Instances = tree.m_Instances; m_MinBoxRelWidth = tree.m_MinBoxRelWidth; m_MaxInstInLeaf = tree.m_MaxInstInLeaf; m_Valid = tree.m_Valid; } /************************************************************************** * * A class for storing a KDTree node. * **************************************************************************/ private class KDTreeNode implements Serializable { /** node number (only for debug) */ private int m_NodeNumber; /** left subtree; contains instances with smaller or equal to split value. */ private KDTreeNode m_Left = null; /** right subtree; contains instances with larger than split value. */ private KDTreeNode m_Right = null; /** value to split on. */ private double m_SplitValue; /** attribute to split on. */ private int m_SplitDim; /** * Every subtree stores the beginning index and the end index of the range * in the main instancelist, that contains its own instances */ private int m_Start = 0; private int m_End = 0; /* * lowest and highest value and width (= high - low) for each * dimension */ private double[][] m_NodeRanges; /** * Gets the splitting dimension. * @return splitting dimension */ public int getSplitDim() { return m_SplitDim; } /** * Gets the splitting value. * @return splitting value */ public double getSplitValue() { return m_SplitValue; } /** * Checks if node is a leaf. * @return true if it is a leaf */ public boolean isALeaf () { return (m_Left == null); } /** * Tidies up after delete. This means it changes start and end ranges. * @param deleted index of the already deleted instance */ public void tidyUpAfterDelete(int deleted) { boolean changed = false; // test, if one of the childrens last instance gets deleted if (!isALeaf()) { boolean deleteLastOne = false; if ((m_Left.m_Start == deleted) && (m_Left.m_End == deleted)) { deleteLastOne = true; } if ((m_Right.m_Start == deleted) && (m_Right.m_End == deleted)) { deleteLastOne = true; } if (deleteLastOne) { // make a leaf m_Right = null; m_Left = null; } } // test if start or/and end needs to be changed if (deleted <= m_End) { m_End--; changed = true; if (deleted < m_Start) { m_Start--; } } if (changed) { // prepare local instance list to work with int numInst = m_End - m_Start + 1; int [] instList = new int[numInst]; int index = 0; for (int i = m_Start; i <= m_End; i++) { instList[index++] = m_InstList.get(i); } // set ranges and split parameter m_NodeRanges = m_Instances.initializeRanges(instList); } } /** * Makes a KDTreeNode. * Use this, if ranges are already defined. * @param num number of the current node * @param ranges the ranges * @param start start index of the instances * @param end index of the instances * @exception thrown if instance couldn't be retrieved */ private void makeKDTreeNode(int[] num, double[][] ranges, int start, int end) throws Exception { m_NodeRanges = ranges; makeKDTreeNode(num, start, end); } /** * Makes a KDTreeNode. * @param num the node number * @param start the start index of the instances in the index list * @param end the end index of the instances in the index list * @exception thrown if instance couldn't be retrieved */ private void makeKDTreeNode(int [] num, int start, int end) throws Exception { num[0]++; m_NodeNumber = num[0]; m_Start = start; m_End = end; m_Left = null; m_Right = null; m_SplitDim = -1; m_SplitValue = -1; double relWidth = 0.0; boolean makeALeaf = false; int numInst = end - start + 1; // if number of instances is under a maximum, then the node is a leaf if (numInst <= m_MaxInstInLeaf) { makeALeaf = true; } // prepare local instance list to work with int [] instList = new int[numInst]; int index = 0; for (int i = start; i <= end; i++) { instList[index++] = m_InstList.get(i); } // set ranges and split parameter if (m_NodeRanges == null) m_NodeRanges = m_Instances.initializeRanges(instList); // set outer ranges if (m_Universe == null) { m_Universe = m_NodeRanges; } m_SplitDim = widestDim(m_Normalize); if (m_SplitDim >= 0) { m_SplitValue = splitValue(m_SplitDim); // set relative width relWidth = m_NodeRanges[m_SplitDim][R_WIDTH] / m_Universe[m_SplitDim][R_WIDTH]; } /* calculate bias double bias = 0.0; for (int i = 0; i < m_Instances.numAttributes(); i++) { double tmp = m_NodeRanges[i][R_WIDTH] / m_Universe[i][R_WIDTH]; bias += tmp * tmp; } m_Bias = bias * numInst; */ // check if thin enough to make a leaf if (relWidth <= m_MinBoxRelWidth) { makeALeaf = true; } // split instance list into two // first define which one have to go left and right.. int numLeft = 0; boolean [] left = new boolean[numInst]; if (!makeALeaf) { numLeft = checkSplitInstances(left, instList, m_SplitDim, m_SplitValue); // if one of the sides would be empty, make a leaf // which means, do nothing if ((numLeft == 0) || (numLeft == numInst)) { makeALeaf = true; //OOPS("makeKDTreeNode: " + m_NodeNumber // + " one side was empty after split"); } } if (makeALeaf) { //TODO I think we don't need any of the following: // sum = // sum is a row vector that has added up all rows // summags = // is one double that contains the sum of the scalar product // of all row vectors with themselves } else { // and now really make two lists int [] leftInstList = new int[numLeft]; int [] rightInstList = new int[numInst - numLeft]; int startLeft = start; int startRight = start + numLeft; splitInstances(left, instList, startLeft, startRight); /** for (int i = 0; i < m_InstList.length(); i++) { if (i == startLeft) System.out.print(" /LE/ "); if (i == startRight) System.out.print(" /RI/ "); System.out.print(" / " + m_InstList.get(i)); } int[] debugInstList = new int[m_InstList.length()]; for (int i = 0; i < debugInstList.length; i++) { debugInstList[i] = m_InstList.get(i); } boolean [] debugleft = new boolean[m_InstList.length()]; int debugnumLeft = checkSplitInstances(debugleft, debugInstList, m_SplitDim, m_SplitValue); boolean first = true; for (int i = start; i < end; i++) { if (first && !debugleft[i]) { first = false; } System.out.print(" " + debugleft[i]); } OOPS(" "); end debug **/ // make left subKDTree int endLeft = startLeft + numLeft - 1; m_Left = new KDTreeNode(); m_Left.makeKDTreeNode(num, startLeft, endLeft); // m_Sum += m_Left.getSum(); // m_SumMags += m_Left.getSumMags(); // make right subKDTree int endRight = end; m_Right = new KDTreeNode(); m_Right.makeKDTreeNode(num, startRight, endRight); // m_Sum += m_Right.getSum(); // m_SumMags += m_Right.getSumMags(); } } /** * Returns the widest dimension. * @param normalize if true normalization is used * @return attribute index that has widest range */ private int widestDim(boolean normalize) { double widest = 0.0; int w = -1; if (normalize) { for (int i = 0; i < m_NodeRanges.length; i++) { double newWidest = m_NodeRanges[i][R_WIDTH] / m_Universe[i][R_WIDTH]; if (newWidest > widest) { widest = newWidest; w = i; } } } else { for (int i = 0; i < m_NodeRanges.length; i++) { if (m_NodeRanges[i][R_WIDTH] > widest) { widest = m_NodeRanges[i][R_WIDTH]; w = i; } } } return w; } /** * Returns the split value of a given dimension. * @param dim dimension where split happens * @return the split value */ private double splitValue(int dim) { double split = m_DistanceFunction.getMiddle(m_NodeRanges[dim]); // split = m_NodeRanges[dim][R_MIN] + m_NodeRanges[dim][R_WIDTH] * 0.5; return split; } /** * Add an instance to the node or subnode. Returns false if adding cannot * be done. * Looks for the subnode the instance actually belongs to. * Corrects the end boundary of the instance list by coming up * @param instance the instance to add * @return true if adding was done **/ public boolean addInstance(Instance instance) throws Exception { boolean success = false; if (!isALeaf()) { // go further down the tree to look for the leaf the instance should be in double instanceValue = instance.value(m_SplitDim); boolean instanceInLeft = instanceValue <= m_SplitValue; if (instanceInLeft) { success = m_Left.addInstance(instance); if (success) { // go into right branch to correct instance list boundaries m_Right.afterAddInstance(); } } else { success = m_Right.addInstance(instance); } // instance was included if (success) { // correct end index of instance list of this node m_End++; // correct ranges m_NodeRanges = Instances.updateRanges(instance, m_NodeRanges); } } else { // found the leaf to insert instance // ranges have been updated m_NodeRanges = Instances.updateRanges(instance, m_NodeRanges); int index = m_Instances.numInstances() - 1; m_InstList.squeezeIn(m_End, index); m_End++; int numInst = m_End - m_Start + 1; // leaf did get too big? if (numInst > m_MaxInstInLeaf) { //split leaf int [] num = new int[1]; num[0] = m_NodeNumber; this.makeKDTreeNode(num, m_NodeRanges, m_Start, m_End); } success = true; } return success; } /** * Corrects the boundaries of all nodes to the right of the leaf where * the instance was added to. **/ public void afterAddInstance() { m_Start++; m_End++; if (!isALeaf()) { m_Left.afterAddInstance(); m_Right.afterAddInstance(); } } /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -