📄 kdtree.java

📁 Java 编写的多种数据挖掘算法包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    KDTree.java *    Copyright (C) 2000 University of Waikato * */package weka.core;import java.io.Serializable;import java.util.Enumeration;import java.util.Vector;/**  <!-- globalinfo-start --> * Class implementing the KDTree search algorithm for nearest neighbour search.<br/> * The connection to dataset is only a reference. For the tree structure the indexes are stored in an array. <br/> * Building the tree:<br/> * If a node has &lt;maximal-inst-number&gt; (option -L) instances no further splitting is done. Also if the split would leave one side empty, the branch is not split any further even if the instances in the resulting node are more than &lt;maximal-inst-number&gt; instances.<br/> * **PLEASE NOTE:** The algorithm can not handle missing values, so it is advisable to run ReplaceMissingValues filter if there are any missing values in the dataset. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -W &lt;value&gt; *  Set minimal width of a box *  (default = 1.0E-2).</pre> *  * <pre> -L *  Maximal number of instances in a leaf *  (default = 40).</pre> *  * <pre> -N *  Normalizing will be done *  (Select dimension for split, with normalising to universe).</pre> *  <!-- options-end --> * * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @author Malcolm Ware (mfw4@cs.waikato.ac.nz) * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) * @version $Revision: 1.8 $ */public class KDTree   extends NearestNeighbourSearch   implements OptionHandler, Serializable {    /** for serialization */  static final long serialVersionUID = 6945099921195713112L;  /** flag for normalizing */  boolean m_NormalizeNodeWidth = false;  /**   * Ranges of the whole KDTree.   * lowest and highest value and width (= high - low) for each    * dimension   */  private double[][] m_Universe;  /** root node */  private KDTreeNode m_Root = null;  /**    * Indexlist of the instances of this kdtree.    * Instances get sorted according to the splits.   * the nodes of the KDTree just hold their start and end indices    */  private int[] m_InstList;    /** The euclidean distance function to use */  private EuclideanDistance m_EuclideanDistance; { // to make sure we have only one object of EuclideanDistance  if(m_DistanceFunction instanceof EuclideanDistance)    m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction;  else     m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance();  }   /** minimal relative width of a KDTree rectangle */   double m_MinBoxRelWidth = 1.0E-2;  /** maximal number of instances in a leaf */  int m_MaxInstInLeaf = 40;  /**   * Index in range array of attributes for MIN and MAX and WIDTH (range) of    * an attribute.   */  public static int R_MIN = 0;  public static int R_MAX = 1;  public static int R_WIDTH = 2;  /**   * Default Constructor   */  public KDTree() {  }  /**   * Constructor, copies all options from an existing KDTree.   * @param tree the KDTree to copy from   */  public KDTree(KDTree tree) {    m_Universe = tree.m_Universe;    m_Instances = tree.m_Instances;    m_EuclideanDistance = tree.m_EuclideanDistance; //added    m_MinBoxRelWidth = tree.m_MinBoxRelWidth;     m_MaxInstInLeaf = tree.m_MaxInstInLeaf;  }  /**************************************************************************   *   * A class for storing a KDTree node.   *   **************************************************************************/  private class KDTreeNode    implements Serializable {        /** for serialization */    static final long serialVersionUID = 6569698002660546608L;    /** node number (only for debug) */    private int m_NodeNumber;    /** left subtree; contains instances with smaller or equal to split value. */    private KDTreeNode m_Left = null;        /** right subtree; contains instances with larger than split value. */    private KDTreeNode m_Right = null;        /** value to split on. */    private double m_SplitValue;        /** attribute to split on. */    private int m_SplitDim;    /**     * Every subtree stores the beginning index and the end index of the range     * in the main instancelist, that contains its own instances     */    private int m_Start = 0;    private int m_End = 0;        /**     * lowest and highest value and width (= high - low) for each     * dimension     */    private double[][] m_NodeRanges;        /**     * Gets the splitting dimension.     *      * @return splitting dimension     */    public int getSplitDim() {      return m_SplitDim;    }        /**     * Gets the splitting value.     *      * @return splitting value     */    public double getSplitValue() {      return m_SplitValue;    }    /**     * Checks if node is a leaf.     *      * @return true if it is a leaf     */    public boolean isALeaf () {      return (m_Left == null);    }      /**     * Makes a KDTreeNode.      * Use this, if ranges are already defined.     *      * @param num number of the current node      * @param ranges the ranges      * @param start start index of the instances     * @param end index of the instances     * @throws Exception if instance couldn't be retrieved     */    private void makeKDTreeNode(int[] num, double[][] ranges, int start,			       int end) throws Exception {      m_NodeRanges = ranges;      makeKDTreeNode(num, start, end);    }    /**     * Makes a KDTreeNode.      *      * @param num the node number      * @param start the start index of the instances in the index list     * @param end the end index of the instances in the index list     * @throws Exception if instance couldn't be retrieved     */    private void makeKDTreeNode(int [] num, int start, int end)       throws Exception {      num[0]++;      m_NodeNumber =  num[0];      m_Start = start;      m_End = end;      m_Left = null;      m_Right = null;      m_SplitDim = -1;      m_SplitValue = -1;      double relWidth = 0.0;      boolean makeALeaf = false;      int numInst = end - start + 1;      // if number of instances is under a maximum, then the node is a leaf      if (numInst <= m_MaxInstInLeaf) {	makeALeaf = true;      }      // set ranges and split parameter      if (m_NodeRanges == null)        m_NodeRanges = m_EuclideanDistance.initializeRanges(m_InstList, start, end);      // set outer ranges      if (m_Universe == null) {	m_Universe = m_NodeRanges;      }            m_SplitDim = widestDim(m_NormalizeNodeWidth, m_Instances.classIndex());      if (m_SplitDim >= 0) {	m_SplitValue = splitValue(m_SplitDim);	// set relative width	relWidth = m_NodeRanges[m_SplitDim][R_WIDTH]                    / m_Universe[m_SplitDim][R_WIDTH];      }      // check if thin enough to make a leaf      if (relWidth <= m_MinBoxRelWidth) {	makeALeaf = true;      }            // split instance list into two        // first define which one have to go left and right..      int numLeft = 0;      boolean [] left = new boolean[numInst];      if (!makeALeaf) {        numLeft = checkSplitInstances(left, start, end, m_SplitDim, m_SplitValue);      	// if one of the sides would be empty, make a leaf	// which means, do nothing	if ((numLeft == 0) || (numLeft == numInst)) {	  makeALeaf = true; 	}      }      if (makeALeaf) {	//TODO I think we don't need any of the following:	// sum = 	// sum is a row vector that has added up all rows 	// summags =	// is one double that contains the sum of the scalar product	// of all row vectors with themselves      } else {	// and now really make two lists	int startLeft = start;	int startRight = start + numLeft;        splitInstances(left, start, end, startLeft);	// make left subKDTree  	int endLeft = startLeft + numLeft - 1;	m_Left = new KDTreeNode();    	m_Left.makeKDTreeNode(num, startLeft, endLeft);	// make right subKDTree	int endRight = end;	m_Right = new KDTreeNode(); 	m_Right.makeKDTreeNode(num, startRight, endRight);      }    }    /**     * Returns the widest dimension.     *      * @param normalize if true normalization is used     * @param classIdx the index of the class attribute     * @return attribute index that has widest range     */    private int widestDim(boolean normalize, int classIdx) {            double widest = 0.0;      int w = -1;      if (normalize) {	for (int i = 0; i < m_NodeRanges.length; i++) {	  double newWidest = m_NodeRanges[i][R_WIDTH] / m_Universe[i][R_WIDTH];	  if (newWidest > widest) {            if(i == classIdx) continue;	    widest = newWidest;	    w = i;	  }	}      }      else {	for (int i = 0; i < m_NodeRanges.length; i++) {	  if (m_NodeRanges[i][R_WIDTH] > widest) {            if(i == classIdx) continue;	    widest = m_NodeRanges[i][R_WIDTH];	    w = i;	  }	}      }      return w;    }        /**     * Returns the split value of a given dimension.     *      * @param dim dimension where split happens     * @return the split value     */    private double splitValue(int dim) {            double split = m_EuclideanDistance.getMiddle(m_NodeRanges[dim]);      return split;    }    /**     * Add an instance to the node or subnode. Returns false if adding cannot      * be done.     * Looks for the subnode the instance actually belongs to.     * Corrects the end boundary of the instance list by coming up     *      * @param instance the instance to add     * @return true if adding was done     * @throws Exception if something goes wrong     */    public boolean addInstance(Instance instance) throws Exception {            boolean success = false;      if (!isALeaf()) {	// go further down the tree to look for the leaf the instance should be in	double instanceValue = instance.value(m_SplitDim);	boolean instanceInLeft = instanceValue <= m_SplitValue;	if (instanceInLeft) {	  success = m_Left.addInstance(instance);	  if (success) {	    // go into right branch to correct instance list boundaries	    m_Right.afterAddInstance();	  }	}	else {	  success = m_Right.addInstance(instance);	}	// instance was included	if (success) {	  // correct end index of instance list of this node
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -