⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kdtree.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    KDTree.java *    Copyright (C) 2000 University of Waikato * */package weka.core;import  weka.core.*;import java.io.Serializable;import java.util.*;import java.lang.Math;/**  * This is a KD-Tree structure that stores instances using a divide and conquer  * method. * The connection to dataset is only a reference. For the tree structure the  * indexes are stored in a dynamic array * Building the tree: * If a node has <maxleaf> (option -L) instances no further splitting is done. * Also if the split would leave one side empty, the branch is not split any  * further even if the instances in the resulting node are more than <maxleaf>  * instances. * * -P <br> * Pruning flag. <p> * * -W <minimal-box-relative-width> <br> * minimal width of a box * * -L <maximal-inst-number> <br> * maximal instance number in a leaf *  * -D <distance function> * Distance function to be used (default = Euclidean distance) * * -N <br> * Set Normalization. Used when building the tree and selecting the * widest dimension, each dimension is 'normalized' to the universe range. * * -U <debuglevel> <br> * Set debuglevel. <p> * * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @author Malcolm Ware (mfw4@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class KDTree implements OptionHandler, Serializable{  /** flag for pruning */  boolean m_Prune = false;  /** flag for normalizing */  boolean m_Normalize = false;  /** for debugging: debug level */  private int m_DebugLevel = 0;  /*   * Ranges of the whole KDTree.   * lowest and highest value and width (= high - low) for each    * dimension   */  private double[][] m_Universe;  /** the distance function used */  private DistanceFunction m_DistanceFunction = null;  /** value to split on. */  private double m_SplitValue;  /** attribute to split on. */  private int m_SplitDim;  /** root node */  private KDTreeNode m_Root = null;  /**    * Indexlist of the instances of this kdtree.    * Instances get sorted according to the splits.   * the nodes of the KDTree just hold their start and end indices    */  private DynamicArrayOfPosInt m_InstList;  /** Reference to the instances of the universe. */  private Instances m_Instances;   /**    * Flag that can be set to signalize that the KDTree is not valid anymore for the    * dataset given,    * the flag is false after the tree is newly    * initialized and therefore empty    * or if the dataset has changed in a way that couldn't be followed up for    * the KDTree, for instance if an instance was deleted.   **/  private boolean m_Valid = false;  /* minimal relative width of a KDTree rectangle */   double m_MinBoxRelWidth = 1.0E-2;  /** maximal number of leaves in a KDTree */  // int m_MaxLeafNumber = 100; not yet implemented  /** maximal number of instances in a leaf */  int m_MaxInstInLeaf = 40;  /** This will cache the pruning value for future use. */  private double m_pruneValue = Double.NaN; //todo not used  /**   * Index in ranges for LOW and HIGH and WIDTH   */  public static int R_MIN = 0;  public static int R_MAX = 1;  public static int R_WIDTH = 2;  /**   * Default Constructor   */  public KDTree() {  }  /**   * Constructor, copies all options from an existing KDTree.   * @param tree the KDTree to copy from   */  public KDTree(KDTree tree) {    m_Universe = tree.m_Universe;    m_Instances = tree.m_Instances;    m_MinBoxRelWidth = tree.m_MinBoxRelWidth;     m_MaxInstInLeaf = tree.m_MaxInstInLeaf;    m_Valid = tree.m_Valid;   }  /**************************************************************************   *   * A class for storing a KDTree node.   *   **************************************************************************/  private class KDTreeNode implements Serializable {    /** node number (only for debug) */    private int m_NodeNumber;    /** left subtree; contains instances with smaller or equal to split value. */    private KDTreeNode m_Left = null;        /** right subtree; contains instances with larger than split value. */    private KDTreeNode m_Right = null;        /** value to split on. */    private double m_SplitValue;        /** attribute to split on. */    private int m_SplitDim;    /**     * Every subtree stores the beginning index and the end index of the range     * in the main instancelist, that contains its own instances     */    private int m_Start = 0;    private int m_End = 0;        /*     * lowest and highest value and width (= high - low) for each     * dimension     */    private double[][] m_NodeRanges;    /**     * Gets the splitting dimension.     * @return splitting dimension     */    public int getSplitDim() {      return m_SplitDim;    }        /**     * Gets the splitting value.     * @return splitting value     */    public double getSplitValue() {      return m_SplitValue;    }    /**     * Checks if node is a leaf.     * @return true if it is a leaf     */    public boolean isALeaf () {      return (m_Left == null);    }      /**     * Tidies up after delete. This means it changes start and end ranges.      * @param deleted index of the already deleted instance     */    public void tidyUpAfterDelete(int deleted) {          boolean changed = false;      // test, if one of the childrens last instance gets deleted      if (!isALeaf()) {	boolean deleteLastOne = false;	if ((m_Left.m_Start == deleted) && (m_Left.m_End == deleted)) {	  deleteLastOne = true;	}	if ((m_Right.m_Start == deleted) && (m_Right.m_End == deleted)) {	  deleteLastOne = true;	}	if (deleteLastOne) {	  // make a leaf	  m_Right = null;	  m_Left = null;	}      }      // test if start or/and end needs to be changed      if (deleted <= m_End) {	m_End--;	changed = true;	if (deleted < m_Start) {	  m_Start--;	}      }      if (changed) {	// prepare local instance list to work with	int numInst = m_End - m_Start + 1;	int [] instList = new int[numInst];	int index = 0;	for (int i = m_Start; i <= m_End; i++) {	  instList[index++] = m_InstList.get(i);	}	// set ranges and split parameter	m_NodeRanges = m_Instances.initializeRanges(instList);      }    }    /**     * Makes a KDTreeNode.      * Use this, if ranges are already defined.     * @param num number of the current node      * @param ranges the ranges      * @param start start index of the instances     * @param end index of the instances     * @exception thrown if instance couldn't be retrieved     */    private void makeKDTreeNode(int[] num, double[][] ranges, int start,			       int end) throws Exception {      m_NodeRanges = ranges;      makeKDTreeNode(num, start, end);    }    /**     * Makes a KDTreeNode.      * @param num the node number      * @param start the start index of the instances in the index list     * @param end the end index of the instances in the index list     * @exception thrown if instance couldn't be retrieved     */    private void makeKDTreeNode(int [] num, 		     	        int start,			        int end) throws Exception {      num[0]++;      m_NodeNumber =  num[0];      m_Start = start;      m_End = end;      m_Left = null;      m_Right = null;      m_SplitDim = -1;      m_SplitValue = -1;      double relWidth = 0.0;      boolean makeALeaf = false;      int numInst = end - start + 1;      // if number of instances is under a maximum, then the node is a leaf      if (numInst <= m_MaxInstInLeaf) {	makeALeaf = true;      }      // prepare local instance list to work with      int [] instList = new int[numInst];      int index = 0;      for (int i = start; i <= end; i++) {	instList[index++] = m_InstList.get(i);      }      // set ranges and split parameter      if (m_NodeRanges == null)	m_NodeRanges = m_Instances.initializeRanges(instList);      // set outer ranges      if (m_Universe == null) {	m_Universe = m_NodeRanges;      }            m_SplitDim = widestDim(m_Normalize);      if (m_SplitDim >= 0) {	m_SplitValue = splitValue(m_SplitDim);	// set relative width	relWidth = m_NodeRanges[m_SplitDim][R_WIDTH]                    / m_Universe[m_SplitDim][R_WIDTH];      }      /* calculate bias       double bias = 0.0;      for (int i = 0; i < m_Instances.numAttributes(); i++) {	double tmp =  m_NodeRanges[i][R_WIDTH] / m_Universe[i][R_WIDTH];	bias += tmp * tmp;      }      m_Bias = bias * numInst;      */      // check if thin enough to make a leaf      if (relWidth <= m_MinBoxRelWidth) {	makeALeaf = true;      }            // split instance list into two        // first define which one have to go left and right..      int numLeft = 0;      boolean [] left = new boolean[numInst];      if (!makeALeaf) {	numLeft = checkSplitInstances(left, instList,				      m_SplitDim, m_SplitValue);      	// if one of the sides would be empty, make a leaf	// which means, do nothing	if ((numLeft == 0) || (numLeft == numInst)) {	  makeALeaf = true; 	  //OOPS("makeKDTreeNode: " + m_NodeNumber           //     + " one side was empty after split");	}      }      if (makeALeaf) {	//TODO I think we don't need any of the following:	// sum = 	// sum is a row vector that has added up all rows 	// summags =	// is one double that contains the sum of the scalar product	// of all row vectors with themselves      } else {	// and now really make two lists	int [] leftInstList = new int[numLeft];	int [] rightInstList = new int[numInst - numLeft];	int startLeft = start;	int startRight = start + numLeft;	splitInstances(left, instList, startLeft, startRight);	/**	for (int i = 0; i < m_InstList.length(); i++) {          if (i == startLeft) System.out.print(" /LE/ ");           if (i == startRight) System.out.print(" /RI/ "); 	  System.out.print(" / " + m_InstList.get(i));        }        int[] debugInstList = new int[m_InstList.length()];	for (int i = 0; i < debugInstList.length; i++) {	  debugInstList[i] = m_InstList.get(i);	}	boolean [] debugleft = new boolean[m_InstList.length()];	int debugnumLeft = checkSplitInstances(debugleft, debugInstList,				      m_SplitDim, m_SplitValue);	boolean first = true;	for (int i = start; i < end; i++) {          if (first && !debugleft[i]) {	    first = false;	  }	  System.out.print(" " + debugleft[i]);	}	OOPS(" ");	end debug **/       	// make left subKDTree  	int endLeft = startLeft + numLeft - 1;	m_Left = new KDTreeNode();    	m_Left.makeKDTreeNode(num, startLeft, endLeft);	// m_Sum += m_Left.getSum(); 	// m_SumMags += m_Left.getSumMags();	// make right subKDTree	int endRight = end;	m_Right = new KDTreeNode(); 	m_Right.makeKDTreeNode(num, startRight, endRight);	// m_Sum += m_Right.getSum(); 	// m_SumMags += m_Right.getSumMags();            }    }    /**     * Returns the widest dimension.     * @param normalize if true normalization is used     * @return attribute index that has widest range     */    private int widestDim(boolean normalize) {            double widest = 0.0;      int w = -1;      if (normalize) {	for (int i = 0; i < m_NodeRanges.length; i++) {	  double newWidest = m_NodeRanges[i][R_WIDTH] / m_Universe[i][R_WIDTH];	  if (newWidest > widest) {	    widest = newWidest;	    w = i;	  }	}      }      else {	for (int i = 0; i < m_NodeRanges.length; i++) {	  if (m_NodeRanges[i][R_WIDTH] > widest) {	    widest = m_NodeRanges[i][R_WIDTH];	    w = i;	  }	}      }      return w;    }        /**     * Returns the split value of a given dimension.     * @param dim dimension where split happens     * @return the split value     */    private double splitValue(int dim) {            double split = m_DistanceFunction.getMiddle(m_NodeRanges[dim]);      // split = m_NodeRanges[dim][R_MIN] + m_NodeRanges[dim][R_WIDTH] * 0.5;      return split;    }    /**     * Add an instance to the node or subnode. Returns false if adding cannot      * be done.     * Looks for the subnode the instance actually belongs to.     * Corrects the end boundary of the instance list by coming up     * @param instance the instance to add     * @return true if adding was done     **/    public boolean addInstance(Instance instance) throws Exception {            boolean success = false;      if (!isALeaf()) {	// go further down the tree to look for the leaf the instance should be in	double instanceValue = instance.value(m_SplitDim);	boolean instanceInLeft = instanceValue <= m_SplitValue;	if (instanceInLeft) {	  success = m_Left.addInstance(instance);	  if (success) {	    // go into right branch to correct instance list boundaries	    m_Right.afterAddInstance();	  }	}	else {	  success = m_Right.addInstance(instance);	}	// instance was included	if (success) {	  // correct end index of instance list of this node	  m_End++;	  // correct ranges	  m_NodeRanges = Instances.updateRanges(instance, m_NodeRanges);	}              }      else { // found the leaf to insert instance        // ranges have been updated  	m_NodeRanges = Instances.updateRanges(instance, m_NodeRanges);	int index = m_Instances.numInstances() - 1;	m_InstList.squeezeIn(m_End, index);	m_End++; 	int numInst = m_End - m_Start + 1;	// leaf did get too big?	if (numInst > m_MaxInstInLeaf) {	  //split leaf	  int [] num = new int[1];	  num[0] = m_NodeNumber;	  this.makeKDTreeNode(num, m_NodeRanges, m_Start,			      m_End);        }	success = true;      }      return success;    }        /**     * Corrects the boundaries of all nodes to the right of the leaf where      * the instance was added to.     **/    public void afterAddInstance() {      m_Start++;      m_End++;            if (!isALeaf()) {	m_Left.afterAddInstance();	m_Right.afterAddInstance();      }    }    /**

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -