⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kdtree.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	return Double.MAX_VALUE;            if (m_NearestListLength<m_kNN) {	for (;(index <= m_End) && (i < m_kNN);) {           currIndex = m_InstList[index]; 	  Instance trainInstance = m_Instances.instance(m_InstList[index]);	  if (target != trainInstance) { // for hold-one-out cross-validation            //if(print==true)            //  OOPS("K: "+i);            dist = m_EuclideanDistance.distance(target, trainInstance, Double.MAX_VALUE, print);	    m_NearestList[i] = currIndex;	    m_DistanceList[i] = dist;	    i++;	  }	  index++;	}        m_NearestListLength = i;      }            // set the new furthest nearest      m_FurthestNear = checkFurthestNear();  //FURTHEST IN m_kNN NEAREST NEIGHBOURS      m_MaxMinDist = m_DistanceList[m_FurthestNear];            int firstFurthestIndex=-1;      double firstFurthestDistance = -1;      int oldNearestListLength = -1;      // check all or rest of instances if nearer      for (; index <= m_End; index++) {	      	currIndex = m_InstList[index];	Instance trainInstance = m_Instances.instance(currIndex);	if (target != trainInstance) { // for hold-one-out cross-validation                    dist = m_EuclideanDistance.distance(target, trainInstance, m_MaxMinDist, print);	            // is instance one of the nearest?          if (dist < m_MaxMinDist) {                        // set instance as one of the nearest,            // replacing the last furthest nearest            firstFurthestIndex = m_NearestList[m_FurthestNear];            firstFurthestDistance = m_DistanceList[m_FurthestNear];            m_NearestList[m_FurthestNear] = currIndex;            m_DistanceList[m_FurthestNear] = dist;                        // set the new furthest nearest            m_FurthestNear = checkFurthestNear();            m_MaxMinDist = m_DistanceList[m_FurthestNear];            if (m_MultipleFurthest) {              // remove multiple entries of old furthest nearest              oldNearestListLength = m_NearestListLength;              m_NearestListLength = m_kNN;              m_MultipleFurthest = false;            }            //the instance just replaced is at same distance as furthest nearest            //therefore there are multiple furthest nearest            if (firstFurthestDistance == m_MaxMinDist) {              m_MultipleFurthest = true;              if(oldNearestListLength!=-1)                m_NearestListLength = oldNearestListLength;              m_NearestList[m_NearestListLength] = firstFurthestIndex;              m_DistanceList[m_NearestListLength] = firstFurthestDistance;              m_NearestListLength++;            }                        //get rid of the old list length as it is no longer needed and can create problems.            oldNearestListLength = m_NearestListLength;          }          else {            if (dist == m_MaxMinDist) {              // instance is at same distance as furthest nearest              m_MultipleFurthest = true;              m_NearestList[m_NearestListLength] = currIndex;              m_DistanceList[m_NearestListLength] = dist;              m_NearestListLength++;            }          }	}      }            return m_MaxMinDist;    }        /**     * Finds the nearest neighbour to target, this method is called recursively.     * @param target the instance to find nearest neighbour for     * @return the minimal distance found     * @throws Exception if something goes wrong     */    private double kNearestNeighbour(Instance target) throws Exception {      double maxDist;      KDTreeNode nearer, further;      // if is a leaf then the instance is in this hyperrectangle      if (this.isALeaf()) {        // return distance to kthnearest (and index of all	// all k nearest in m_NearestList) 	return this.simpleKNearestNeighbour(target);      }      boolean targetInLeft = m_EuclideanDistance.valueIsSmallerEqual(	     target, 	     m_SplitDim,             m_SplitValue);      if (targetInLeft) {	nearer = m_Left;	further = m_Right;      } else {	nearer = m_Right;	further = m_Left;      }      // look for nearer neighbours in nearer half      maxDist = nearer.kNearestNeighbour(target);             // ... now look in further half if maxDist reaches into it      Instance splitPoint = new Instance(target);      splitPoint.setValue(m_SplitDim, m_SplitValue);      double distanceToSplit = m_EuclideanDistance.distance(target, splitPoint, Double.MAX_VALUE);      boolean lookInSecondHalf =  maxDist >= distanceToSplit;            if (lookInSecondHalf) {        //System.out.println("Searching into the 2nd half of the tree.");	// look for nearer neighbours in further half 	maxDist = further.kNearestNeighbour(target);      }       return maxDist;    }  }   /**   * sets the instances and builds the KDTree   *    * @param instances 	the instances to build the tree from   * @throws Exception	if something goes wrong   */  public void setInstances(Instances instances) throws Exception {   buildKDTree(instances);  }    /**   * Builds the KDTree.   * It is adviseable to run the replace missing attributes filter on the   * passed instances first.   *    * @param instances instances to build the tree of   * @throws Exception if something goes wrong   */  private void buildKDTree(Instances instances) throws Exception {    checkMissing(instances);    //double [][] ranges = instances.initializeRanges();    if(m_EuclideanDistance == null )      m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(instances);    else      m_EuclideanDistance.setInstances(instances);        m_Instances = instances;    int numInst = m_Instances.numInstances();    // Make the global index list     m_InstList = new int[numInst];     for (int i = 0; i < numInst; i++) {      m_InstList[i] = i;    }    // make the tree starting with the roor node    m_Root = new KDTreeNode();     // set global ranges    m_Universe = m_EuclideanDistance.getRanges();    // build the tree     int [] num = new int[1];    num[0] = 0;    m_Root.makeKDTreeNode(num,			  m_Universe, //ranges,                          0,            // index of first instance index                          numInst - 1); // index of last instance index  }  /**   * Adds one instance to the KDTree. This updates the KDTree structure to take   * into account the newly added training instance.   *    * @param instance 	the instance to be added. Usually the newly added instance   * 			in the training set.    * @throws Exception 	if something goes wrong or instances are null   */  public void update(Instance instance) throws Exception {  //better to change to addInstance    if(m_Instances==null)      throw new Exception("No instances supplied yet. Have to call " +                          "setInstances(instances) with a set of Instances " +                          "first.");        boolean success = m_Root.addInstance(instance);    if (!success) {      // make a new tree      buildKDTree(m_Instances);    }  }  /**   * Adds one instance to KDTree loosly. It only changes the ranges in    * EuclideanDistance, and does not affect the structure of the KDTree.    *    * @param instance the new instance.  Usually this is the test instance    * supplied to update the range of attributes in the distance function.   */  public void addInstanceInfo(Instance instance) {    m_EuclideanDistance.updateRanges(instance);  }  /**   * string representing the tree     *    * @return string representing the tree   */  public String toString() {    StringBuffer text = new StringBuffer();    KDTreeNode tree = m_Root;    if(m_Root==null) {      text.append("KDTree not built yet.");      return text.toString();    }    int[] num = new int[1];    num[0] = 0;    text.append("\nKDTree build:");    text.append(tree.statToString(true, true));    // tree in string format:    text.append(tree.nodeToString(true));    return text.toString();  }    /**   * Assigns instances to centers using KDTree.    *    * @param centers the current centers   * @param assignments the centerindex for each instance   * @param pc the threshold value for pruning.   * @throws Exception if something goes wrong   */  public void centerInstances(Instances centers, int [] assignments,				double pc) throws Exception {        int [] centList = new int[centers.numInstances()];    for (int i = 0; i < centers.numInstances(); i++)      centList[i] = i;    m_Root.determineAssignments(centers, centList, 			 assignments, pc);  }  /**   * Returns array of boolean set true or false if instance is part   * of next left kdtree.   *    * @param left list of boolean values, true if instance belongs to left   * @param startIdx   * @param endIdx   * @param splitDim index of splitting attribute   * @param splitValue value at which the node is split   * @return number of instances that belong to the left   */  private int checkSplitInstances(boolean [] left,      int startIdx, int endIdx,      int splitDim, double splitValue) {    // length of left should be same as length of instList    int numLeft = 0;    for (int i = startIdx, j = 0; i <= endIdx; i++, j++) {      // value <= splitValue      if (m_EuclideanDistance.valueIsSmallerEqual(            m_Instances.instance(m_InstList[i]),            splitDim,            splitValue)) {        left[j] = true;        numLeft++;      } else {        left[j] = false;      }    }    return numLeft;  }  /**   * Sorts instances newly into left and right part.   *    * @param left list of flags, set true by this method if instance   * should go to the left follow node   * @param startIdx   * @param endIdx   * @param startLeft     */  private void splitInstances(boolean [] left,  int startIdx, int endIdx,                              int startLeft) {    int tmp;    //shuffling indices in the left node to the left of the array and those in     //right node to the right side of the array (see makeKDTreeNode() for     //referred startLeft, numLeft and startRight variables).    //After for loop starting from startLeft, numLeft indices will be on left    //and the rest will be on right starting from startRight     for (int i = startIdx, j = 0; i <= endIdx; i++, j++) {      if (left[j]) {         tmp = m_InstList[startLeft];        m_InstList[startLeft++] = m_InstList[i]; //instList[i];        m_InstList[i] = tmp;      }    }  }  /**    * Checks if there is any instance with missing values. Throws an exception    * if there is, as KDTree does not handle missing values.    *    * @param instances the instances to check   * @throws Exception if missing values are encountered   */  private void checkMissing(Instances instances) throws Exception {    for(int i=0; i<instances.numInstances(); i++) {      Instance ins = instances.instance(i);      for(int j=0; j<ins.numValues(); j++) {        if(ins.index(j) != ins.classIndex())          if(ins.isMissingSparse(j)) {            throw new Exception("ERROR: KDTree can not deal with missing " +            "values. Please run ReplaceMissingValues filter " +            "on the dataset before passing it on to the KDTree.");          }      }    }  }  /**   * Checks if there is any missing value in the instance.    *    * @param ins		the instances to check   * @throws Exception 	if missing values are encountered   */  private void checkMissing(Instance ins) throws Exception {    for(int j=0; j<ins.numValues(); j++) {      if(ins.index(j) != ins.classIndex())        if(ins.isMissingSparse(j)) {          throw new Exception("ERROR: KDTree can not deal with missing " +          "values. Please run ReplaceMissingValues filter " +          "on the dataset before passing it on to the KDTree.");        }    }  }   /** --------------------------------------------------------------------------------    ** variables for nearest neighbour search    *  --------------------------------------------------------------------------------*/  /** index/indices of current target */  private int [] m_NearestList;  /** length of nearest list (can be larger than k) */  private int m_NearestListLength = 0;  /** true if more than of k nearest neighbours */  private boolean m_MultipleFurthest = false;  /** number of nearest neighbours k */  private int m_kNN = 0;   /** distance to current furthest of the neighbours */  private double m_MaxMinDist = Double.MAX_VALUE;  /** index of the furthest of the neighbours in m_NearestList */  private int m_FurthestNear = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -