⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kdtree.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
     * Deletes one instance in this KDTree node or its subsequent nodes.     * @param index the index of the instance to be deleted     * @return true if instance was deleted     */    public boolean deleteInstance(int index) throws Exception {      int pos = m_InstList.deleteOneIndex(index);      if (pos >= 0) {	m_Root.tidyUpAfterDelete(pos);	return true;      } else {	return false;      }    }    /**     * Returns statistics about the KDTree.     * @param num give number of nodes     * @param leaves give number of leaves     * @return a text string that contains the statistics to the KDTree     */    public String statToString (boolean nodes, boolean leaves) {            int count = 1;      int stats[] = new int [2];      if (this.m_Left != null) count = this.m_Left.collectStats(count, stats);      if (this.m_Right != null) count = this.m_Right.collectStats(count, stats);            StringBuffer text = new StringBuffer();      if (nodes)	text.append("\n  Number of nodes in the tree " + count + " \n");      if (leaves)	text.append("  Number of leaves in the tree " + stats[0] + " \n");      return text.toString();    }    /**     * Returns statistics about the KDTree.     * @param count number of nodes so far     * @param stats array with stats info      * @return the number of nodes      */    public int collectStats (int count, int[] stats) {      count++;      if (this.m_Left != null) count = this.m_Left.collectStats(count, stats);      if (this.m_Right != null) 	count = this.m_Right.collectStats(count, stats);       else // is a leaf	stats[0]++;      return count;    }    /**     * Returns the KDTree node and its underlying branches as string.     * @param leaves adds the instances of the leaves     * @return a string representing the node     */    public String nodeToString (boolean leaves) {      StringBuffer text = new StringBuffer();      text.append("NODE-Nr:          " + m_NodeNumber + "\n");      int len = m_End - m_Start + 1;      text.append("Num of instances: " + len + "\n");      text.append("start " + m_Start + " == end " + m_End + "\n");      if (!isALeaf()) {	text.append("attribute: " + this.m_SplitDim);	text.append("split at: " + this.m_SplitValue + "\n");      }      else {	text.append("is a leaf\n");	if (leaves) {	  for (int i = m_Start; i <= m_End; i++) {	    int instIndex = m_InstList.get(i);            text.append(instIndex + ": ");	    text.append(m_Instances.instance(instIndex).toString() + "\n");	  } 	}      }      text.append("------------------\n");      if (this.m_Left != null) text.append(this.m_Left.nodeToString(leaves));      if (this.m_Right != null) text.append(this.m_Right.nodeToString(leaves));      return text.toString();    }    /**     * Assigns instances to the current centers called candidates.     *      * @param centers all the current centers     * @param candidates the current centers the method works on     * @param assignments the center index for each instance     * @param pc the threshold value for pruning     * @param p True if pruning should be used     */    private void determineAssignments(Instances centers,		        	       int[] candidates,				       int[] assignments,				       double pc, boolean p)       throws Exception {            // reduce number of owners for current hyper rectangle      int [] owners = refineOwners(centers, candidates);            // only one owner          if (owners.length == 1) {	// all instances of this node are owned by one center	for (int i = m_Start; i <= m_End; i++) {	  assignments[m_InstList.get(i)] // the assignment of this instance	    = owners[0]; // is the current owner	}      }      else 	if (!this.isALeaf()) {	  // more than one owner and it is not a leaf	  m_Left.determineAssignments(centers, owners, assignments, pc, p);	  m_Right.determineAssignments(centers, owners, assignments, pc, p);	}	else {	  // this is a leaf and there are more than 1 owner	  //XMeans.	  assignSubToCenters(m_NodeRanges, centers, owners, assignments);	}    }        /**     * Refines the ownerlist.     * @param centers all centers     * @param candidates the indexes of those centers that are candidates     * @return list of owners     */    private int [] refineOwners(Instances centers, int [] candidates)       throws Exception {            int [] owners = new int [candidates.length];      double minDistance = Double.MAX_VALUE;      int ownerIndex = -1;      Instance owner;      int numCand = candidates.length;      double [] distance = new double[numCand];      boolean [] inside = new boolean[numCand];      for (int i = 0; i < numCand; i++) {	distance[i] = distanceToHrect(centers.instance(candidates[i]));	inside[i] = (distance[i] == 0.0);	if (distance[i] < minDistance) {	  minDistance = distance[i];	  ownerIndex = i;	}      }      owner = new Instance(centers.instance(candidates[ownerIndex]));            // are there other owners      // loop also goes over already found owner, keeps order       // in owner list      int index = 0;      for (int i = 0; i < numCand; i++) {	// 1.  all centers that are points within rectangle are owners	if ((inside[i]) 	    	    // 2. take all points with same distance to the rect. as the owner 	    || (distance[i] == distance[ownerIndex]))	  {	    	    // add competitor to owners list	    owners[index++] = candidates[i];	  }	else {	  	  Instance competitor = new Instance(centers.instance(candidates[i]));	  if 	    	    // 3. point has larger distance to rectangle but still can compete 	    // with owner for some points in the rectangle	    (!candidateIsFullOwner(owner, competitor)) 	    	    {	      // also add competitor to owners list	      owners[index++] = candidates[i];	    }	}      }      int [] result = new int [index];      for (int i = 0; i < index; i++) result[i] = owners[i];      return result;    }        /*     * Returns true if candidate is a full owner in respect to a     * competitor.<p>     *     * The candidate has been the closer point to the current rectangle      * or even has been a point within the rectangle.     * The competitor is competing with the candidate for a few points      * out of the rectangle although it is a point further away      * from the rectangle then the candidate.     * The extrem point is the corner of the rectangle that is furthest      * away from the candidate towards the direction of the competitor.     *     * If the distance candidate to this extreme point is smaller     * then the distance competitor to this extreme point, then it is      * proven that none of the points in the rectangle can be owned be      * the competitor and the candidate is full owner of the rectangle      * in respect to this competitor.      * See also D. Pelleg and A. Moore's paper 'Accelerating exact k-means      * Algorithms with Geometric Reasoning'. <p>     *     * @param candidate instance that is candidate to be owner     * @param competitor instance that competes against the candidate     * @return true if candidate is full owner     */    private boolean candidateIsFullOwner(Instance candidate, 					 Instance competitor)       throws Exception {      // get extreme point      Instance extreme = new Instance(candidate);      for (int i = 0; i < m_Instances.numAttributes(); i++) {	if ((competitor.value(i) - candidate.value(i)) > 0) {	  extreme.setValue(i, m_NodeRanges[i][R_MAX]);	}	else {	  extreme.setValue(i, m_NodeRanges[i][R_MIN]);	}      }      boolean isFullOwner = 	//	m_DistanceFunction.distance(extreme, candidate) <	m_DistanceFunction.distance(extreme, competitor);            return isFullOwner;    }    /**     * Returns the distance between a point and an hyperrectangle.     * @param x the point     * @return the distance     */    private double distanceToHrect(Instance x) throws Exception {      double distance = 0.0;            Instance closestPoint = new Instance(x);       boolean inside;      inside = clipToInsideHrect(closestPoint);      if (!inside)	distance = m_DistanceFunction.distance(closestPoint, x);      return distance;    }             /**     * Finds the closest point in the hyper rectangle to a given point.     * Change the given point to this closest point by clipping of      * at all the dimensions to be clipped of.     * If the point is inside the rectangle it stays unchanged.     * The return value is true if the point was not changed, so the     * the return value is true if the point was inside the rectangle.     *     * @param x a point     * @return true if the input point stayed unchanged.     */    private boolean clipToInsideHrect(Instance x) {      boolean inside = true;       for (int i = 0; i < m_Instances.numAttributes(); i++) {	//TODO treat nominals differently!??		if (x.value(i) < m_NodeRanges[i][R_MIN]) {	  x.setValue(i, m_NodeRanges[i][R_MIN]);	  inside = false;	}	else if (x.value(i) > m_NodeRanges[i][R_MAX]) {	  x.setValue(i, m_NodeRanges[i][R_MAX]);	  inside = false;	}      }      return inside;    }    /**     * Assigns instances of this node to center. Center to be assign to     * is decided by the distance function.     *     * @param ranges min's and max's of attributes     * @param centers all the input centers     * @param centList the list of centers to work with     * @param assignments index list of last assignments     */    public void assignSubToCenters(double [][] ranges,				   Instances centers, 				   int [] centList, 				   int [] assignments)       throws Exception {      //todo: undecided situations      int numCent = centList.length;            // WARNING:  assignments is "input/output-parameter"      // should not be null and the following should not happen      if (assignments == null) {	assignments = new int[m_Instances.numInstances()];	for (int i = 0; i < assignments.length; i++) {	  assignments[i] = -1;	}      }      // set assignments for all instances of this node      for (int i = m_Start; i <= m_End; i++) {	int instIndex = m_InstList.get(i);	Instance inst = m_Instances.instance(instIndex);	//if (instList[i] == 664) System.out.println("664***");	int newC = m_DistanceFunction.closestPoint(inst, centers, centList);	// int newC = clusterProcessedInstance(inst, centers);	assignments[instIndex] = newC;      }    }    /**     * Find k nearest neighbours to target by simply searching through all instances     * in the leaf.     * No check on missing class.     * @param target the instance to find nearest neighbour for     * @param minDist the minimal distance found so far     * @return the minimal distance found     */    public double simpleKNearestNeighbour(Instance target, 					  double minDist)       throws Exception {      double dist = 0;      int currIndex;      // sets and uses:      //   double m_MinDist      //   double m_MaxMinDist      //   int m_FurthestNear      int i = 0;      int index = m_Start;      // if no instances, return max value as distance      if (m_End < m_Start) 	return Double.MAX_VALUE;            if (m_NearestListLength == 0) {	for (;(index <= m_End) && (i < m_kNN);) {          currIndex = m_InstList.get(index);	  Instance trainInstance = m_Instances.instance(m_InstList.get(index));	  if (target != trainInstance) { // for hold-one-out cross-validation	    dist = m_DistanceFunction.distance(target, trainInstance);	    m_NearestList[i] = currIndex;	    m_DistanceList[i] = dist;	    if (dist < minDist) minDist = dist; 	    i++;	  }	  index++;	}	m_NearestListLength = m_kNN;       }            /*	System.out.print("dist: ");      for (int j = 0; j < m_kNN; j++) {	System.out.print(" " + m_DistanceList[j]);      }      System.out.println();      System.out.print("near: ");      for (int j = 0; j < m_kNN; j++) {	System.out.print(" " + m_NearestList[j]);      }      System.out.println();      */            // set the new furthest nearest      m_FurthestNear = checkFurthestNear();      m_MaxMinDist = m_DistanceList[m_FurthestNear];      // check all or rest of instances if nearer      for (; index <= m_End; index++) {	      	currIndex = m_InstList.get(index);	Instance trainInstance = m_Instances.instance(currIndex);	if (target != trainInstance) { // for hold-one-out cross-validation	  dist = m_DistanceFunction.distance(target, trainInstance);	  	  // is instance one of the nearest?	  if (dist < m_MaxMinDist) {	    // set instance as one of the nearest, 	    // replacing the last furthest nearest	    m_NearestList[m_FurthestNear] = currIndex;	    m_DistanceList[m_FurthestNear] = dist;	    	    if (m_MultipleFurthest) {	      // remove multiple entries of old furthest nearest	      m_NearestListLength = m_kNN;	      m_MultipleFurthest = false;	    }	    	    // set the new furthest nearest	    m_FurthestNear = checkFurthestNear();	    m_MaxMinDist = m_DistanceList[m_FurthestNear];	    // minimal value of distances did change too	    if (dist < minDist)	      minDist = dist;	  }	  else {	    if (dist == m_MaxMinDist) {	      // instance is at same distance as furthest nearest	      m_MultipleFurthest = true;	      m_NearestList[m_NearestListLength] = currIndex;	      m_DistanceList[m_NearestListLength] = dist;	      m_NearestListLength++;	    }	  }	}      }      return minDist;    }        /**     * Finds the nearest neighbour to target, this method is called recursively.     * @param target the instance to find nearest neighbour for     * @param maxDist the distance to the nearest neighbour so far     * @return the minimal distance found     */    private double kNearestNeighbour(Instance target, 				     double maxDist) throws Exception {      double newDist;      KDTreeNode nearer, further;      // if is a leaf then the instance is in this hyperrectangle      if (this.isALeaf()) {        // return distance to nearest (and index of all	// all k nearest in m_NearestList) 	return this.simpleKNearestNeighbour(target, maxDist);      }      boolean targetInLeft = m_DistanceFunction.valueIsSmallerEqual(	     target, 	     m_SplitDim,             m_SplitValue);      if (targetInLeft) {	nearer = m_Left;	further = m_Right;      } else {	nearer = m_Right;	further = m_Left;      }      // look for nearer neighbours in nearer half      maxDist = nearer.kNearestNeighbour(target, maxDist);             // ... now look in further half if maxDist reaches into it      Instance splitPoint = new Instance(target);      splitPoint.setValue(m_SplitDim, m_SplitValue);      double distanceToSplit = m_DistanceFunction.distance(target, splitPoint);      boolean lookInSecondHalf = maxDist >= distanceToSplit;      if (lookInSecondHalf) {	// look for nearer neighbours in further half 	maxDist = further.kNearestNeighbour(target, maxDist);      }       return maxDist;    }  }   //   // End of class KDTreeNode  /**   * Adds one instance to KDTree loosly. It only changes the ranges. The ranges are   * important for the distance function.   * @param instance the new instance   */  public void addLooslyInstance(Instance instance) {    m_DistanceFunction.updateRanges(instance);  }  /**   * Builds the KDTree.   * @param instances instances to build the tree of   */  public void buildKDTree(Instances instances)   throws Exception {    double [][] ranges = instances.initializeRanges();    buildKDTree(instances, ranges);  }  /**   * Builds the KDTree.   * It is adviseable to run the replace missing attributes filter on the   * passed instances first.   * @param instances instances to build the tree of   * @ranges the ranges of this instances   */  public void buildKDTree(Instances instances,			  double [][] ranges) throws Exception {    m_Instances = instances;    int numInst = m_Instances.numInstances();    // Make the global index list     m_InstList = new DynamicArrayOfPosInt(numInst);     for (int i = 0; i < numInst; i++) {      m_InstList.set(i, i);    }    // make the tree starting with the roor node    m_Root = new KDTreeNode();     if (ranges == null) {      ranges = instances.initializeRanges();    }    // set global ranges    m_Universe = ranges;    // set distance function    m_DistanceFunction = new weka.core.EuclideanDistance(m_Instances, m_Universe);    checkInstances();    // build the tree     int [] num = new int[1];    num[0] = 0;    m_Root.makeKDTreeNode(num,			  ranges,                           0,            // index of first instance index                          numInst - 1); // index of last instance index    // tree is valid for instances    m_Valid = true;  }  /**   * Checks the instances.   * No checks in this KDTree but it calls the check of the distance function.   */  private void checkInstances () throws Exception {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -