📄 kdtree.java
字号:
* Deletes one instance in this KDTree node or its subsequent nodes. * @param index the index of the instance to be deleted * @return true if instance was deleted */ public boolean deleteInstance(int index) throws Exception { int pos = m_InstList.deleteOneIndex(index); if (pos >= 0) { m_Root.tidyUpAfterDelete(pos); return true; } else { return false; } } /** * Returns statistics about the KDTree. * @param num give number of nodes * @param leaves give number of leaves * @return a text string that contains the statistics to the KDTree */ public String statToString (boolean nodes, boolean leaves) { int count = 1; int stats[] = new int [2]; if (this.m_Left != null) count = this.m_Left.collectStats(count, stats); if (this.m_Right != null) count = this.m_Right.collectStats(count, stats); StringBuffer text = new StringBuffer(); if (nodes) text.append("\n Number of nodes in the tree " + count + " \n"); if (leaves) text.append(" Number of leaves in the tree " + stats[0] + " \n"); return text.toString(); } /** * Returns statistics about the KDTree. * @param count number of nodes so far * @param stats array with stats info * @return the number of nodes */ public int collectStats (int count, int[] stats) { count++; if (this.m_Left != null) count = this.m_Left.collectStats(count, stats); if (this.m_Right != null) count = this.m_Right.collectStats(count, stats); else // is a leaf stats[0]++; return count; } /** * Returns the KDTree node and its underlying branches as string. * @param leaves adds the instances of the leaves * @return a string representing the node */ public String nodeToString (boolean leaves) { StringBuffer text = new StringBuffer(); text.append("NODE-Nr: " + m_NodeNumber + "\n"); int len = m_End - m_Start + 1; text.append("Num of instances: " + len + "\n"); text.append("start " + m_Start + " == end " + m_End + "\n"); if (!isALeaf()) { text.append("attribute: " + this.m_SplitDim); text.append("split at: " + this.m_SplitValue + "\n"); } else { text.append("is a leaf\n"); if (leaves) { for (int i = m_Start; i <= m_End; i++) { int instIndex = m_InstList.get(i); text.append(instIndex + ": "); text.append(m_Instances.instance(instIndex).toString() + "\n"); } } } text.append("------------------\n"); if (this.m_Left != null) text.append(this.m_Left.nodeToString(leaves)); if (this.m_Right != null) text.append(this.m_Right.nodeToString(leaves)); return text.toString(); } /** * Assigns instances to the current centers called candidates. * * @param centers all the current centers * @param candidates the current centers the method works on * @param assignments the center index for each instance * @param pc the threshold value for pruning * @param p True if pruning should be used */ private void determineAssignments(Instances centers, int[] candidates, int[] assignments, double pc, boolean p) throws Exception { // reduce number of owners for current hyper rectangle int [] owners = refineOwners(centers, candidates); // only one owner if (owners.length == 1) { // all instances of this node are owned by one center for (int i = m_Start; i <= m_End; i++) { assignments[m_InstList.get(i)] // the assignment of this instance = owners[0]; // is the current owner } } else if (!this.isALeaf()) { // more than one owner and it is not a leaf m_Left.determineAssignments(centers, owners, assignments, pc, p); m_Right.determineAssignments(centers, owners, assignments, pc, p); } else { // this is a leaf and there are more than 1 owner //XMeans. assignSubToCenters(m_NodeRanges, centers, owners, assignments); } } /** * Refines the ownerlist. * @param centers all centers * @param candidates the indexes of those centers that are candidates * @return list of owners */ private int [] refineOwners(Instances centers, int [] candidates) throws Exception { int [] owners = new int [candidates.length]; double minDistance = Double.MAX_VALUE; int ownerIndex = -1; Instance owner; int numCand = candidates.length; double [] distance = new double[numCand]; boolean [] inside = new boolean[numCand]; for (int i = 0; i < numCand; i++) { distance[i] = distanceToHrect(centers.instance(candidates[i])); inside[i] = (distance[i] == 0.0); if (distance[i] < minDistance) { minDistance = distance[i]; ownerIndex = i; } } owner = new Instance(centers.instance(candidates[ownerIndex])); // are there other owners // loop also goes over already found owner, keeps order // in owner list int index = 0; for (int i = 0; i < numCand; i++) { // 1. all centers that are points within rectangle are owners if ((inside[i]) // 2. take all points with same distance to the rect. as the owner || (distance[i] == distance[ownerIndex])) { // add competitor to owners list owners[index++] = candidates[i]; } else { Instance competitor = new Instance(centers.instance(candidates[i])); if // 3. point has larger distance to rectangle but still can compete // with owner for some points in the rectangle (!candidateIsFullOwner(owner, competitor)) { // also add competitor to owners list owners[index++] = candidates[i]; } } } int [] result = new int [index]; for (int i = 0; i < index; i++) result[i] = owners[i]; return result; } /* * Returns true if candidate is a full owner in respect to a * competitor.<p> * * The candidate has been the closer point to the current rectangle * or even has been a point within the rectangle. * The competitor is competing with the candidate for a few points * out of the rectangle although it is a point further away * from the rectangle then the candidate. * The extrem point is the corner of the rectangle that is furthest * away from the candidate towards the direction of the competitor. * * If the distance candidate to this extreme point is smaller * then the distance competitor to this extreme point, then it is * proven that none of the points in the rectangle can be owned be * the competitor and the candidate is full owner of the rectangle * in respect to this competitor. * See also D. Pelleg and A. Moore's paper 'Accelerating exact k-means * Algorithms with Geometric Reasoning'. <p> * * @param candidate instance that is candidate to be owner * @param competitor instance that competes against the candidate * @return true if candidate is full owner */ private boolean candidateIsFullOwner(Instance candidate, Instance competitor) throws Exception { // get extreme point Instance extreme = new Instance(candidate); for (int i = 0; i < m_Instances.numAttributes(); i++) { if ((competitor.value(i) - candidate.value(i)) > 0) { extreme.setValue(i, m_NodeRanges[i][R_MAX]); } else { extreme.setValue(i, m_NodeRanges[i][R_MIN]); } } boolean isFullOwner = // m_DistanceFunction.distance(extreme, candidate) < m_DistanceFunction.distance(extreme, competitor); return isFullOwner; } /** * Returns the distance between a point and an hyperrectangle. * @param x the point * @return the distance */ private double distanceToHrect(Instance x) throws Exception { double distance = 0.0; Instance closestPoint = new Instance(x); boolean inside; inside = clipToInsideHrect(closestPoint); if (!inside) distance = m_DistanceFunction.distance(closestPoint, x); return distance; } /** * Finds the closest point in the hyper rectangle to a given point. * Change the given point to this closest point by clipping of * at all the dimensions to be clipped of. * If the point is inside the rectangle it stays unchanged. * The return value is true if the point was not changed, so the * the return value is true if the point was inside the rectangle. * * @param x a point * @return true if the input point stayed unchanged. */ private boolean clipToInsideHrect(Instance x) { boolean inside = true; for (int i = 0; i < m_Instances.numAttributes(); i++) { //TODO treat nominals differently!?? if (x.value(i) < m_NodeRanges[i][R_MIN]) { x.setValue(i, m_NodeRanges[i][R_MIN]); inside = false; } else if (x.value(i) > m_NodeRanges[i][R_MAX]) { x.setValue(i, m_NodeRanges[i][R_MAX]); inside = false; } } return inside; } /** * Assigns instances of this node to center. Center to be assign to * is decided by the distance function. * * @param ranges min's and max's of attributes * @param centers all the input centers * @param centList the list of centers to work with * @param assignments index list of last assignments */ public void assignSubToCenters(double [][] ranges, Instances centers, int [] centList, int [] assignments) throws Exception { //todo: undecided situations int numCent = centList.length; // WARNING: assignments is "input/output-parameter" // should not be null and the following should not happen if (assignments == null) { assignments = new int[m_Instances.numInstances()]; for (int i = 0; i < assignments.length; i++) { assignments[i] = -1; } } // set assignments for all instances of this node for (int i = m_Start; i <= m_End; i++) { int instIndex = m_InstList.get(i); Instance inst = m_Instances.instance(instIndex); //if (instList[i] == 664) System.out.println("664***"); int newC = m_DistanceFunction.closestPoint(inst, centers, centList); // int newC = clusterProcessedInstance(inst, centers); assignments[instIndex] = newC; } } /** * Find k nearest neighbours to target by simply searching through all instances * in the leaf. * No check on missing class. * @param target the instance to find nearest neighbour for * @param minDist the minimal distance found so far * @return the minimal distance found */ public double simpleKNearestNeighbour(Instance target, double minDist) throws Exception { double dist = 0; int currIndex; // sets and uses: // double m_MinDist // double m_MaxMinDist // int m_FurthestNear int i = 0; int index = m_Start; // if no instances, return max value as distance if (m_End < m_Start) return Double.MAX_VALUE; if (m_NearestListLength == 0) { for (;(index <= m_End) && (i < m_kNN);) { currIndex = m_InstList.get(index); Instance trainInstance = m_Instances.instance(m_InstList.get(index)); if (target != trainInstance) { // for hold-one-out cross-validation dist = m_DistanceFunction.distance(target, trainInstance); m_NearestList[i] = currIndex; m_DistanceList[i] = dist; if (dist < minDist) minDist = dist; i++; } index++; } m_NearestListLength = m_kNN; } /* System.out.print("dist: "); for (int j = 0; j < m_kNN; j++) { System.out.print(" " + m_DistanceList[j]); } System.out.println(); System.out.print("near: "); for (int j = 0; j < m_kNN; j++) { System.out.print(" " + m_NearestList[j]); } System.out.println(); */ // set the new furthest nearest m_FurthestNear = checkFurthestNear(); m_MaxMinDist = m_DistanceList[m_FurthestNear]; // check all or rest of instances if nearer for (; index <= m_End; index++) { currIndex = m_InstList.get(index); Instance trainInstance = m_Instances.instance(currIndex); if (target != trainInstance) { // for hold-one-out cross-validation dist = m_DistanceFunction.distance(target, trainInstance); // is instance one of the nearest? if (dist < m_MaxMinDist) { // set instance as one of the nearest, // replacing the last furthest nearest m_NearestList[m_FurthestNear] = currIndex; m_DistanceList[m_FurthestNear] = dist; if (m_MultipleFurthest) { // remove multiple entries of old furthest nearest m_NearestListLength = m_kNN; m_MultipleFurthest = false; } // set the new furthest nearest m_FurthestNear = checkFurthestNear(); m_MaxMinDist = m_DistanceList[m_FurthestNear]; // minimal value of distances did change too if (dist < minDist) minDist = dist; } else { if (dist == m_MaxMinDist) { // instance is at same distance as furthest nearest m_MultipleFurthest = true; m_NearestList[m_NearestListLength] = currIndex; m_DistanceList[m_NearestListLength] = dist; m_NearestListLength++; } } } } return minDist; } /** * Finds the nearest neighbour to target, this method is called recursively. * @param target the instance to find nearest neighbour for * @param maxDist the distance to the nearest neighbour so far * @return the minimal distance found */ private double kNearestNeighbour(Instance target, double maxDist) throws Exception { double newDist; KDTreeNode nearer, further; // if is a leaf then the instance is in this hyperrectangle if (this.isALeaf()) { // return distance to nearest (and index of all // all k nearest in m_NearestList) return this.simpleKNearestNeighbour(target, maxDist); } boolean targetInLeft = m_DistanceFunction.valueIsSmallerEqual( target, m_SplitDim, m_SplitValue); if (targetInLeft) { nearer = m_Left; further = m_Right; } else { nearer = m_Right; further = m_Left; } // look for nearer neighbours in nearer half maxDist = nearer.kNearestNeighbour(target, maxDist); // ... now look in further half if maxDist reaches into it Instance splitPoint = new Instance(target); splitPoint.setValue(m_SplitDim, m_SplitValue); double distanceToSplit = m_DistanceFunction.distance(target, splitPoint); boolean lookInSecondHalf = maxDist >= distanceToSplit; if (lookInSecondHalf) { // look for nearer neighbours in further half maxDist = further.kNearestNeighbour(target, maxDist); } return maxDist; } } // // End of class KDTreeNode /** * Adds one instance to KDTree loosly. It only changes the ranges. The ranges are * important for the distance function. * @param instance the new instance */ public void addLooslyInstance(Instance instance) { m_DistanceFunction.updateRanges(instance); } /** * Builds the KDTree. * @param instances instances to build the tree of */ public void buildKDTree(Instances instances) throws Exception { double [][] ranges = instances.initializeRanges(); buildKDTree(instances, ranges); } /** * Builds the KDTree. * It is adviseable to run the replace missing attributes filter on the * passed instances first. * @param instances instances to build the tree of * @ranges the ranges of this instances */ public void buildKDTree(Instances instances, double [][] ranges) throws Exception { m_Instances = instances; int numInst = m_Instances.numInstances(); // Make the global index list m_InstList = new DynamicArrayOfPosInt(numInst); for (int i = 0; i < numInst; i++) { m_InstList.set(i, i); } // make the tree starting with the roor node m_Root = new KDTreeNode(); if (ranges == null) { ranges = instances.initializeRanges(); } // set global ranges m_Universe = ranges; // set distance function m_DistanceFunction = new weka.core.EuclideanDistance(m_Instances, m_Universe); checkInstances(); // build the tree int [] num = new int[1]; num[0] = 0; m_Root.makeKDTreeNode(num, ranges, 0, // index of first instance index numInst - 1); // index of last instance index // tree is valid for instances m_Valid = true; } /** * Checks the instances. * No checks in this KDTree but it calls the check of the distance function. */ private void checkInstances () throws Exception {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -