📄 kdtree.java
字号:
m_End++; // correct ranges m_NodeRanges = m_EuclideanDistance.updateRanges(instance, m_NodeRanges); } } else { // found the leaf to insert instance // ranges have been updated m_NodeRanges = m_EuclideanDistance.updateRanges(instance, m_NodeRanges); int index = m_Instances.numInstances() - 1; int InstList[] = new int[m_Instances.numInstances()]; System.arraycopy(m_InstList, 0, InstList, 0, m_End+1); //m_InstList.squeezeIn(m_End, index); if(m_End<m_InstList.length-1) System.arraycopy(m_InstList, m_End+1, InstList, 0, m_InstList.length); m_InstList[m_End] = index; m_End++; int numInst = m_End - m_Start + 1; // leaf did get too big? if (numInst > m_MaxInstInLeaf) { //split leaf int [] num = new int[1]; num[0] = m_NodeNumber; this.makeKDTreeNode(num, m_NodeRanges, m_Start, m_End); } success = true; } return success; } /** * Corrects the boundaries of all nodes to the right of the leaf where * the instance was added to. */ public void afterAddInstance() { m_Start++; m_End++; if (!isALeaf()) { m_Left.afterAddInstance(); m_Right.afterAddInstance(); } } /** * Returns statistics about the KDTree. * * @param nodes give number of nodes * @param leaves give number of leaves * @return a text string that contains the statistics to the KDTree */ public String statToString (boolean nodes, boolean leaves) { int count = 1; int stats[] = new int [2]; if (this.m_Left != null) count = this.m_Left.collectStats(count, stats); if (this.m_Right != null) count = this.m_Right.collectStats(count, stats); StringBuffer text = new StringBuffer(); if (nodes) text.append("\n Number of nodes in the tree " + count + " \n"); if (leaves) text.append(" Number of leaves in the tree " + stats[0] + " \n"); return text.toString(); } /** * Returns statistics about the KDTree. * * @param count number of nodes so far * @param stats array with stats info * @return the number of nodes */ public int collectStats (int count, int[] stats) { count++; if (this.m_Left != null) count = this.m_Left.collectStats(count, stats); if (this.m_Right != null) count = this.m_Right.collectStats(count, stats); else // is a leaf stats[0]++; return count; } /** * Returns the KDTree node and its underlying branches as string. * * @param leaves adds the instances of the leaves * @return a string representing the node */ public String nodeToString (boolean leaves) { StringBuffer text = new StringBuffer(); text.append("NODE-Nr: " + m_NodeNumber + "\n"); int len = m_End - m_Start + 1; text.append("Num of instances: " + len + "\n"); text.append("start " + m_Start + " == end " + m_End + "\n"); if (!isALeaf()) { text.append("attribute: " + this.m_SplitDim); text.append("split at: " + this.m_SplitValue + "\n"); } else { text.append("is a leaf\n"); if (leaves) { for (int i = m_Start; i <= m_End; i++) { int instIndex = m_InstList[i]; text.append(instIndex + ": "); text.append(m_Instances.instance(instIndex).toString() + "\n"); } } } text.append("------------------\n"); if (this.m_Left != null) text.append(this.m_Left.nodeToString(leaves)); if (this.m_Right != null) text.append(this.m_Right.nodeToString(leaves)); return text.toString(); } /** * Assigns instances to the current centers called candidates. * * @param centers all the current centers * @param candidates the current centers the method works on * @param assignments the center index for each instance * @param pc the threshold value for pruning * @throws Exception if something goes wrong */ private void determineAssignments(Instances centers, int[] candidates, int[] assignments, double pc) throws Exception { // reduce number of owners for current hyper rectangle int [] owners = refineOwners(centers, candidates); // only one owner if (owners.length == 1) { // all instances of this node are owned by one center for (int i = m_Start; i <= m_End; i++) { assignments[m_InstList[i]] // the assignment of this instance = owners[0]; // is the current owner } } else if (!this.isALeaf()) { // more than one owner and it is not a leaf m_Left.determineAssignments(centers, owners, assignments, pc); m_Right.determineAssignments(centers, owners, assignments, pc); } else { // this is a leaf and there are more than 1 owner //XMeans. assignSubToCenters(m_NodeRanges, centers, owners, assignments); } } /** * Refines the ownerlist. * * @param centers all centers * @param candidates the indexes of those centers that are candidates * @return list of owners * @throws Exception if something goes wrong */ private int [] refineOwners(Instances centers, int [] candidates) throws Exception { int [] owners = new int [candidates.length]; double minDistance = Double.MAX_VALUE; int ownerIndex = -1; Instance owner; int numCand = candidates.length; double [] distance = new double[numCand]; boolean [] inside = new boolean[numCand]; for (int i = 0; i < numCand; i++) { distance[i] = distanceToHrect(centers.instance(candidates[i])); inside[i] = (distance[i] == 0.0); if (distance[i] < minDistance) { minDistance = distance[i]; ownerIndex = i; } } owner = new Instance(centers.instance(candidates[ownerIndex])); // are there other owners // loop also goes over already found owner, keeps order // in owner list int index = 0; for (int i = 0; i < numCand; i++) { // 1. all centers that are points within rectangle are owners if ((inside[i]) // 2. take all points with same distance to the rect. as the owner || (distance[i] == distance[ownerIndex])) { // add competitor to owners list owners[index++] = candidates[i]; } else { Instance competitor = new Instance(centers.instance(candidates[i])); if // 3. point has larger distance to rectangle but still can compete // with owner for some points in the rectangle (!candidateIsFullOwner(owner, competitor)) { // also add competitor to owners list owners[index++] = candidates[i]; } } } int [] result = new int [index]; for (int i = 0; i < index; i++) result[i] = owners[i]; return result; } /** * Returns true if candidate is a full owner in respect to a * competitor.<p> * * The candidate has been the closer point to the current rectangle * or even has been a point within the rectangle. * The competitor is competing with the candidate for a few points * out of the rectangle although it is a point further away * from the rectangle then the candidate. * The extrem point is the corner of the rectangle that is furthest * away from the candidate towards the direction of the competitor. * * If the distance candidate to this extreme point is smaller * then the distance competitor to this extreme point, then it is * proven that none of the points in the rectangle can be owned be * the competitor and the candidate is full owner of the rectangle * in respect to this competitor. * See also D. Pelleg and A. Moore's paper 'Accelerating exact k-means * Algorithms with Geometric Reasoning'. <p> * * @param candidate instance that is candidate to be owner * @param competitor instance that competes against the candidate * @return true if candidate is full owner * @throws Exception if something goes wrong */ private boolean candidateIsFullOwner(Instance candidate, Instance competitor) throws Exception { // get extreme point Instance extreme = new Instance(candidate); for (int i = 0; i < m_Instances.numAttributes(); i++) { if ((competitor.value(i) - candidate.value(i)) > 0) { extreme.setValue(i, m_NodeRanges[i][R_MAX]); } else { extreme.setValue(i, m_NodeRanges[i][R_MIN]); } } boolean isFullOwner = m_EuclideanDistance.distance(extreme, candidate) < m_EuclideanDistance.distance(extreme, competitor); return isFullOwner; } /** * Returns the distance between a point and an hyperrectangle. * * @param x the point * @return the distance * @throws Exception if something goes wrong */ private double distanceToHrect(Instance x) throws Exception { double distance = 0.0; Instance closestPoint = new Instance(x); boolean inside; inside = clipToInsideHrect(closestPoint); if (!inside) distance = m_EuclideanDistance.distance(closestPoint, x); return distance; } /** * Finds the closest point in the hyper rectangle to a given point. * Change the given point to this closest point by clipping of * at all the dimensions to be clipped of. * If the point is inside the rectangle it stays unchanged. * The return value is true if the point was not changed, so the * the return value is true if the point was inside the rectangle. * * @param x a point * @return true if the input point stayed unchanged. */ private boolean clipToInsideHrect(Instance x) { boolean inside = true; for (int i = 0; i < m_Instances.numAttributes(); i++) { //TODO treat nominals differently!?? if (x.value(i) < m_NodeRanges[i][R_MIN]) { x.setValue(i, m_NodeRanges[i][R_MIN]); inside = false; } else if (x.value(i) > m_NodeRanges[i][R_MAX]) { x.setValue(i, m_NodeRanges[i][R_MAX]); inside = false; } } return inside; } /** * Assigns instances of this node to center. Center to be assign to * is decided by the distance function. * * @param ranges min's and max's of attributes * @param centers all the input centers * @param centList the list of centers to work with * @param assignments index list of last assignments * @throws Exception if something goes wrong */ public void assignSubToCenters(double [][] ranges, Instances centers, int [] centList, int [] assignments) throws Exception { //todo: undecided situations // WARNING: assignments is "input/output-parameter" // should not be null and the following should not happen if (assignments == null) { assignments = new int[m_Instances.numInstances()]; for (int i = 0; i < assignments.length; i++) { assignments[i] = -1; } } // set assignments for all instances of this node for (int i = m_Start; i <= m_End; i++) { int instIndex = m_InstList[i]; Instance inst = m_Instances.instance(instIndex); //if (instList[i] == 664) System.out.println("664***"); int newC = m_EuclideanDistance.closestPoint(inst, centers, centList); // int newC = clusterProcessedInstance(inst, centers); assignments[instIndex] = newC; } } /** * Find k nearest neighbours to target by simply searching through all instances * in the leaf. * No check on missing class. * * @param target the instance to find nearest neighbour for * @return the minimal distance found * @throws Exception if something goes wrong */ public double simpleKNearestNeighbour(Instance target) throws Exception { double dist = 0; int currIndex; // sets and uses: // double m_MinDist // double m_MaxMinDist // int m_FurthestNear int i = m_NearestListLength; int index = m_Start; // if no instances, return max value as distance if (m_End < m_Start)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -