📄 kdtree.java
字号:
* nearestNeighbour functions. * @throws Exception if called before calling kNearestNeighbours or * nearestNeighbours. */ public double[] getDistances() throws Exception { if (m_Instances == null || m_DistanceList == null) throw new Exception("The tree has not been supplied with a set of " + "instances or getDistances() has been called " + "before calling kNearestNeighbours()."); return m_DistanceList; } /** * Builds the KDTree on the given set of instances. * @param instances The insts on which the KDTree is to be * built. * @throws Exception If some error occurs while * building the KDTree */ public void setInstances(Instances instances) throws Exception { super.setInstances(instances); buildKDTree(instances); } /** * Adds one instance to the KDTree. This updates the KDTree structure to take * into account the newly added training instance. * * @param instance the instance to be added. Usually the newly added instance in the * training set. * @throws Exception If the instance cannot be added. */ public void update(Instance instance) throws Exception { // better to change // to addInstance if (m_Instances == null) throw new Exception("No instances supplied yet. Have to call " + "setInstances(instances) with a set of Instances " + "first."); addInstanceInfo(instance); addInstanceToTree(instance, m_Root); } /** * Recursively adds an instance to the tree starting from * the supplied KDTreeNode. * NOTE: This should not be called by outside classes, * outside classes should instead call update(Instance) * method. * * @param inst The instance to add to the tree * @param node The node to start the recursive search * from, for the leaf node where the supplied instance * would go. * @throws Exception If some error occurs while adding * the instance. */ protected void addInstanceToTree(Instance inst, KDTreeNode node) throws Exception { if (node.isALeaf()) { int instList[] = new int[m_Instances.numInstances()]; try { System.arraycopy(m_InstList, 0, instList, 0, node.m_End + 1); // m_InstList.squeezeIn(m_End, // index); if (node.m_End < m_InstList.length - 1) System.arraycopy(m_InstList, node.m_End + 1, instList, node.m_End + 2, m_InstList.length - node.m_End - 1); instList[node.m_End + 1] = m_Instances.numInstances() - 1; } catch (ArrayIndexOutOfBoundsException ex) { System.err.println("m_InstList.length: " + m_InstList.length + " instList.length: " + instList.length + "node.m_End+1: " + (node.m_End + 1) + "m_InstList.length-node.m_End+1: " + (m_InstList.length - node.m_End - 1)); throw ex; } m_InstList = instList; node.m_End++; node.m_NodeRanges = m_EuclideanDistance.updateRanges(inst, node.m_NodeRanges); m_Splitter.setInstanceList(m_InstList); // split this leaf node if necessary double[][] universe = m_EuclideanDistance.getRanges(); if (node.numInstances() > m_MaxInstInLeaf && getMaxRelativeNodeWidth(node.m_NodeRanges, universe) > m_MinBoxRelWidth) { m_Splitter.splitNode(node, m_NumNodes, node.m_NodeRanges, universe); m_NumNodes += 2; } }// end if node is a leaf else { if (m_EuclideanDistance.valueIsSmallerEqual(inst, node.m_SplitDim, node.m_SplitValue)) { addInstanceToTree(inst, node.m_Left); afterAddInstance(node.m_Right); } else addInstanceToTree(inst, node.m_Right); node.m_End++; node.m_NodeRanges = m_EuclideanDistance.updateRanges(inst, node.m_NodeRanges); } } /** * Corrects the start and end indices of a * KDTreeNode after an instance is added to * the tree. The start and end indices for * the master index array (m_InstList) * stored in the nodes need to be updated * for all nodes in the subtree on the * right of a node where the instance * was added. * NOTE: No outside class should call this * method. * * @param node KDTreeNode whose start and end indices * need to be updated. */ protected void afterAddInstance(KDTreeNode node) { node.m_Start++; node.m_End++; if (!node.isALeaf()) { afterAddInstance(node.m_Left); afterAddInstance(node.m_Right); } } /** * Adds one instance to KDTree loosly. It only changes the ranges in * EuclideanDistance, and does not affect the structure of the KDTree. * * @param instance the new instance. Usually this is the test instance * supplied to update the range of attributes in the distance function. */ public void addInstanceInfo(Instance instance) { m_EuclideanDistance.updateRanges(instance); } /** * Checks if there is any instance with missing values. Throws an exception if * there is, as KDTree does not handle missing values. * * @param instances the instances to check * @throws Exception if missing values are encountered */ protected void checkMissing(Instances instances) throws Exception { for (int i = 0; i < instances.numInstances(); i++) { Instance ins = instances.instance(i); for (int j = 0; j < ins.numValues(); j++) { if (ins.index(j) != ins.classIndex()) if (ins.isMissingSparse(j)) { throw new Exception("ERROR: KDTree can not deal with missing " + "values. Please run ReplaceMissingValues filter " + "on the dataset before passing it on to the KDTree."); } } } } /** * Checks if there is any missing value in the given * instance. * @param ins The instance to check missing values in. * @throws Exception If there is a missing value in the * instance. */ protected void checkMissing(Instance ins) throws Exception { for (int j = 0; j < ins.numValues(); j++) { if (ins.index(j) != ins.classIndex()) if (ins.isMissingSparse(j)) { throw new Exception("ERROR: KDTree can not deal with missing " + "values. Please run ReplaceMissingValues filter " + "on the dataset before passing it on to the KDTree."); } } } /** * Returns the maximum attribute width of instances/points * in a KDTreeNode relative to the whole dataset. * * @param nodeRanges The attribute ranges of the * KDTreeNode whose maximum relative width is to be * determined. * @param universe The attribute ranges of the whole * dataset (training instances + test instances so * far encountered). * @return The maximum relative width */ protected double getMaxRelativeNodeWidth(double[][] nodeRanges, double[][] universe) { int widest = widestDim(nodeRanges, universe); return nodeRanges[widest][WIDTH] / universe[widest][WIDTH]; } /** * Returns the widest dimension/attribute in a * KDTreeNode (widest after normalizing). * @param nodeRanges The attribute ranges of * the KDTreeNode. * @param universe The attribute ranges of the * whole dataset (training instances + test * instances so far encountered). * @return The index of the widest * dimension/attribute. */ protected int widestDim(double[][] nodeRanges, double[][] universe) { final int classIdx = m_Instances.classIndex(); double widest = 0.0; int w = -1; if (m_NormalizeNodeWidth) { for (int i = 0; i < nodeRanges.length; i++) { double newWidest = nodeRanges[i][WIDTH] / universe[i][WIDTH]; if (newWidest > widest) { if (i == classIdx) continue; widest = newWidest; w = i; } } } else { for (int i = 0; i < nodeRanges.length; i++) { if (nodeRanges[i][WIDTH] > widest) { if (i == classIdx) continue; widest = nodeRanges[i][WIDTH]; w = i; } } } return w; } /** * Returns the size of the tree. * * @return the size of the tree */ public double measureTreeSize() { return m_NumNodes; } /** * Returns the number of leaves. * * @return the number of leaves */ public double measureNumLeaves() { return m_NumLeaves; } /** * Returns the depth of the tree. * * @return The depth of the tree */ public double measureMaxDepth() { return m_MaxDepth; } /** * Returns an enumeration of the additional measure names. * * @return an enumeration of the measure names */ public Enumeration enumerateMeasures() { Vector newVector = new Vector(); newVector.addElement("measureTreeSize"); newVector.addElement("measureNumLeaves"); newVector.addElement("measureMaxDepth"); if (m_Stats != null) { for (Enumeration e = m_Stats.enumerateMeasures(); e.hasMoreElements();) { newVector.addElement(e.nextElement()); } } return newVector.elements(); } /** * Returns the value of the named measure. * * @param additionalMeasureName the name of * the measure to query for its value. * @return The value of the named measure * @throws IllegalArgumentException If the named measure * is not supported. */ public double getMeasure(String additionalMeasureName) { if (additionalMeasureName.compareToIgnoreCase("measureMaxDepth") == 0) { return measureMaxDepth(); } else if (additionalMeasureName.compareToIgnoreCase("measureTreeSize") == 0) { return measureTreeSize(); } else if (additionalMeasureName.compareToIgnoreCase("measureNumLeaves") == 0) { return measureNumLeaves(); } else if (m_Stats != null) { return m_Stats.getMeasure(additionalMeasureName); } else { throw new IllegalArgumentException(additionalMeasureName + " not supported (KDTree)"); } } /** * Sets whether to calculate the performance statistics or not. * @param measurePerformance Should be true if performance * statistics are to be measured. */ public void setMeasurePerformance(boolean measurePerformance) { m_MeasurePerformance = measurePerformance; if (m_MeasurePerformance) { if (m_Stats == null) m_Stats = m_TreeStats = new TreePerformanceStats(); } else m_Stats = m_TreeStats = null; } /** * Assigns instances to centers using KDTree. * * @param centers the current centers * @param assignments the centerindex for each instance * @param pc the threshold value for pruning. * @throws Exception If there is some problem * assigning instances to centers. */ public void centerInstances(Instances centers, int[] assignments, double pc) throws Exception { int[] centList = new int[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) centList[i] = i; determineAssignments(m_Root, centers, centList, assignments, pc); } /** * Assigns instances to the current centers called candidates. * * @param node The node to start assigning the instances from. * @param centers all the current centers. * @param candidates the current centers the method works on. * @param assignments the center index for each instance. * @param pc the threshold value for pruning. * @throws Exception If there is some problem assigning * instances to centers. */ protected void determineAssignments(KDTreeNode node, Instances centers, int[] candidates, int[] assignments, double pc) throws Exception { // reduce number of owners for current hyper rectangle int[] owners = refineOwners(node, centers, candidates); // only one owner if (owners.length == 1) { // all instances of this node are owned by one center for (int i = node.m_Start; i <= node.m_End; i++) { assignments[m_InstList[i]] // the assignment of this instance = owners[0]; // is the current owner } } else if (!node.isALeaf()) { // more than one owner and it is not a leaf determineAssignments(node.m_Left, centers, owners, assignments, pc); determineAssignments(node.m_Right, centers, owners, assignments, pc); } else { // this is a leaf and there are more than 1 owner // XMeans. assignSubToCenters(node, centers, owners, assignments); } } /** * Refines the ownerlist. * * @param node The current tree node. * @param centers all centers * @param candidates the indexes of those centers that are candidates. * @return list of owners * @throws Exception If some problem occurs in refining. */ protected int[] refineOwners(KDTreeNode node, Instances centers, int[] candidates) throws Exception { int[] owners = new int[candidates.length]; double minDistance = Double.POSITIVE_INFINITY; int ownerIndex = -1; Instance owner; int numCand = candidates.length; double[] distance = new double[numCand]; boolean[] inside = new boolean[numCand]; for (int i = 0; i < numCand; i++) { distance[i] = distanceToHrect(node, centers.instance(candidates[i])); inside[i] = (distance[i] == 0.0); if (distance[i] < minDistance) { minDistance = distance[i]; ownerIndex = i; } } owner = new Instance(centers.instance(candidates[ownerIndex])); // are there other owners // loop also goes over already found owner, keeps order // in owner list int index = 0; for (int i = 0; i < numCand; i++) { // 1. all centers that are points within rectangle are owners if ((inside[i]) // 2. take all points with same distance to the rect. as the owner || (distance[i] == distance[ownerIndex])) { // add competitor to owners list owners[index++] = candidates[i]; } else { Instance competitor = new Instance(centers.instance(candidates[i])); if // 3. point has larger distance to rectangle but still can compete // with owner for some points in the rectangle (!candidateIsFullOwner(node, owner, competitor)) { // also add competitor to owners list owners[index++] = candidates[i]; } } } int[] result = new int[index]; for (int i = 0; i < index; i++) result[i] = owners[i]; return result; } /** * Returns the distance between a point and an hyperrectangle. * * @param node The current node from whose hyperrectangle * the distance is to be measured.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -