📄 kdtree.java
字号:
return Double.MAX_VALUE; if (m_NearestListLength<m_kNN) { for (;(index <= m_End) && (i < m_kNN);) { currIndex = m_InstList[index]; Instance trainInstance = m_Instances.instance(m_InstList[index]); if (target != trainInstance) { // for hold-one-out cross-validation //if(print==true) // OOPS("K: "+i); dist = m_EuclideanDistance.distance(target, trainInstance, Double.MAX_VALUE, print); m_NearestList[i] = currIndex; m_DistanceList[i] = dist; i++; } index++; } m_NearestListLength = i; } // set the new furthest nearest m_FurthestNear = checkFurthestNear(); //FURTHEST IN m_kNN NEAREST NEIGHBOURS m_MaxMinDist = m_DistanceList[m_FurthestNear]; int firstFurthestIndex=-1; double firstFurthestDistance = -1; int oldNearestListLength = -1; // check all or rest of instances if nearer for (; index <= m_End; index++) { currIndex = m_InstList[index]; Instance trainInstance = m_Instances.instance(currIndex); if (target != trainInstance) { // for hold-one-out cross-validation dist = m_EuclideanDistance.distance(target, trainInstance, m_MaxMinDist, print); // is instance one of the nearest? if (dist < m_MaxMinDist) { // set instance as one of the nearest, // replacing the last furthest nearest firstFurthestIndex = m_NearestList[m_FurthestNear]; firstFurthestDistance = m_DistanceList[m_FurthestNear]; m_NearestList[m_FurthestNear] = currIndex; m_DistanceList[m_FurthestNear] = dist; // set the new furthest nearest m_FurthestNear = checkFurthestNear(); m_MaxMinDist = m_DistanceList[m_FurthestNear]; if (m_MultipleFurthest) { // remove multiple entries of old furthest nearest oldNearestListLength = m_NearestListLength; m_NearestListLength = m_kNN; m_MultipleFurthest = false; } //the instance just replaced is at same distance as furthest nearest //therefore there are multiple furthest nearest if (firstFurthestDistance == m_MaxMinDist) { m_MultipleFurthest = true; if(oldNearestListLength!=-1) m_NearestListLength = oldNearestListLength; m_NearestList[m_NearestListLength] = firstFurthestIndex; m_DistanceList[m_NearestListLength] = firstFurthestDistance; m_NearestListLength++; } //get rid of the old list length as it is no longer needed and can create problems. oldNearestListLength = m_NearestListLength; } else { if (dist == m_MaxMinDist) { // instance is at same distance as furthest nearest m_MultipleFurthest = true; m_NearestList[m_NearestListLength] = currIndex; m_DistanceList[m_NearestListLength] = dist; m_NearestListLength++; } } } } return m_MaxMinDist; } /** * Finds the nearest neighbour to target, this method is called recursively. * @param target the instance to find nearest neighbour for * @return the minimal distance found * @throws Exception if something goes wrong */ private double kNearestNeighbour(Instance target) throws Exception { double maxDist; KDTreeNode nearer, further; // if is a leaf then the instance is in this hyperrectangle if (this.isALeaf()) { // return distance to kthnearest (and index of all // all k nearest in m_NearestList) return this.simpleKNearestNeighbour(target); } boolean targetInLeft = m_EuclideanDistance.valueIsSmallerEqual( target, m_SplitDim, m_SplitValue); if (targetInLeft) { nearer = m_Left; further = m_Right; } else { nearer = m_Right; further = m_Left; } // look for nearer neighbours in nearer half maxDist = nearer.kNearestNeighbour(target); // ... now look in further half if maxDist reaches into it Instance splitPoint = new Instance(target); splitPoint.setValue(m_SplitDim, m_SplitValue); double distanceToSplit = m_EuclideanDistance.distance(target, splitPoint, Double.MAX_VALUE); boolean lookInSecondHalf = maxDist >= distanceToSplit; if (lookInSecondHalf) { //System.out.println("Searching into the 2nd half of the tree."); // look for nearer neighbours in further half maxDist = further.kNearestNeighbour(target); } return maxDist; } } /** * sets the instances and builds the KDTree * * @param instances the instances to build the tree from * @throws Exception if something goes wrong */ public void setInstances(Instances instances) throws Exception { buildKDTree(instances); } /** * Builds the KDTree. * It is adviseable to run the replace missing attributes filter on the * passed instances first. * * @param instances instances to build the tree of * @throws Exception if something goes wrong */ private void buildKDTree(Instances instances) throws Exception { checkMissing(instances); //double [][] ranges = instances.initializeRanges(); if(m_EuclideanDistance == null ) m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(instances); else m_EuclideanDistance.setInstances(instances); m_Instances = instances; int numInst = m_Instances.numInstances(); // Make the global index list m_InstList = new int[numInst]; for (int i = 0; i < numInst; i++) { m_InstList[i] = i; } // make the tree starting with the roor node m_Root = new KDTreeNode(); // set global ranges m_Universe = m_EuclideanDistance.getRanges(); // build the tree int [] num = new int[1]; num[0] = 0; m_Root.makeKDTreeNode(num, m_Universe, //ranges, 0, // index of first instance index numInst - 1); // index of last instance index } /** * Adds one instance to the KDTree. This updates the KDTree structure to take * into account the newly added training instance. * * @param instance the instance to be added. Usually the newly added instance * in the training set. * @throws Exception if something goes wrong or instances are null */ public void update(Instance instance) throws Exception { //better to change to addInstance if(m_Instances==null) throw new Exception("No instances supplied yet. Have to call " + "setInstances(instances) with a set of Instances " + "first."); boolean success = m_Root.addInstance(instance); if (!success) { // make a new tree buildKDTree(m_Instances); } } /** * Adds one instance to KDTree loosly. It only changes the ranges in * EuclideanDistance, and does not affect the structure of the KDTree. * * @param instance the new instance. Usually this is the test instance * supplied to update the range of attributes in the distance function. */ public void addInstanceInfo(Instance instance) { m_EuclideanDistance.updateRanges(instance); } /** * string representing the tree * * @return string representing the tree */ public String toString() { StringBuffer text = new StringBuffer(); KDTreeNode tree = m_Root; if(m_Root==null) { text.append("KDTree not built yet."); return text.toString(); } int[] num = new int[1]; num[0] = 0; text.append("\nKDTree build:"); text.append(tree.statToString(true, true)); // tree in string format: text.append(tree.nodeToString(true)); return text.toString(); } /** * Assigns instances to centers using KDTree. * * @param centers the current centers * @param assignments the centerindex for each instance * @param pc the threshold value for pruning. * @throws Exception if something goes wrong */ public void centerInstances(Instances centers, int [] assignments, double pc) throws Exception { int [] centList = new int[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) centList[i] = i; m_Root.determineAssignments(centers, centList, assignments, pc); } /** * Returns array of boolean set true or false if instance is part * of next left kdtree. * * @param left list of boolean values, true if instance belongs to left * @param startIdx * @param endIdx * @param splitDim index of splitting attribute * @param splitValue value at which the node is split * @return number of instances that belong to the left */ private int checkSplitInstances(boolean [] left, int startIdx, int endIdx, int splitDim, double splitValue) { // length of left should be same as length of instList int numLeft = 0; for (int i = startIdx, j = 0; i <= endIdx; i++, j++) { // value <= splitValue if (m_EuclideanDistance.valueIsSmallerEqual( m_Instances.instance(m_InstList[i]), splitDim, splitValue)) { left[j] = true; numLeft++; } else { left[j] = false; } } return numLeft; } /** * Sorts instances newly into left and right part. * * @param left list of flags, set true by this method if instance * should go to the left follow node * @param startIdx * @param endIdx * @param startLeft */ private void splitInstances(boolean [] left, int startIdx, int endIdx, int startLeft) { int tmp; //shuffling indices in the left node to the left of the array and those in //right node to the right side of the array (see makeKDTreeNode() for //referred startLeft, numLeft and startRight variables). //After for loop starting from startLeft, numLeft indices will be on left //and the rest will be on right starting from startRight for (int i = startIdx, j = 0; i <= endIdx; i++, j++) { if (left[j]) { tmp = m_InstList[startLeft]; m_InstList[startLeft++] = m_InstList[i]; //instList[i]; m_InstList[i] = tmp; } } } /** * Checks if there is any instance with missing values. Throws an exception * if there is, as KDTree does not handle missing values. * * @param instances the instances to check * @throws Exception if missing values are encountered */ private void checkMissing(Instances instances) throws Exception { for(int i=0; i<instances.numInstances(); i++) { Instance ins = instances.instance(i); for(int j=0; j<ins.numValues(); j++) { if(ins.index(j) != ins.classIndex()) if(ins.isMissingSparse(j)) { throw new Exception("ERROR: KDTree can not deal with missing " + "values. Please run ReplaceMissingValues filter " + "on the dataset before passing it on to the KDTree."); } } } } /** * Checks if there is any missing value in the instance. * * @param ins the instances to check * @throws Exception if missing values are encountered */ private void checkMissing(Instance ins) throws Exception { for(int j=0; j<ins.numValues(); j++) { if(ins.index(j) != ins.classIndex()) if(ins.isMissingSparse(j)) { throw new Exception("ERROR: KDTree can not deal with missing " + "values. Please run ReplaceMissingValues filter " + "on the dataset before passing it on to the KDTree."); } } } /** -------------------------------------------------------------------------------- ** variables for nearest neighbour search * --------------------------------------------------------------------------------*/ /** index/indices of current target */ private int [] m_NearestList; /** length of nearest list (can be larger than k) */ private int m_NearestListLength = 0; /** true if more than of k nearest neighbours */ private boolean m_MultipleFurthest = false; /** number of nearest neighbours k */ private int m_kNN = 0; /** distance to current furthest of the neighbours */ private double m_MaxMinDist = Double.MAX_VALUE; /** index of the furthest of the neighbours in m_NearestList */ private int m_FurthestNear = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -