📄 kdtree.java
字号:
/** distance to current nearest neighbour */ private double [] m_DistanceList; /** * Returns the distances to the kNearest or 1 nearest neighbour currently * found with either the kNearestNeighbours or the nearestNeighbour method. * * @return distances[] array containing the distances of the * nearestNeighbours. The length and ordering of the array is the * same as that of the instances returned by nearestNeighbour * functions. * @throws Exception Throws an exception if called before calling kNearestNeighbours * or nearestNeighbours. */ public double[] getDistances() throws Exception { if(m_Instances==null || m_DistanceList==null) throw new Exception("The tree has not been supplied with a set of " + "instances or getDistances() has been called " + "before calling kNearestNeighbours()."); return m_DistanceList; } /** debug pls remove after use. */ private boolean print = false; /** * Returns the k nearest neighbours to the supplied instance. * * @param target The instance to find the nearest neighbours for. * @param k The number of neighbours to find. * @return the neighbors * @throws Exception Throws an exception if the nearest neighbour could not be * found. */ public Instances kNearestNeighbours(Instance target, int k) throws Exception { checkMissing(target); if(m_Instances==null) throw new Exception("No instances supplied yet. Have to call " + "setInstances(instances) with a set of Instances " + "first."); m_kNN = k; double maxDist; m_NearestList = new int[m_Instances.numInstances()]; m_DistanceList = new double[m_Instances.numInstances()]; m_NearestListLength = 0; for(int i=0; i<m_DistanceList.length; i++) { m_DistanceList[i] = Double.MAX_VALUE; } maxDist = m_Root.kNearestNeighbour(target); combSort11(m_DistanceList, m_NearestList); m_EuclideanDistance.postProcessDistances(m_DistanceList); Instances nearest = new Instances(m_Instances, 0); double [] newDistanceList = new double[m_NearestListLength]; for(int i=0; i<m_NearestListLength; i++) { nearest.add(m_Instances.instance(m_NearestList[i])); newDistanceList[i] = m_DistanceList[i]; } m_DistanceList = newDistanceList; return nearest; } /** * Returns the nearest neighbour to the supplied instance. * * @param target The instance to find the nearest neighbour for. * @return the nearest neighbor * @throws Exception Throws an exception if the neighbours could not be found. */ public Instance nearestNeighbour(Instance target) throws Exception { return (kNearestNeighbours(target, 1)).instance(0); } /** * Find k nearest neighbours to target. This is the main method. * * @param target the instance to find nearest neighbour for * @param kNN the number of neighbors to find * @param nearestList * @param distanceList * @return * @throws Exception if something goes wrong */ //redundant no longer needed public int findKNearestNeighbour(Instance target, int kNN, int [] nearestList, double [] distanceList) throws Exception { m_kNN = kNN; m_NearestList = nearestList; m_DistanceList = distanceList; m_NearestListLength = 0; for(int i=0; i<distanceList.length; i++) { distanceList[i] = Double.MAX_VALUE; } int[] num = new int[1]; num[0] = 0; double minDist = m_Root.kNearestNeighbour(target); return m_NearestListLength; } /** * Get the distance of the furthest of the nearest neighbour * returns the index of this instance * in the index list. * * @return the index of the instance */ private int checkFurthestNear() { double max = 0.0; int furthestNear = 0; for (int i = 0; i < m_kNN; i++) { if (m_DistanceList[i] > max) { max = m_DistanceList[i]; furthestNear = i; } } return furthestNear; } /** * the GET and SET - functions =============================================== **/ /** * Tip text for this property * * @return the tip text for this property */ public String minBoxRelWidthTipText() { return "The minimum relative width of the box. A node is only made a leaf "+ "if the width of the split dimension of the instances in a node " + "normalized over the width of the split dimension of all the " + "instances is less than or equal to this minimum relative width."; } /** * Sets the minimum relative box width. * * @param i the minimum relative box width */ public void setMinBoxRelWidth(double i) { m_MinBoxRelWidth = i; } /** * Gets the minimum relative box width. * @return the minimum relative box width */ public double getMinBoxRelWidth() { return m_MinBoxRelWidth; } /** * Tip text for this property * * @return the tip text for this property */ public String maxInstInLeafTipText() { return "The max number of instances in a leaf."; } /** * Sets the maximum number of instances in a leaf. * * @param i the maximum number of instances in a leaf */ public void setMaxInstInLeaf(int i) { m_MaxInstInLeaf = i; } /** * Get the maximum number of instances in a leaf. * * @return the maximum number of instances in a leaf */ public int getMaxInstInLeaf() { return m_MaxInstInLeaf; } /** * Tip text for this property * * @return the tip text for this property */ public String normalizeNodeWidthTipText() { return "Whether if the widths of the KDTree node should be normalized " + "by the width of the universe or not. "+ "Where, width of the node is the range of the split attribute "+ "based on the instances in that node, and width of the " + "universe is the range of the split attribute based on all the " + "instances (default: false)."; } /** * Sets the flag for normalizing the widths of a KDTree Node by the width * of the dimension in the universe. * * @param n true to use normalizing. */ public void setNormalizeNodeWidth(boolean n) { m_NormalizeNodeWidth = n; } /** * Gets the normalize flag. * * @return True if normalizing */ public boolean getNormalizeNodeWidth() { return m_NormalizeNodeWidth; } /** * returns the distance function currently in use * * @return the distance function */ public DistanceFunction getDistanceFunction() { return (DistanceFunction) m_EuclideanDistance; } /** * sets the distance function to use for nearest neighbour search * * @param df the distance function to use * @throws Exception if not EuclideanDistance */ public void setDistanceFunction(DistanceFunction df) throws Exception { if(!(df instanceof EuclideanDistance)) throw new Exception("KDTree currently only works with " + "EuclideanDistanceFunction."); m_DistanceFunction = m_EuclideanDistance = (EuclideanDistance) df; } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class implementing the KDTree search algorithm for nearest "+ "neighbour search.\n" + "The connection to dataset is only a reference. For the tree " + "structure the indexes are stored in an array. \n" + "Building the tree:\n" + "If a node has <maximal-inst-number> (option -L) instances no " + "further splitting is done. Also if the split would leave one " + "side empty, the branch is not split any further even if the " + "instances in the resulting node are more than " + "<maximal-inst-number> instances.\n" + "**PLEASE NOTE:** The algorithm can not handle missing values, so it " + "is advisable to run ReplaceMissingValues filter if there are any " + "missing values in the dataset."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(); newVector.addElement(new Option("\tSet minimal width of a box\n"+ "\t(default = 1.0E-2).", "W", 0,"-W <value>")); newVector.addElement(new Option("\tMaximal number of instances in a leaf\n"+ "\t(default = 40).", "L", 0,"-L")); newVector.addElement(new Option("\tNormalizing will be done\n"+ "\t(Select dimension for split, with "+ "normalising to universe).", "N", 0,"-N")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -W <value> * Set minimal width of a box * (default = 1.0E-2).</pre> * * <pre> -L * Maximal number of instances in a leaf * (default = 40).</pre> * * <pre> -N * Normalizing will be done * (Select dimension for split, with normalising to universe).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { super.setOptions(options); String optionString = Utils.getOption('W', options); if (optionString.length() != 0) { setMinBoxRelWidth(Double.parseDouble(optionString)); } optionString = Utils.getOption('L', options); if (optionString.length() != 0) { setMaxInstInLeaf(Integer.parseInt(optionString)); } if (Utils.getFlag('N', options)) { setNormalizeNodeWidth(true); } else { setNormalizeNodeWidth(false); } } /** * Gets the current settings of KDtree. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { String[] superOptions = super.getOptions(); String[] options = new String[7+superOptions.length]; int current = 0; System.arraycopy(superOptions, current, options, current, superOptions.length); current = superOptions.length; options[current++] = "-W"; options[current++] = "" + getMinBoxRelWidth(); options[current++] = "-L"; options[current++] = "" + getMaxInstInLeaf(); if (getNormalizeNodeWidth()) { options[current++] = "-N"; } while (current < options.length) { options[current++] = ""; } return options; } /** * Main method for testing this class * * @param args the commandline parameters */ public static void main(String [] args) { try { if (args.length < 1 ) { System.err.println("Usage: " + KDTree.class.getName() + " <dataset>"); System.exit(1); } Instances insts = new Instances(new java.io.FileReader(args[0])); KDTree tree = new KDTree(); DistanceFunction df = new EuclideanDistance(); df.setInstances(insts); tree.setInstances(insts); tree.setDistanceFunction(df); System.out.println(tree.toString()); } catch (Exception ex) { ex.printStackTrace(); System.err.println(ex.getMessage()); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -