📄 kdtree.java
字号:
* @param x the point * @return the distance * @throws Exception If some problem occurs in determining * the distance to the hyperrectangle. */ protected double distanceToHrect(KDTreeNode node, Instance x) throws Exception { double distance = 0.0; Instance closestPoint = new Instance(x); boolean inside; inside = clipToInsideHrect(node, closestPoint); if (!inside) distance = m_EuclideanDistance.distance(closestPoint, x); return distance; } /** * Finds the closest point in the hyper rectangle to a given point. Change the * given point to this closest point by clipping of at all the dimensions to * be clipped of. If the point is inside the rectangle it stays unchanged. The * return value is true if the point was not changed, so the the return value * is true if the point was inside the rectangle. * * @param node The current KDTreeNode in whose hyperrectangle the closest * point is to be found. * @param x a point * @return true if the input point stayed unchanged. */ protected boolean clipToInsideHrect(KDTreeNode node, Instance x) { boolean inside = true; for (int i = 0; i < m_Instances.numAttributes(); i++) { // TODO treat nominals differently!?? if (x.value(i) < node.m_NodeRanges[i][MIN]) { x.setValue(i, node.m_NodeRanges[i][MIN]); inside = false; } else if (x.value(i) > node.m_NodeRanges[i][MAX]) { x.setValue(i, node.m_NodeRanges[i][MAX]); inside = false; } } return inside; } /** * Returns true if candidate is a full owner in respect to a competitor. * <p> * * The candidate has been the closer point to the current rectangle or even * has been a point within the rectangle. The competitor is competing with the * candidate for a few points out of the rectangle although it is a point * further away from the rectangle then the candidate. The extrem point is the * corner of the rectangle that is furthest away from the candidate towards * the direction of the competitor. * * If the distance candidate to this extreme point is smaller then the * distance competitor to this extreme point, then it is proven that none of * the points in the rectangle can be owned be the competitor and the * candidate is full owner of the rectangle in respect to this competitor. See * also D. Pelleg and A. Moore's paper 'Accelerating exact k-means Algorithms * with Geometric Reasoning'. * <p> * * @param node The current KDTreeNode / hyperrectangle. * @param candidate instance that is candidate to be owner * @param competitor instance that competes against the candidate * @return true if candidate is full owner * @throws Exception If some problem occurs. */ protected boolean candidateIsFullOwner(KDTreeNode node, Instance candidate, Instance competitor) throws Exception { // get extreme point Instance extreme = new Instance(candidate); for (int i = 0; i < m_Instances.numAttributes(); i++) { if ((competitor.value(i) - candidate.value(i)) > 0) { extreme.setValue(i, node.m_NodeRanges[i][MAX]); } else { extreme.setValue(i, node.m_NodeRanges[i][MIN]); } } boolean isFullOwner = m_EuclideanDistance.distance(extreme, candidate) < m_EuclideanDistance .distance(extreme, competitor); return isFullOwner; } /** * Assigns instances of this node to center. Center to be assign to is decided * by the distance function. * * @param node The KDTreeNode whose instances are to be assigned. * @param centers all the input centers * @param centList the list of centers to work with * @param assignments index list of last assignments * @throws Exception If there is error assigning the instances. */ public void assignSubToCenters(KDTreeNode node, Instances centers, int[] centList, int[] assignments) throws Exception { // todo: undecided situations int numCent = centList.length; // WARNING: assignments is "input/output-parameter" // should not be null and the following should not happen if (assignments == null) { assignments = new int[m_Instances.numInstances()]; for (int i = 0; i < assignments.length; i++) { assignments[i] = -1; } } // set assignments for all instances of this node for (int i = node.m_Start; i <= node.m_End; i++) { int instIndex = m_InstList[i]; Instance inst = m_Instances.instance(instIndex); // if (instList[i] == 664) System.out.println("664***"); int newC = m_EuclideanDistance.closestPoint(inst, centers, centList); // int newC = clusterProcessedInstance(inst, centers); assignments[instIndex] = newC; } } /** * Properties' variables ===================================================== */ /** flag for normalizing. */ boolean m_NormalizeNodeWidth = true; /** The euclidean distance function to use. */ protected EuclideanDistance m_EuclideanDistance; { // to make sure we have only one object of EuclideanDistance if (m_DistanceFunction instanceof EuclideanDistance) m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction; else m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(); } /** minimal relative width of a KDTree rectangle. */ protected double m_MinBoxRelWidth = 1.0E-2; /** maximal number of instances in a leaf. */ protected int m_MaxInstInLeaf = 40; /** * the GET and SET - functions =============================================== */ /** * Tip text for this property. * * @return the tip text for this property */ public String minBoxRelWidthTipText() { return "The minimum relative width of the box. A node is only made a leaf " + "if the width of the split dimension of the instances in a node " + "normalized over the width of the split dimension of all the " + "instances is less than or equal to this minimum relative width."; } /** * Sets the minimum relative box width. * * @param i the minimum relative box width */ public void setMinBoxRelWidth(double i) { m_MinBoxRelWidth = i; } /** * Gets the minimum relative box width. * * @return the minimum relative box width */ public double getMinBoxRelWidth() { return m_MinBoxRelWidth; } /** * Tip text for this property. * * @return the tip text for this property */ public String maxInstInLeafTipText() { return "The max number of instances in a leaf."; } /** * Sets the maximum number of instances in a leaf. * * @param i the maximum number of instances in a leaf */ public void setMaxInstInLeaf(int i) { m_MaxInstInLeaf = i; } /** * Get the maximum number of instances in a leaf. * * @return the maximum number of instances in a leaf */ public int getMaxInstInLeaf() { return m_MaxInstInLeaf; } /** * Tip text for this property. * * @return the tip text for this property */ public String normalizeNodeWidthTipText() { return "Whether if the widths of the KDTree node should be normalized " + "by the width of the universe or not. " + "Where, width of the node is the range of the split attribute " + "based on the instances in that node, and width of the " + "universe is the range of the split attribute based on all the " + "instances (default: false)."; } /** * Sets the flag for normalizing the widths of a KDTree Node by the width of * the dimension in the universe. * * @param n true to use normalizing. */ public void setNormalizeNodeWidth(boolean n) { m_NormalizeNodeWidth = n; } /** * Gets the normalize flag. * * @return True if normalizing */ public boolean getNormalizeNodeWidth() { return m_NormalizeNodeWidth; } /** * returns the distance function currently in use. * * @return the distance function */ public DistanceFunction getDistanceFunction() { return (DistanceFunction) m_EuclideanDistance; } /** * sets the distance function to use for nearest neighbour search. * * @param df the distance function to use * @throws Exception if not EuclideanDistance */ public void setDistanceFunction(DistanceFunction df) throws Exception { if (!(df instanceof EuclideanDistance)) throw new Exception("KDTree currently only works with " + "EuclideanDistanceFunction."); m_DistanceFunction = m_EuclideanDistance = (EuclideanDistance) df; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String nodeSplitterTipText() { return "The the splitting method to split the nodes of the KDTree."; } /** * Returns the splitting method currently in use to split the nodes of the * KDTree. * * @return The KDTreeNodeSplitter currently in use. */ public KDTreeNodeSplitter getNodeSplitter() { return m_Splitter; } /** * Sets the splitting method to use to split the nodes of the KDTree. * * @param splitter The KDTreeNodeSplitter to use. */ public void setNodeSplitter(KDTreeNodeSplitter splitter) { m_Splitter = splitter; } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class implementing the KDTree search algorithm for nearest " + "neighbour search.\n" + "The connection to dataset is only a reference. For the tree " + "structure the indexes are stored in an array. \n" + "Building the tree:\n" + "If a node has <maximal-inst-number> (option -L) instances no " + "further splitting is done. Also if the split would leave one " + "side empty, the branch is not split any further even if the " + "instances in the resulting node are more than " + "<maximal-inst-number> instances.\n" + "**PLEASE NOTE:** The algorithm can not handle missing values, so it " + "is advisable to run ReplaceMissingValues filter if there are any " + "missing values in the dataset.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(); newVector.add(new Option( "\tNode splitting method to use.\n" + "\t(default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)", "S", 1, "-S <classname and options>")); newVector.addElement(new Option( "\tSet minimal width of a box\n" + "\t(default: 1.0E-2).", "W", 0, "-W <value>")); newVector.addElement(new Option( "\tMaximal number of instances in a leaf\n" + "\t(default: 40).", "L", 0, "-L")); newVector.addElement(new Option( "\tNormalizing will be done\n" + "\t(Select dimension for split, with normalising to universe).", "N", 0, "-N")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <classname and options> * Node splitting method to use. * (default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)</pre> * * <pre> -W <value> * Set minimal width of a box * (default: 1.0E-2).</pre> * * <pre> -L * Maximal number of instances in a leaf * (default: 40).</pre> * * <pre> -N * Normalizing will be done * (Select dimension for split, with normalising to universe).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { super.setOptions(options); String optionString = Utils.getOption('S', options); if (optionString.length() != 0) { String splitMethodSpec[] = Utils.splitOptions(optionString); if (splitMethodSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = splitMethodSpec[0]; splitMethodSpec[0] = ""; setNodeSplitter((KDTreeNodeSplitter) Utils.forName( KDTreeNodeSplitter.class, className, splitMethodSpec)); } else { setNodeSplitter(new SlidingMidPointOfWidestSide()); } optionString = Utils.getOption('W', options); if (optionString.length() != 0) setMinBoxRelWidth(Double.parseDouble(optionString)); else setMinBoxRelWidth(1.0E-2); optionString = Utils.getOption('L', options); if (optionString.length() != 0) setMaxInstInLeaf(Integer.parseInt(optionString)); else setMaxInstInLeaf(40); setNormalizeNodeWidth(Utils.getFlag('N', options)); } /** * Gets the current settings of KDtree. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector<String> result; String[] options; int i; result = new Vector<String>(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); result.add("-S"); result.add( (m_Splitter.getClass().getName() + " " + Utils.joinOptions(m_Splitter.getOptions())).trim()); result.add("-W"); result.add("" + getMinBoxRelWidth()); result.add("-L"); result.add("" + getMaxInstInLeaf()); if (getNormalizeNodeWidth()) result.add("-N"); return result.toArray(new String[result.size()]); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -