📄 covertree.java
字号:
new_node.idx = idx; return new_node; } /** * Creates a new leaf node for a given Instance/point p. * @param idx The index of the instance this leaf node * represents. * @return Newly created leaf CoverTreeNode. */ protected CoverTreeNode new_leaf(Integer idx) { // (const point &p) CoverTreeNode new_leaf = new CoverTreeNode(idx, 0.0, 0.0, null, 0, 100); return new_leaf; } /** * Returns the max distance of the reference point p in current node to * it's children nodes. * @param v The stack of DistanceNode objects. * @return Distance of the furthest child. */ protected double max_set(Stack<DistanceNode> v) { // rename to // maxChildDist double max = 0.0; for (int i = 0; i < v.length; i++) { DistanceNode n = v.element(i); if (max < n.dist.element(n.dist.length - 1).floatValue()) { // v[i].dist.last()) max = n.dist.element(n.dist.length - 1).floatValue(); // v[i].dist.last(); } } return max; } /** * Splits a given point_set into near and far based on the given * scale/level. All points with distance > base^max_scale would be moved * to far set. In other words, all those points that are not covered by the * next child ball of a point p (ball made of the same point p but of * smaller radius at the next lower level) are removed from the supplied * current point_set and put into far_set. * * @param point_set The supplied set from which all far points * would be removed. * @param far_set The set in which all far points having distance * > base^max_scale would be put into. * @param max_scale The given scale based on which the distances * of points are judged to be far or near. */ protected void split(Stack<DistanceNode> point_set, Stack<DistanceNode> far_set, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { DistanceNode n = point_set.element(i); if (n.dist.element(n.dist.length - 1).doubleValue() <= fmax) { point_set.set(new_index++, point_set.element(i)); } else far_set.push(point_set.element(i)); // point_set[i]); } List l = new java.util.LinkedList(); for (int i = 0; i < new_index; i++) l.add(point_set.element(i)); //removing all and adding only the near points point_set.clear(); point_set.addAll(l); // point_set.index=new_index; } /** * Moves all the points in point_set covered by (the ball of) new_point * into new_point_set, based on the given scale/level. * * @param point_set The supplied set of instances from which * all points covered by new_point will be removed. * @param new_point_set The set in which all points covered by * new_point will be put into. * @param new_point The given new point. * @param max_scale The scale based on which distances are * judged (radius of cover ball is calculated). */ protected void dist_split(Stack<DistanceNode> point_set, Stack<DistanceNode> new_point_set, DistanceNode new_point, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { double new_d = Math.sqrt(m_DistanceFunction.distance(new_point.q(), point_set.element(i).q(), fmax*fmax)); if (new_d <= fmax) { point_set.element(i).dist.push(new_d); new_point_set.push(point_set.element(i)); } else point_set.set(new_index++, point_set.element(i)); } List l = new java.util.LinkedList(); for (int i = 0; i < new_index; i++) l.add(point_set.element(i)); point_set.clear(); point_set.addAll(l); } /** * Creates a cover tree recursively using batch insert method. * * @param p The index of the instance from which to create the * first node. All other points will be inserted beneath this node * for p. * @param max_scale The current scale/level where the node is to be * created (Also determines the radius of the cover balls created at * this level). * @param top_scale The max scale in the whole tree. * @param point_set The set of unprocessed points from which child nodes * need to be created. * @param consumed_set The set of processed points from which child * nodes have already been created. This would be used to find the * radius of the cover ball of p. * @return the node of cover tree created with p. */ protected CoverTreeNode batch_insert(Integer p, int max_scale, // current // scale/level int top_scale, // max scale/level for this dataset Stack<DistanceNode> point_set, // set of points that are nearer to p // [will also contain returned unused // points] Stack<DistanceNode> consumed_set) // to return the set of points that have // been used to calc. max_dist to a // descendent // Stack<Stack<DistanceNode>> stack) //may not be needed { if (point_set.length == 0) { CoverTreeNode leaf = new_leaf(p); leaf.nodeid = m_NumNodes; m_NumNodes++; // incrementing node count m_NumLeaves++; // incrementing leaves count return leaf; } else { double max_dist = max_set(point_set); // O(|point_set|) the max dist // in point_set to point "p". int next_scale = Math.min(max_scale - 1, get_scale(max_dist)); if (next_scale == Integer.MIN_VALUE) { // We have points with distance // 0. if max_dist is 0. Stack<CoverTreeNode> children = new Stack<CoverTreeNode>(); CoverTreeNode leaf = new_leaf(p); leaf.nodeid = m_NumNodes; children.push(leaf); m_NumLeaves++; m_NumNodes++; // incrementing node and leaf count while (point_set.length > 0) { DistanceNode tmpnode = point_set.pop(); leaf = new_leaf(tmpnode.idx); leaf.nodeid = m_NumNodes; children.push(leaf); m_NumLeaves++; m_NumNodes++; // incrementing node and leaf count consumed_set.push(tmpnode); } CoverTreeNode n = new_node(p); // make a new node out of p and assign // it the children. n.nodeid = m_NumNodes; m_NumNodes++; // incrementing node count n.scale = 100; // A magic number meant to be larger than all scales. n.max_dist = 0; // since all points have distance 0 to p n.num_children = children.length; n.children = children; return n; } else { Stack<DistanceNode> far = new Stack<DistanceNode>(); split(point_set, far, max_scale); // O(|point_set|) CoverTreeNode child = batch_insert(p, next_scale, top_scale, point_set, consumed_set); if (point_set.length == 0) { // not creating any node in this // recursive call // push(stack,point_set); point_set.replaceAllBy(far); // point_set=far; return child; } else { CoverTreeNode n = new_node(p); n.nodeid = m_NumNodes; m_NumNodes++; // incrementing node count Stack<CoverTreeNode> children = new Stack<CoverTreeNode>(); children.push(child); while (point_set.length != 0) { // O(|point_set| * num_children) Stack<DistanceNode> new_point_set = new Stack<DistanceNode>(); Stack<DistanceNode> new_consumed_set = new Stack<DistanceNode>(); DistanceNode tmpnode = point_set.pop(); double new_dist = tmpnode.dist.last(); consumed_set.push(tmpnode); // putting points closer to new_point into new_point_set (and // removing them from point_set) dist_split(point_set, new_point_set, tmpnode, max_scale); // O(|point_saet|) // putting points closer to new_point into new_point_set (and // removing them from far) dist_split(far, new_point_set, tmpnode, max_scale); // O(|far|) CoverTreeNode new_child = batch_insert(tmpnode.idx, next_scale, top_scale, new_point_set, new_consumed_set); new_child.parent_dist = new_dist; children.push(new_child); // putting the unused points from new_point_set back into // point_set and far double fmax = dist_of_scale(max_scale); tmpnode = null; for (int i = 0; i < new_point_set.length; i++) { // O(|new_point_set|) tmpnode = new_point_set.element(i); tmpnode.dist.pop(); if (tmpnode.dist.last() <= fmax) point_set.push(tmpnode); else far.push(tmpnode); } // putting the points consumed while recursing for new_point // into consumed_set tmpnode = null; for (int i = 0; i < new_consumed_set.length; i++) { // O(|new_point_set|) tmpnode = new_consumed_set.element(i); tmpnode.dist.pop(); consumed_set.push(tmpnode); } }// end while(point_size.size!=0) point_set.replaceAllBy(far); // point_set=far; n.scale = top_scale - max_scale; n.max_dist = max_set(consumed_set); n.num_children = children.length; n.children = children; return n; }// end else if(pointset!=0) }// end else if(next_scale != -214.... }// end else if(pointset!=0) } /** * Builds the tree on the given set of instances. * P.S.: For internal use only. Outside classes * should call setInstances(). * @param insts The instances on which to build * the cover tree. * @throws Exception If the supplied set of * Instances is empty, or if there are missing * values. */ protected void buildCoverTree(Instances insts) throws Exception { if (insts.numInstances() == 0) throw new Exception( "CoverTree: Empty set of instances. Cannot build tree."); checkMissing(insts); if (m_EuclideanDistance == null) m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts); else m_EuclideanDistance.setInstances(insts); Stack<DistanceNode> point_set = new Stack<DistanceNode>(); Stack<DistanceNode> consumed_set = new Stack<DistanceNode>(); Instance point_p = insts.instance(0); int p_idx = 0; double max_dist=-1, dist=0.0; Instance max_q=point_p; for (int i = 1; i < insts.numInstances(); i++) { DistanceNode temp = new DistanceNode(); temp.dist = new Stack<Double>(); dist = Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY)); if(dist > max_dist) { max_dist = dist; max_q = insts.instance(i); } temp.dist.push(dist); temp.idx = i; point_set.push(temp); } max_dist = max_set(point_set); m_Root = batch_insert(p_idx, get_scale(max_dist), get_scale(max_dist), point_set, consumed_set); }/*********************************NNSearch related stuff********************/ /** * A class for a heap to store the nearest k neighbours to an instance. * The heap also takes care of cases where multiple neighbours are the same * distance away. * i.e. the minimum size of the heap is k. * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.3 $ */ protected class MyHeap { /** the heap. */ MyHeapElement m_heap[] = null; /** * constructor. * @param maxSize the maximum size of the heap */ public MyHeap(int maxSize) { if((maxSize%2)==0) maxSize++; m_heap = new MyHeapElement[maxSize+1]; m_heap[0] = new MyHeapElement(-1); } /** * returns the size of the heap. * @return the size */ public int size() { return m_heap[0].index; } /** * peeks at the first element. * @return the first element */ public MyHeapElement peek() { return m_heap[1]; } /** * returns the first element and removes it from the heap. * @return the first element * @throws Exception if no elements in heap */ public MyHeapElement get() throws Exception { if(m_heap[0].index==0) throw new Exception("No elements present in the heap"); MyHeapElement r = m_heap[1]; m_heap[1] = m_heap[m_heap[0].index]; m_heap[0].index--; downheap(); return r; } /** * adds the distance value to the heap. * * @param d the distance value * @throws Exception if the heap gets too large */ public void put(double d) throws Exception { if((m_heap[0].index+1)>(m_heap.length-1)) throw new Exception("the number of elements cannot exceed the "+ "initially set maximum limit"); m_heap[0].index++; m_heap[m_heap[0].index] = new MyHeapElement(d); upheap(); } /** * Puts an element by substituting it in place of * the top most element. * * @param d The distance value. * @throws Exception If distance is smaller than that of the head * element. */ public void putBySubstitute(double d) throws Exception { MyHeapElement head = get(); put(d); if(head.distance == m_heap[1].distance) { putKthNearest(head.distance); } else if(head.distance > m_heap[1].distance) { m_KthNearest = null; m_KthNearestSize = 0; initSize = 10; } else if(head.distance < m_heap[1].distance) { throw new Exception("The substituted element is greater than the "+ "head element. put() should have been called "+ "in place of putBySubstitute()"); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -