📄 covertree.java
字号:
protected double compare(final int p1, final int p2, Stack<d_node> cover_set) { return cover_set.element(p1).dist - cover_set.element(p2).dist; } /** * Half-sorts a cover set, so that nodes nearer to the query * are at the front. * @param cover_set The cover set to sort. */ protected void halfsort(Stack<d_node> cover_set) { if(cover_set.length <= 1) return; int start=0; int hi = cover_set.length-1; int right = hi; int left; while (right > start) { int mid = start + ((hi - start) >> 1); boolean jumpover = false; if (compare(mid, start, cover_set) < 0.0) SWAP(mid, start, cover_set); if (compare(hi, mid, cover_set) < 0.0) SWAP(mid, hi, cover_set); else jumpover = true; if (!jumpover && compare(mid, start, cover_set) < 0.0) SWAP(mid, start, cover_set); jump_over: ; left = start + 1; right = hi - 1; do { while (compare(left, mid, cover_set) < 0.0) left++; while (compare(mid, right, cover_set) < 0.0) right--; if (left < right) { SWAP(left, right, cover_set); if (mid == left) mid = right; else if (mid == right) mid = left; left++; right--; } else if (left == right) { left++; right--; break; } } while (left <= right); hi = right; } } /** * Function to check if a child node can be inside a query ball, * without calculating the child node's distance to the query. * This further avoids unnecessary distance calculation. * * @param parent_query_dist The distance of parent to the query * @param child_parent_dist The distance of child to the parent. * @param upper_bound The distance to the query of the best kth * NN found so far. * @return true If child can be inside the query ball. */ protected boolean shell(double parent_query_dist, double child_parent_dist, double upper_bound) { return parent_query_dist - child_parent_dist <= upper_bound; } /** * This functions adds nodes for inspection at the next level during NN * search. The internal nodes are added to one of the cover sets (at * the level of the child node which is added) and leaf nodes are * added to the zero set. * * An optimization to consider: * Make all distance evaluations occur in descend. * * Instead of passing a cover_set, pass a stack of cover sets. The * last element holds d_nodes with your distance. The next lower * element holds a d_node with the distance to your query parent, * next = query grand parent, etc.. * * Compute distances in the presence of the tighter upper bound. * @param query The query (in shape of a cover tree node, as we * are doing batch searching). * @param upper_k Heap containing distances of best k-NNs found so * far. * @param current_scale The current scale/level being looked at in * the tree. * @param max_scale The max scale/level that has so far been looked * at. * @param cover_sets The cover sets of tree nodes for each level of * our trees for. * @param zero_set The set containing leaf nodes. * @return A new max_scale, if we descend to a deeper level. * @throws Exception If there is some problem (in updating the * heap upper_k). */ protected int descend(final CoverTreeNode query, MyHeap upper_k, int current_scale, int max_scale, // amk14comment: make sure this gets // passed by reference in Java Stack<Stack<d_node>> cover_sets, // amk14comment: contains children in // set Q in paper Stack<d_node> zero_set) // amk14comment: zeroset contains the children at // the lowest level i.e. -infinity throws Exception { d_node parent; Stack<d_node> cover_set_currentscale = getCoverSet(current_scale, cover_sets); for (int i = 0; i < cover_set_currentscale.length; i++) { parent = cover_set_currentscale.element(i); CoverTreeNode par = parent.n; double upper_dist = upper_k.peek().distance + query.max_dist + query.max_dist; // *upper_bound + query->max_dist + query->max_dist; if (parent.dist <= upper_dist + par.max_dist) { CoverTreeNode chi; if (par == m_Root && par.num_children == 0) // if our tree consists of // only one root(which is // also leaf) node chi = par; else chi = par.children.element(0); if (parent.dist <= upper_dist + chi.max_dist) { // amk14comment: looking // at child_0 (which is // the parent itself) if (chi.num_children > 0) { if (max_scale < chi.scale) { max_scale = chi.scale; } d_node temp = new d_node(parent.dist, chi); getCoverSet(chi.scale, cover_sets).push(temp); if (m_TreeStats != null) m_TreeStats.incrIntNodeCount(); } else if (parent.dist <= upper_dist) { d_node temp = new d_node(parent.dist, chi); zero_set.push(temp); if (m_TreeStats != null) m_TreeStats.incrLeafCount(); } } for (int c = 1; c < par.num_children; c++) { chi = par.children.element(c); double upper_chi = upper_k.peek().distance + chi.max_dist + query.max_dist + query.max_dist; // *upper_bound + chi.max_dist // + query.max_dist + // query.max_dist; if (shell(parent.dist, chi.parent_dist, upper_chi)) { // amk14comment:parent_query_dist // - // child_parent_dist // <= upper_chi - if child can be // inside the shrunk query ball // NOT the same as above parent->dist <= upper_dist + chi->max_dist double d = Math.sqrt(m_DistanceFunction.distance(query.p(), chi.p(), upper_chi * upper_chi, m_TreeStats)); if (m_TreeStats != null) m_TreeStats.incrPointCount(); if (d <= upper_chi) { //if child is inside the shrunk query ball if (d < upper_k.peek().distance) // *upper_bound) update(upper_k, d); if (chi.num_children > 0) { if (max_scale < chi.scale) { max_scale = chi.scale; } d_node temp = new d_node(d, chi); getCoverSet(chi.scale, cover_sets).push(temp); if (m_TreeStats != null) m_TreeStats.incrIntNodeCount(); } else if (d <= upper_chi - chi.max_dist) { d_node temp = new d_node(d, chi); zero_set.push(temp); if (m_TreeStats != null) m_TreeStats.incrLeafCount(); } }//end if(d<=upper_chi) }//end if(shell(parent.dist,... }//end for(child_1 to n) }//end if(parent.dist<=upper_dist.. }//end for(covers_sets[current_scale][i]) return max_scale; } /** * Does a brute force NN search on the nodes in the given zero set. * A zero set might have some nodes added to it that were not k-NNs, * so need to do a brute-force to pick only the k-NNs (without * calculating distances, as each node in the zero set already had * its distance calculated to the query, which is stored with the * node). * * @param k The k in kNN. * @param query The query. * @param zero_set The zero set on which the brute force NN search * is performed. * @param upper_k The heap storing distances of k-NNs found during * the search. * @param results The returned k-NNs. * @throws Exception If there is somem problem. */ protected void brute_nearest(final int k, final CoverTreeNode query, Stack<d_node> zero_set, MyHeap upper_k, Stack<NeighborList> results) throws Exception { if (query.num_children > 0) { Stack<d_node> new_zero_set = new Stack<d_node>(); CoverTreeNode query_chi = query.children.element(0); brute_nearest(k, query_chi, zero_set, upper_k, results); MyHeap new_upper_k = new MyHeap(k); for (int i = 1; i < query.children.length; i++) { query_chi = query.children.element(i); setter(new_upper_k, upper_k.peek().distance + query_chi.parent_dist, k); copy_zero_set(query_chi, new_upper_k, zero_set, new_zero_set); brute_nearest(k, query_chi, new_zero_set, new_upper_k, results); } } else { NeighborList temp = new NeighborList(k); d_node ele; for (int i = 0; i < zero_set.length; i++) { ele = zero_set.element(i); if (ele.dist <= upper_k.peek().distance) { temp.insertSorted(ele.dist, ele.n.p()); // temp.push(ele.n.p()); } } results.push(temp); } } /** * Performs a recursive k-NN search for a given batch of queries provided in the * form of a cover tree. P.S.: This function should not be called from outside. * Outside classes should use kNearestNeighbours() instead. * * @param k The number of NNs to find. * @param query_node The node of the query tree to start the search from. * @param cover_sets The set of sets that contains internal * nodes that were found to be inside the query ball at previous scales/levels * (intially there would be just the root node at root level). * @param zero_set The set that'll contain the leaf nodes that are found to * be inside the query ball. * @param current_scale The level/scale to do the search from (this value * would be used to inspect the cover set in the provided set of cover sets). * @param max_scale The max scale/level that has so far been inspected. * @param upper_k The heap containing distances of the best k-NNs found so * far (initialized to Double.POSITIVE_INFINITY). * @param results The list of returned k-NNs. * @throws Exception If there is some problem during the search. */ protected void internal_batch_nearest_neighbor(final int k, final CoverTreeNode query_node, Stack<Stack<d_node>> cover_sets, Stack<d_node> zero_set, int current_scale, int max_scale, MyHeap upper_k, Stack<NeighborList> results) throws Exception { if (current_scale > max_scale) { // All remaining points are in the zero set. brute_nearest(k, query_node, zero_set, upper_k, results); } else { // Our query_node has too much scale. Reduce. if (query_node.scale <= current_scale && query_node.scale != 100) { // amk14comment:if j>=i in paper CoverTreeNode query_chi; Stack<d_node> new_zero_set = new Stack<d_node>(); Stack<Stack<d_node>> new_cover_sets = new Stack<Stack<d_node>>(); MyHeap new_upper_k = new MyHeap(k); for (int i = 1; i < query_node.num_children; i++) { //processing child_1 and onwards query_chi = query_node.children.element(i); setter(new_upper_k, upper_k.peek().distance + query_chi.parent_dist, k); //copy the zero set that satisfy a certain bound to the new zero set copy_zero_set(query_chi, new_upper_k, zero_set, new_zero_set); //copy the coversets[current_scale] nodes that satisfy a certain //bound to the new_cover_sets[current_scale] copy_cover_sets(query_chi, new_upper_k, cover_sets, new_cover_sets, current_scale, max_scale); //search for the query_node child in the nodes nearer to it. internal_batch_nearest_neighbor(k, query_chi, new_cover_sets, new_zero_set, current_scale, max_scale, new_upper_k, results); } new_cover_sets = null; new_zero_set = null; new_upper_k = null; // now doing child_0 //which is the parent itself, that's why we don't // need new_zero_set or new_cover_sets internal_batch_nearest_neighbor(k, query_node.children.element(0), cover_sets, zero_set, current_scale, max_scale, upper_k, results); } else { // reduce cover set scale -- amk14comment: if j<i in paper Stack<d_node> cover_set_i = getCoverSet(current_scale, cover_sets); // println("sorting"); halfsort(cover_set_i); max_scale = descend(query_node, upper_k, current_scale, max_scale, cover_sets, zero_set); cover_set_i.clear(); current_scale++; internal_batch_nearest_neighbor(k, query_node, cover_sets, zero_set, current_scale, max_scale, upper_k, results); } } } /** * Performs k-NN search for a batch of queries provided in the form * of a cover tree. P.S.: Outside classes should call * kNearestNeighbours(). * * @param k The number of k-NNs to find. * @param tree_root The root of the cover tree on which k-NN search * is to be performed. * @param query_root The root of the cover tree consisting of queries. * @param results The list of returned k-NNs. * @throws Exception If there is some problem during the search. */ protected void batch_nearest_neighbor(final int k, CoverTreeNode tree_root, CoverTreeNode query_root, Stack<NeighborList> results) throws Exception { //amk14comment: These contain the covering nodes at each level Stack<Stack<d_node>> cover_sets = new Stack<Stack<d_node>>(100); //amk14comment: These contain the nodes thought to be nearest at the leaf level Stack<d_node> zero_set = new Stack<d_node>(); MyHeap upper_k = new MyHeap(k); //probably not needed //amk14comment:initializes the array to MAXFLOAT setter(upper_k, Double.POSITIVE_INFINITY, k); // amk14comment:distance from top query point to top node point double treeroot_to_query_dist = Math.sqrt(m_DistanceFunction.distance( query_root.p(), tree_root.p(), Double.POSITIVE_INFINITY)); // amk14comment:probably stores the kth smallest distances encountered so // far update(upper_k, treeroot_to_query_dist); d_node temp = new d_node(treeroot_to_query_dist, tree_root); getCoverSet(0, cover_sets).push(temp); // incrementing counts for the root node if (m_TreeStats != null) { m_TreeStats.incrPointCount(); if (tree_root.num_children > 0) m_TreeStats.incrIntNodeCount(); else m_TreeStats.incrLeafCount(); } internal_batch_nearest_neighbor(k, query_root, cover_sets, zero_set, 0, 0, upper_k, results); } /** * Performs k-NN serach for a single given query/test Instance. * * @param target The query/test instance. * @param k Number of k-NNs to find. * @return List of k-NNs. * @throws Exception If there is some problem during the search * for k-NNs. */ protected NeighborList findKNearest(final Instance target, final int k) throws Exception { Stack<d_node> cover_set_current = new Stack<d_node>(), cover_set_next, zero_set = new Stack<d_node>(); CoverTreeNode parent, child; d_node par; MyHeap upper_k = new MyHeap(k); double d = Math.sqrt(m_DistanceFunction.distance(m_Root.p(), target, Double.POSITIVE_INFINITY, m_TreeStats)), upper_bound; cover_set_current.push(new d_node(d, m_Root)); setter(upper_k, Double.POSITIVE_INFINITY, k); this.update(upper_k, d); //updating stats for the root node if(m_TreeStats!=null) { if(m_Root.num_children > 0) m_TreeStats.incrIntNodeCount(); else m_TreeStats.incrLeafCount();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -