📄 middleoutconstructor.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * MiddleOutConstructor.java * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand */package weka.core.neighboursearch.balltrees;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.Randomizable;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * The class that builds a BallTree middle out.<br/> * <br/> * For more information see also:<br/> * <br/> * Andrew W. Moore: The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data. In: UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, San Francisco, CA, USA, 397-405, 2000.<br/> * <br/> * Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Moore2000, * address = {San Francisco, CA, USA}, * author = {Andrew W. Moore}, * booktitle = {UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence}, * pages = {397-405}, * publisher = {Morgan Kaufmann Publishers Inc.}, * title = {The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data}, * year = {2000} * } * * @mastersthesis{Kibriya2007, * address = {Hamilton, New Zealand}, * author = {Ashraf Masood Kibriya}, * school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato}, * title = {Fast Algorithms for Nearest Neighbour Search}, * year = {2007} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * The seed for the random number generator used * in selecting random anchor. * (default: 1)</pre> * * <pre> -R * Use randomly chosen initial anchors.</pre> * <!-- options-end --> * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.1 $ */public class MiddleOutConstructor extends BallTreeConstructor implements Randomizable, TechnicalInformationHandler { /** for serialization. */ private static final long serialVersionUID = -8523314263062524462L; /** Seed form random number generator. */ protected int m_RSeed = 1; /** * The random number generator for selecting * the first anchor point randomly * (if selecting randomly). */ protected Random rand = new Random(m_RSeed); /** * True if the initial anchor is chosen randomly. False if it is the furthest * point from the mean/centroid. */ protected boolean m_RandomInitialAnchor = true; /** * Creates a new instance of MiddleOutConstructor. */ public MiddleOutConstructor() { } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "The class that builds a BallTree middle out.\n\n" + "For more information see also:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Andrew W. Moore"); result.setValue(Field.TITLE, "The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data"); result.setValue(Field.YEAR, "2000"); result.setValue(Field.BOOKTITLE, "UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence"); result.setValue(Field.PAGES, "397-405"); result.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers Inc."); result.setValue(Field.ADDRESS, "San Francisco, CA, USA"); additional = result.add(Type.MASTERSTHESIS); additional.setValue(Field.AUTHOR, "Ashraf Masood Kibriya"); additional.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search"); additional.setValue(Field.YEAR, "2007"); additional.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato"); additional.setValue(Field.ADDRESS, "Hamilton, New Zealand"); return result; } /** * Builds a ball tree middle out. * @return The root node of the tree. * @throws Exception If there is problem building * the tree. */ public BallNode buildTree() throws Exception { m_NumNodes = m_MaxDepth = m_NumLeaves = 0; BallNode root = buildTreeMiddleOut(0, m_Instances.numInstances()-1); return root; } /** * Builds a ball tree middle out from the * portion of the master index array given * by supplied start and end index. * @param startIdx The start of the portion * in master index array. * @param endIdx the end of the portion in * master index array. * @return The root node of the built tree. * @throws Exception If there is some * problem building the tree. */ protected BallNode buildTreeMiddleOut(int startIdx, int endIdx) throws Exception { int numInsts = endIdx - startIdx + 1; if(numInsts <= m_MaxInstancesInLeaf) { //make leaf Instance pivot; BallNode node = new BallNode(startIdx, endIdx, m_NumNodes, (pivot=BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList, m_Instances)), BallNode.calcRadius(startIdx, endIdx, m_InstList, m_Instances, pivot, m_DistanceFunction) ); return node; } int numAnchors = (int) Math.round(Math.sqrt(numInsts)); Vector anchors; //create anchor's hierarchy if(numAnchors > 1) { anchors = new Vector(numAnchors); createAnchorsHierarchy(anchors, numAnchors, startIdx, endIdx); }//end anchors hierarchy else { Instance pivot; BallNode node = new BallNode(startIdx, endIdx, m_NumNodes, (pivot=BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList, m_Instances)), BallNode.calcRadius(startIdx, endIdx, m_InstList, m_Instances, pivot, m_DistanceFunction) ); return node; } BallNode node = mergeNodes(anchors, startIdx, endIdx); buildLeavesMiddleOut(node); return node; } /** * Creates an anchors hierarchy from a portion * of master index array. * * @param anchors The vector for putting the anchors * into. * @param numAnchors The number of anchors to create. * @param startIdx The start of the portion of master * index array. * @param endIdx The end of the portion of master * index array. * @throws Exception If there is some problem in creating * the hierarchy. */ protected void createAnchorsHierarchy(Vector anchors, final int numAnchors, final int startIdx, final int endIdx) throws Exception { TempNode anchr1 = m_RandomInitialAnchor ? getRandomAnchor(startIdx, endIdx) : getFurthestFromMeanAnchor(startIdx, endIdx); TempNode amax = anchr1; //double maxradius = anchr1.radius; TempNode newAnchor; Vector anchorDistances = new Vector(numAnchors-1); anchors.add(anchr1); //creating anchors while(anchors.size() < numAnchors) { //create new anchor newAnchor = new TempNode(); newAnchor.points = new MyIdxList(); Instance newpivot = m_Instances.instance(((ListNode)amax.points.getFirst()).idx); newAnchor.anchor = newpivot; newAnchor.idx = ((ListNode)amax.points.getFirst()).idx; setInterAnchorDistances(anchors, newAnchor, anchorDistances); stealPoints(newAnchor, anchors, anchorDistances); newAnchor.radius = ((ListNode)newAnchor.points.getFirst()).distance; anchors.add(newAnchor); //find new amax amax = (TempNode)anchors.elementAt(0); for(int i=1; i<anchors.size(); i++) { newAnchor = (TempNode)anchors.elementAt(i); if(newAnchor.radius > amax.radius) amax = newAnchor; }//end for }//end while } /** * Applies the middle out build procedure to * the leaves of the tree. The leaf nodes * should be the ones that were created by * createAnchorsHierarchy(). The process * continues recursively for the leaves * created for each leaf of the given tree * until for some leaf node <= * m_MaxInstancesInLeaf instances remain * in the leaf. * * @param node The root of the tree. * @throws Exception If there is some problem * in building the tree leaves. */ protected void buildLeavesMiddleOut(BallNode node) throws Exception { if(node.m_Left!=null && node.m_Right!=null) { //if an internal node buildLeavesMiddleOut(node.m_Left); buildLeavesMiddleOut(node.m_Right); } else if(node.m_Left!=null || node.m_Right!=null) { throw new Exception("Invalid leaf assignment. Please check code"); } else { //if node is a leaf BallNode n2 = buildTreeMiddleOut(node.m_Start, node.m_End); if(n2.m_Left!=null && n2.m_Right!=null) { node.m_Left = n2.m_Left; node.m_Right = n2.m_Right; buildLeavesMiddleOut(node); //the stopping condition in buildTreeMiddleOut will stop the recursion, //where it won't split a node at all, and we won't recurse here. } else if(n2.m_Left!=null || n2.m_Right!=null) throw new Exception("Invalid leaf assignment. Please check code"); } } /** * Merges nodes created by createAnchorsHierarchy() * into one top node. * * @param list List of anchor nodes. * @param startIdx The start of the portion of * master index array containing these anchor * nodes. * @param endIdx The end of the portion of master * index array containing these anchor nodes. * @return The top/root node after merging * the given anchor nodes. * @throws Exception IF there is some problem in * merging. */ protected BallNode mergeNodes(Vector list, int startIdx, int endIdx) throws Exception { for(int i=0; i<list.size(); i++) { TempNode n = (TempNode) list.get(i); n.anchor = calcPivot(n.points, new MyIdxList(), m_Instances); n.radius = calcRadius(n.points, new MyIdxList(), n.anchor, m_Instances); } double minRadius, tmpRadius; //tmpVolume, minVolume; Instance pivot, minPivot=null; TempNode parent; int min1=-1, min2=-1; while(list.size() > 1) { //main merging loop minRadius=Double.POSITIVE_INFINITY; for(int i=0; i<list.size(); i++) { TempNode first = (TempNode) list.get(i); for(int j=i+1; j<list.size(); j++) { TempNode second = (TempNode) list.get(j); pivot = calcPivot(first, second, m_Instances); tmpRadius = calcRadius(first, second); //calcRadius(first.points, second.points, pivot, m_Instances); if(tmpRadius < minRadius) { //(tmpVolume < minVolume) { minRadius = tmpRadius; //minVolume = tmpVolume; minPivot = pivot; min1=i; min2=j; //minInstList = tmpInstList; } }//end for(j) }//end for(i) parent = new TempNode(); parent.left = (TempNode) list.get(min1); parent.right = (TempNode) list.get(min2); parent.anchor = minPivot; parent.radius = calcRadius(parent.left.points, parent.right.points, minPivot, m_Instances); //minRadius; parent.points = parent.left.points.append(parent.left.points, parent.right.points); list.remove(min1); list.remove(min2-1); list.add(parent); }//end while TempNode tmpRoot = (TempNode)list.get(list.size()-1); if((endIdx-startIdx+1)!= tmpRoot.points.length()) { throw new Exception("Root nodes instance list is of irregular length. " + "Please check code. Length should " + "be: " + (endIdx-startIdx+1) + " whereas it is found to be: "+tmpRoot.points.length()); } for(int i=0; i<tmpRoot.points.length(); i++) { m_InstList[startIdx+i] = ((ListNode)tmpRoot.points.get(i)).idx; } BallNode node = makeBallTreeNodes(tmpRoot, startIdx, endIdx, 0); return node; } /** * Makes BallTreeNodes out of TempNodes. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -