⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hnsrtreefileobj2.cpp

📁 SR-tree is an index structure for high-dimensional nearest neighbor queries
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*
 * HnSRTreeFileObj2.cc (static construction methods)
 * Copyright (C) 1999 Norio Katayama
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA
 *
 * 03/20/96 katayama@rd.nacsis.ac.jp
 * $Id: HnSRTreeFileObj2.cc,v 1.4 2002/09/13 03:50:54 katayama Exp $
 */

#include <math.h>
#include <stdio.h>
#ifndef _MSC_VER
#include <unistd.h>
#include <sys/types.h>
#endif
#include "HnSRTree/HnSRTreeFile.hh"
#include "HnSRTree/HnSRTreeFileObj.hh"
#include "HnSRTree/HnBinarySearch.hh"
#include "HnSRTree/HnQuickSelect.hh"
#include "HnSRTree/HnQuickSort.hh"
#include "HnSRTree/HnStatistics.hh"

/*
 * Construction (VAM)
 */

void
HnSRTreeFileObj::constructTree_VAM(HnPointVector &points,
				   HnDataItemVector &dataItems)
{
    REntry entry;

    entry = CreateVAMSRTree(points, dataItems);

    if ( entry.type == HnSRTreeBlock::NODE ) {
	info.setRootOffset(entry.node.getOffset());
	info.setHeight(entry.level + 1);

	writeSuperBlock();
    }
    else if ( entry.type == HnSRTreeBlock::LEAF ) {
	info.setRootOffset(entry.leaf.getOffset());
	info.setHeight(entry.level + 1);

	writeSuperBlock();
    }
    else {
	HnAbort("unexpected block type (%d).", entry.type);
    }
}

HnSRTreeFileObj::REntry
HnSRTreeFileObj::CreateVAMSRTree(HnPointVector &points,
				 HnDataItemVector &dataItems)
{
    REntry out_entry, *entries;
    int INTERNAL_FANOUT, BUCKET_SIZE, MAX_TREE_LEVELS;
    int start, end;

    INTERNAL_FANOUT = HnSRTreeNode::getMaxCount(info);
    BUCKET_SIZE = HnSRTreeLeaf::getMaxCount(info);
    start = 0;
    end = points.size();

    if ( points.size() <= BUCKET_SIZE ) {
	MAX_TREE_LEVELS = 1;
    }
    else {
	MAX_TREE_LEVELS = (int)ceil(log((double)points.size()/BUCKET_SIZE)/
				    log(INTERNAL_FANOUT));
    }

    entries = new REntry[INTERNAL_FANOUT * MAX_TREE_LEVELS];

    if ( debug ) {
	fprintf(stderr, "The length of entries[] = %d\n",
		INTERNAL_FANOUT * MAX_TREE_LEVELS);
    }

    BuildVAMSRTree(points, dataItems, start, end, entries, -1);
    out_entry = entries[0];

    delete[] entries;

    return out_entry;
}

int
HnSRTreeFileObj::BuildVAMSRTree(HnPointVector &points,
				HnDataItemVector &dataItems,
				int start, int end,
				REntry *entries, int level)
{
    int size, child_level, cscap, lo_size, lo_entries, hi_entries, out_entries;
    int BUCKET_SIZE = HnSRTreeLeaf::getMaxCount(info);
    int INTERNAL_FANOUT = HnSRTreeNode::getMaxCount(info);
    double LOG_INTERNAL_FANOUT = log(INTERNAL_FANOUT);
    int gscap;	/* grandchild subtree capacity */

    if ( debug ) {
	fprintf(stderr, "BuildVAMSRTree: starting ... \n");
	fprintf(stderr, "                BUCKET_SIZE=%d, INTERNAL_FANOUT=%d\n",
		BUCKET_SIZE, INTERNAL_FANOUT);
	fprintf(stderr, "                LOG_INTERNAL_FANOUT=%g\n",
		LOG_INTERNAL_FANOUT);
	fprintf(stderr, "                start=%d, end=%d, level=%d\n",
		start, end, level);
    }

    size = end - start;

    if ( level >= 0 ) {
	if ( size < BUCKET_SIZE / 2 ) {
	    HnAbort("BulidVAMSRTree: invalid size (%d).", size);
	}
    }

    if ( size <= BUCKET_SIZE ) {
	entries[0] = CreateRNodeBucket(points, dataItems, start, end);
	return 1;
    }
    if ( size <= 2 * BUCKET_SIZE ) {
	child_level = 0;
	cscap = 1;
	gscap = 0;
    }
    else {
	if ( info.getStaticAlgorithm() == HnSRTreeInfo::VAM_ORIGINAL ) {
	    child_level = (int)((log(size/(2*BUCKET_SIZE)))/
				LOG_INTERNAL_FANOUT);
	}
	else {
#if 0
	    child_level = (int)ceil(log(ceil((double)size/BUCKET_SIZE))/
				    LOG_INTERNAL_FANOUT) - 1;
#else
	    /* circumvent round-off error */
	    int numNodes = (int)ceil((double)size/BUCKET_SIZE);
	    int count = 1;
	    int exp = 0;
	    while ( count < numNodes ) {
		count = INTERNAL_FANOUT * count;
		exp ++;
	    }
	    child_level = exp - 1;
#endif
	}
	cscap = (int)(BUCKET_SIZE * pow(INTERNAL_FANOUT, child_level));
	gscap = cscap / INTERNAL_FANOUT;
    }

    if ( debug ) {
	fprintf(stderr, "BuildVAMSRTree: "
		"size=%d, child_level=%d, cscap=%d, gscap=%d\n",
		size, child_level, cscap, gscap);
    }

    lo_size = SplitDataset(points, dataItems, start, end, size, cscap, gscap);
    lo_entries = BuildVAMSRTree(points, dataItems,
				start, start + lo_size,
				entries, child_level);
    hi_entries = BuildVAMSRTree(points, dataItems,
				start + lo_size, end,
				entries + lo_entries, child_level);
    out_entries = lo_entries + hi_entries;

    if ( debug ) {
	fprintf(stderr,
		"BuildVAMSRTree: "
		"lo_size=%d, lo_entries=%d, hi_entries=%d, out_entries=%d\n",
		lo_size, lo_entries, hi_entries, out_entries);
    }

    if ( level == -1 || child_level < level ) {
	if ( debug ) {
	    /*
	     * verify that the minimum number of nodes are used.
	     */
	    int i;

	    if ( end < points.size() ) {
		/* left side */
		for ( i=0; i<out_entries; i++ ) {
		    if ( child_level == 0 ) {
			/* leaf */
			if ( entries[i].leaf.getCount() < BUCKET_SIZE ) {
			    HnAbort("a leaf is not fully utilized.");
			}
		    }
		    else {
			/* node */
			if ( entries[i].node.getCount() < INTERNAL_FANOUT ) {
			    HnAbort("a node is not fully utilized.");
			}
		    }
		}
	    }
	    else {
		/* right side */
		for ( i=0; i<out_entries-2; i++ ) {
		    if ( child_level == 0 ) {
			/* leaf */
			if ( entries[i].leaf.getCount() < BUCKET_SIZE ) {
			    HnAbort("a leaf is not fully utilized.");
			}
		    }
		    else {
			/* node */
			if ( entries[i].node.getCount() < INTERNAL_FANOUT ) {
			    HnAbort("a node is not fully utilized.");
			}
		    }
		}
		for ( i=out_entries-2; i<out_entries; i++ ) {
		    if ( child_level == 0 ) {
			/* leaf */
			if ( entries[i].leaf.getCount() < BUCKET_SIZE/2 ) {
			    HnAbort("a leaf is not fully utilized.");
			}
		    }
		    else {
			/* node */
			if ( entries[i].node.getCount() < INTERNAL_FANOUT/2 ) {
			    HnAbort("a node is not fully utilized.");
			}
		    }
		}
	    }   
	}

	entries[0] = CreateRNodeFromEntries(entries, out_entries);
	out_entries = 1;
    }

    if ( debug ) {
	fprintf(stderr, "BuildVAMSRTree: returning ... \n");
    }

    return out_entries;
}

HnSRTreeFileObj::REntry
HnSRTreeFileObj::CreateRNodeBucket(HnPointVector &points,
				   HnDataItemVector &dataItems,
				   int start, int end)
{
    HnSRTreeLeaf leaf;
    int i;
    REntry entry;

    if ( debug ) {
	fprintf(stderr,
		"CreateRNodeBucket: allocating a leaf (start=%d, end=%d)\n",
		start, end);
    }

    leaf = allocateLeaf();

    for ( i=start; i<end; i++ ) {
	leaf.addElement(points[i], dataItems[i]);
    }

    writeLeaf(leaf);

    entry.type = HnSRTreeBlock::LEAF;
    entry.leaf = leaf;
    entry.node = HnSRTreeNode::null;
    entry.level = 0;

    return entry;
}

HnSRTreeFileObj::REntry
HnSRTreeFileObj::CreateRNodeFromEntries(REntry *entries, int count)
{
    HnSRTreeNode node;
    int i;
    REntry entry;

    if ( debug ) {
	fprintf(stderr,
		"CreateRNodeFromEntries: allocating a node (count=%d)\n",
		count);
    }

    node = allocateNode();

    /* confirm the balance of the tree */
    for ( i=1; i<count; i++ ) {
	if ( entries[i].level != entries[0].level ) {
	    int j;

	    for ( j=0; j<count; j++ ) {
		fprintf(stderr, "entries[%d].level=%d\n",
			j, entries[j].level);
	    }

	    HnAbort("subtree is not balanced.");
	}
    }

    for ( i=0; i<count; i++ ) {
	switch ( entries[i].type ) {
	case HnSRTreeBlock::NODE:
	    node.addElement(entries[i].node.getCluster(),
			    entries[i].node.getOffset());
	    break;
	case HnSRTreeBlock::LEAF:
	    node.addElement(entries[i].leaf.getCluster(),
			    entries[i].leaf.getOffset());
	    break;
	default:
	    HnAbort("unexpected block type (%d).", entries[i].type);
	}
    }

    writeNode(node);

    entry.type = HnSRTreeBlock::NODE;
    entry.leaf = HnSRTreeLeaf::null;
    entry.node = node;
    entry.level = entries[0].level + 1;

    return entry;
}

int
HnSRTreeFileObj::SplitDataset(HnPointVector &points,
			      HnDataItemVector &dataItems,
			      int start, int end, int size,
			      int cscap, int gscap)
{
    switch ( info.getStaticAlgorithm() ) {
    case HnSRTreeInfo::VAM_ORIGINAL:
    case HnSRTreeInfo::VAM_CORRECTED:
	return SplitDataset_VAM(points, dataItems,
				start, end, size, cscap, gscap);
    default:
	HnAbort("unexpected value for ``staticAlgorithm'' (%d).",
		info.getStaticAlgorithm());
    }
}

int
HnSRTreeFileObj::SplitDataset_VAM(HnPointVector &points,
				  HnDataItemVector &dataItems,
				  int start, int end, int size,
				  int cscap, int gscap)
{
    int lo_size, split_dim;

    if ( info.getStaticAlgorithm() == HnSRTreeInfo::VAM_ORIGINAL ) {
	lo_size = cscap * (size / (2 * cscap));
    }
    else {
	if ( gscap != 0 && size <= 2 * cscap ) {
	    /* split the dataset by the grandchild subtree capacity */
	    lo_size = gscap * (int)floor(ceil((double)size/gscap)/2);
	}
	else {
	    /* split the dataset by the child subtree capacity */
	    lo_size = cscap * (int)floor(ceil((double)size/cscap)/2);
	}
    }

    split_dim = FindMaxVarianceDimension(points, start, end);
    SelectOnDimension_VAM(points, dataItems, split_dim, start, end, lo_size);

    if ( debug ) {
	fprintf(stderr,
		"SplitDataset: size=%d, cscap=%d, gscap=%d, lo_size=%d.\n",
		size, cscap, gscap, lo_size);
    }

    return lo_size;
}

void
HnSRTreeFileObj::SelectOnDimension_VAM(HnPointVector &points,
				       HnDataItemVector &dataItems,
				       int split_dim,
				       int start, int end, int lo_size)
{
    if ( debug ) {
	fprintf(stderr, "SelectOnDimension: "
		"split_dim=%d, start=%d, end=%d, lo_size=%d.\n",
		split_dim, start, end, lo_size);
    }

    selectPoint(points, dataItems, start, end - start, split_dim, lo_size);
}

int
HnSRTreeFileObj::FindMaxVarianceDimension(HnPointVector &points,
					  int start, int end)
{
    return getMaxVarianceAxis(points, start, end - start);
}

/*
 * searchPoint()
 */

class HnSRTreeSearchPoint: public HnBinarySearch {
private:
    HnPointVector points;
    int offset;
    int count;
    double keyCoord;
    int axis;
    int order;

private:
    int getNumElements(void) {
	return count;
    }
    int compareToElementAt(int i) {
	double coord = points[offset + i].getCoordAt(axis);

	if ( order > 0 ) {
	    if ( keyCoord == coord) {
		return 0;
	    }
	    else if ( keyCoord < coord ) {
		return -1;
	    }
	    else {
		return 1;
	    }
	}
	else {
	    if ( keyCoord == coord ) {
		return 0;
	    }
	    else if ( keyCoord < coord ) {
		return 1;
	    }
	    else {
		return -1;
	    }
	}
    }   

    HnSRTreeSearchPoint(const HnPointVector &points, int offset, int count,
			double keyCoord, int axis, int order) {
	this->points = points;
	this->offset = offset;
	this->count = count;
	this->keyCoord = keyCoord;
	this->axis = axis;
	this->order = order;
    }

public:
    static void search(const HnPointVector &points, int offset, int count,
		       double keyCoord, int axis, int order,
		       HnBool *found_return, int *index_return) {
	HnSRTreeSearchPoint searcher(points, offset, count,
				     keyCoord, axis, order);
	searcher.searchElements(found_return, index_return);
    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -