⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hnsrtreefileobj1.cpp

📁 SR-tree is an index structure for high-dimensional nearest neighbor queries
💻 CPP
📖 第 1 页 / 共 3 页
字号:
    maxCount = numClusters - minCount;

    /* initialize flags */
    if ( flags != NULL ) {
	delete[] flags;
    }
    flags = new SplitFlag[numClusters];

    for ( i=0; i<numClusters; i++ ) {
	flags[i] = SPLIT_NONE;
    }

    max.axis = -1;
    max.var = -1;

    /* calculate variance */
    for ( axis=0; axis<dimension; axis++ ) {
	double avg, var;
	double sum = 0;
	double sum2 = 0;
	int total = 0;

	for ( i=0; i<numClusters; i++ ) {
	    double coord = clusters[i].getCentroid().getCoordAt(axis);

	    /*
	     * NOTE:
	     *      In the calculation of the variance,
	     *	    it is assumed that all points are located
             *      at the centroid of the cluster.
	     */

	    sum += coord * clusters[i].getWeight();
	    sum2 += coord * coord * clusters[i].getWeight();
	    total += clusters[i].getWeight();
	}

	avg = sum / total;
	var = sum2 / total - avg * avg;

	if ( max.axis == -1 || var > max.var ) {
	    max.axis = axis;
	    max.var = var;
	}
    }

    /* sort clusters along the max variance axis */
    entries = (HnSRTreeAxisSortEntry *)
	HnMalloc(sizeof(HnSRTreeAxisSortEntry) * numClusters);

    for ( i=0; i<numClusters; i++ ) {
	entries[i].index = i;
	entries[i].coord = clusters[i].getCentroid().getCoordAt(max.axis);
    }

    qsort(entries, numClusters, sizeof(HnSRTreeAxisSortEntry),
	  HnSRTreeCompareAxisSortEntries);

    /* choose split point */
    min.count = -1;
    min.leftVar = 0;
    min.rightVar = 0;

    for ( count=minCount; count<=maxCount; count++ ) {
	double leftVar, rightVar;

	/* calculate the sum of variances on the left side */
	leftVar = 0;

	for ( axis=0; axis<dimension; axis++ ) {
	    double sum, sum2, avg, var;
	    int total;

	    /*
	     * NOTE:
	     *      In the calculation of the variance,
	     *	    it is assumed that all points are located
	     *      at the centroid of the cluster.
	     */

	    sum = 0;
	    sum2 = 0;
	    total = 0;

	    for ( i=0; i<count; i++ ) {
		HnSRTreeCluster &cluster = clusters[entries[i].index];
		double coord = cluster.getCentroid().getCoordAt(axis);
		int weight = cluster.getWeight();

		sum += coord * weight;
		sum2 += coord * coord * weight;
		total += weight;
	    }

	    avg = sum / total;
	    var = sum2 / total - avg * avg;

	    leftVar += var;
	}

	/* calculate the sum of variances on the right side */
	rightVar = 0;

	for ( axis=0; axis<dimension; axis++ ) {
	    double sum, sum2, avg, var;
	    int total;

	    /*
	     * NOTE:
	     *	    In the calculation of the variance,
	     *	    it is assumed that all points are located
	     *	    at the centroid of the cluster.
	     */

	    sum = 0;
	    sum2 = 0;
	    total = 0;

	    for ( i=count; i<numClusters; i++ ) {
		HnSRTreeCluster &cluster = clusters[entries[i].index];
		double coord = cluster.getCentroid().getCoordAt(axis);
		int weight = cluster.getWeight();

		sum += coord * weight;
		sum2 += coord * coord * weight;
		total += weight;
	    }

	    avg = sum / total;
	    var = sum2 / total - avg * avg;

	    rightVar += var;
	}

	if ( min.count < 0 ) {
	    min.count = count;
	    min.leftVar = leftVar;
	    min.rightVar = rightVar;
	}
	else {
	    if ( leftVar + rightVar < min.leftVar + min.rightVar ) {
		min.count = count;
		min.leftVar = leftVar;
		min.rightVar = rightVar;
	    }
	}
    }

    /* make split flags */
    i=0;
    while ( i<min.count ) {
	flags[entries[i].index] = LEFT;
	i++;
    }
    while ( i<numClusters ) {
	flags[entries[i].index] = RIGHT;
	i++;
    }

    if ( debug ) {
	fprintf(stderr, "HnSRTreeFileObj::selectClusters:\n");
	fprintf(stderr, "    LEFT\n");
	for ( i=0; i<numClusters; i++ ) {
	    if ( flags[i] == LEFT ) {
		fprintf(stderr,
			"    clusters[%d].getCentroid().getCoordAt(%d) = %g\n",
			i, max.axis,
			clusters[i].getCentroid().getCoordAt(max.axis));
	    }
	}
	fprintf(stderr, "    RIGHT\n");
	for ( i=0; i<numClusters; i++ ) {
	    if ( flags[i] == RIGHT ) {
		fprintf(stderr,
			"    clusters[%d].getCentroid().getCoordAt(%d) = %g\n",
			i, max.axis,
			clusters[i].getCentroid().getCoordAt(max.axis));
	    }
	}
    }

    *flags_return = flags;

    HnFree(entries, sizeof(HnSRTreeAxisSortEntry) * numClusters);
}

/*
 * Remove
 */

void
HnSRTreeFileObj::remove(const HnPoint &point, const HnDataItem &dataItem)
{
    HnSRTreeStack stack;
    HnSRTreeLeaf leaf;
    HnSRTreeNode node;
    int cursor, maxCount, minCount;
    int level;
    HnBool underflow;
    HnSRTreeCluster cluster;
    int i;

    /* search point */
    if ( (stack = searchPoint(point, dataItem)) == HnSRTreeStack::null )
	HnAbort("the given point/dataItem pair is not found in the tree.");

    if ( debug ) {
	fprintf(stderr, "HnSRTreeFile::remove: point is found.\n");
    }

    /* remove point */
    leaf = stack.getTopLeaf();
    cursor = stack.getCursor();
    stack.pop();

    leaf.removeElementAt(cursor);
	
    if ( stack.getDepth() == 0 ) {
	writeLeaf(leaf);

	if ( debug ) {
	    fprintf(stderr, "HnSRTreeFile::remove: removal is finished.\n");
	}

	return;
    }

    /* condense tree */
    reinsertList = new_HnSRTreeReinsertVector();
    reinsertedBlocks = new_HnFTlongVector();

    maxCount = HnSRTreeLeaf::getMaxCount(info);
    minCount = maxCount * info.getSplitFactor() / 100;
    underflow = (leaf.getCount() < minCount);

    if ( underflow ) {
	if ( debug ) {
	    fprintf(stderr, "HnSRTreeFile::remove: "
		    "underflow (count: %d, minCount: %d) "
		    "occurred in the leaf (offset: 0x%08X).\n",
		    leaf.getCount(), minCount,
		    (int)leaf.getOffset());
	}

	for ( i=0; i<leaf.getCount(); i++ ) {
	    HnPoint leafPoint = leaf.getPointAt(i);
	    HnDataItem leafDataItem = leaf.getDataItemAt(i);

	    reinsertList.addElement(new_HnSRTreeReinsert(leafPoint,
							 leafDataItem));
	}
	releaseBlock(leaf.getOffset());
    }
    else {
	writeLeaf(leaf);
	cluster = leaf.getCluster();
    }

    /* intermediate nodes */
    while ( stack.getDepth() > 1 ) {
	node = stack.getTopNode();
	cursor = stack.getCursor();
	level = info.getHeight() - stack.getDepth();
	stack.pop();

	if ( underflow ) {
	    node.removeElementAt(cursor);

	    maxCount = HnSRTreeNode::getMaxCount(info);
	    minCount = maxCount * info.getSplitFactor() / 100;
	    underflow = (node.getCount() < minCount);

	    if ( underflow ) {
		if ( debug ) {
		    fprintf(stderr,
			    "HnSRTreeFile::remove: "
			    "underflow (count: %d, minCount: %d) "
			    "occurred in the node "
			    "(offset: 0x%08X, level: %d).\n",
			    node.getCount(), minCount,
			    (int)node.getOffset(), level);
		}

		for ( i=0; i<node.getCount(); i++ ) {
		    long offset = node.getOffsetAt(i);
		    HnSRTreeReinsert reinsert;

		    reinsert = new_HnSRTreeReinsert(offset, level);
		    reinsertList.addElement(reinsert);
		}
		releaseBlock(node.getOffset());
	    }
	    else {
		writeNode(node);
		cluster = node.getCluster();
	    }
	}
	else {
	    node.setClusterAt(cluster, cursor);

	    writeNode(node);
	    cluster = node.getCluster();
	}
    }
		
    /* root node */
    node = stack.getTopNode();
    cursor = stack.getCursor();
    level = info.getHeight() - stack.getDepth();
    stack.pop();

    if ( underflow ) {
	node.removeElementAt(cursor);

	if ( node.getCount() == 1 ) {
	    if ( debug ) {
		fprintf(stderr, "HnSRTreeFile::remove: tree is shrunken.\n");
	    }

	    releaseBlock(node.getOffset());
	    info.setRootOffset(node.getOffsetAt(0));
	    info.setHeight(info.getHeight() - 1);

	    writeSuperBlock();
	}
	else {
	    writeNode(node);
	}
    }
    else {
	node.setClusterAt(cluster, cursor);

	writeNode(node);
    }

    /* reinsert orphaned entries */
    while ( reinsertList.size() != 0 ) {
	if ( reinsertList[0].getType() == HnSRTreeReinsert::POINT ) {
	    HnPoint point = reinsertList[0].getPoint();
	    HnDataItem dataItem = reinsertList[0].getDataItem();

	    if ( debug ) {
		fprintf(stderr,
			"HnSRTreeFile::remove: reinserting point %s.\n",
			(char *)point.toString());
	    }

	    reinsertList.removeElementAt(0);
	    insertPoint(point, dataItem);
	}
	else {
	    long offset = reinsertList[0].getOffset();
	    int level = reinsertList[0].getLevel();

	    if ( debug ) {
		fprintf(stderr, "HnSRTreeFile::remove: "
			"reinserting block 0x%08X at the level %d.\n",
			(int)offset, level);
	    }

	    reinsertList.removeElementAt(0);
	    insertBlock(offset, level);
	}
    }

    if ( debug ) {
	fprintf(stderr,	"HnSRTreeFile::remove: removal is finished.\n");
    }

    if ( debug ) {
	check();
    }
}

HnSRTreeStack
HnSRTreeFileObj::searchPoint(const HnPoint &point, const HnDataItem &dataItem)
{
    HnSRTreeStack stack;
    HnSRTreeBlock block;
    HnSRTreeNode node;
    HnSRTreeLeaf leaf;
    HnBool found;

    stack = new_HnSRTreeStack();

    block = readBlock(info.getRootOffset());
    if ( block.isNode() ) {
	node = new_HnSRTreeNode(info, block);
	stack.push(node);
    }
    else if ( block.isLeaf() ) {
	leaf = new_HnSRTreeLeaf(info, block);
	stack.push(leaf);
    }
    else {
	HnAbort("unexpected block type.");
    }

    for ( ;; ) {
	if ( stack.isTopNode() ) {
	    node = stack.getTopNode();
		
	    /*
	     * search for overlapping entries
	     */

	    if ( node.getCount() == 0 ) {
		found = HnFALSE;
	    }
	    else {
		for ( ;; ) {
		    int cursor = stack.getCursor();
		    HnRect rect = node.getClusterAt(cursor).getRect();

		    if ( debug ) {
			fprintf(stderr,
				"comparing with a rect #%d at a node %08X "
				"on the level %d.\n",
				cursor,
				(int)node.getOffset(), stack.getDepth() - 1);
		    }

		    if ( rect.includes(point) ) {
			found = HnTRUE;
			break;
		    }

		    if ( !stack.hasMore() ) {
			found = HnFALSE;
			break;
		    }

		    stack.advance();
		}
	    }

	    if ( found ) {
		int cursor = stack.getCursor();
		block = readBlock(node.getOffsetAt(cursor));

		if ( block.isNode() ) {
		    node = new_HnSRTreeNode(info, block);
		    stack.push(node);
		}
		else if ( block.isLeaf() ) {
		    leaf = new_HnSRTreeLeaf(info, block);
		    stack.push(leaf);
		}
		else {
		    HnAbort("unexpected block type.");
		}
	    }
	    else {
		/*
		 * go up until the top node has some remaining
		 * entries or the stack is empty.
		 */

		for ( ;; ) {
		    stack.pop();

		    if ( stack.getDepth() == 0 )
			return HnSRTreeStack::null;

		    if ( stack.hasMore() ) {
			break;
		    }
		}

		stack.advance();
	    }
	}
	else {
	    leaf = stack.getTopLeaf();

	    /*
	     * search for overlapping entries
	     */

	    if ( leaf.getCount() == 0 ) {
		found = HnFALSE;
	    }
	    else {
		for ( ;; ) {
		    int cursor = stack.getCursor();
		    HnPoint leafPoint = leaf.getPointAt(cursor);
		    HnDataItem leafDataItem = leaf.getDataItemAt(cursor);

		    if ( debug ) {
			fprintf(stderr,
				"comparing with a point #%d at a leaf %08X "
				"on the level %d.\n",
				cursor,
				(int)leaf.getOffset(), stack.getDepth() - 1);
		    }

		    if ( leafPoint.equals(point) &&
			 leafDataItem.equals(dataItem) ) {
			found = HnTRUE;
			break;
		    }

		    if ( !stack.hasMore() ) {
			found = HnFALSE;
			break;
		    }

		    stack.advance();
		}
	    }

	    if ( found ) {
		return stack;
	    }
	    else {
		/*
		 * go up until the top node has some remaining
		 * entries or the stack is empty.
		 */

		for ( ;; ) {
		    stack.pop();

		    if ( stack.getDepth() == 0 ) {
			return HnSRTreeStack::null;
		    }

		    if ( stack.hasMore() ) {
			break;
		    }
		}

		stack.advance();
	    }
	}
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -