⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vptree.c

📁 基于内容的多媒体数据库检索算法
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * vptree.c * * VP-tree * * - 1) Dataset reading * - 2) Building VP-Tree * - 3) Read/Write VP-Tree from/to a file * - 4) K-NNSearch * - 5) Releasing Resource * * By Philip Fu * * Fri Jan 12 22:58:44 EST 2001 * */#include <stdio.h>#include <stdlib.h>#include <limits.h>#include <math.h>#include <assert.h>#include "vptree.h"#include "qsort.h"#include "standard.h"////////////////////////////////////////////////////////////////// Function Definitions//////////////////////////////////////////////////////////////////#define _DEBUG/////////////////////////////////////////////////////////// (1) Reading the dataset/////////////////////////////////////////////////////////DataSet *readDataSet(int n, int dim, char *dataFile){    FILE   *fp;    double *ptr,f;    DataSet *dataSet;    char     buf[255];    int      i;    /////////////////////////////////////////////    // (1) Open the dataFile    fp = fopen(dataFile,"r");    if (fp == NULL) {	printf("Err : Cannot open file [%s] for reading!\n",dataFile);	return NULL;    }    /////////////////////////////////////////////    // (2) Initialize dataSet    dataSet = (DataSet *) malloc(sizeof(DataSet));    if (dataSet == NULL) {	printf("Err : not enough memory for data allocation!\n\n");	return NULL;    }    /////////////////////////////////////////////    // (3) Read the header    // read number of points    // fgets(buf,255,fp);    // sscanf(buf,"%d",&i);    dataSet->nPoints = n;    // read dimension    // fgets(buf,255,fp);    // sscanf(buf,"%d",&i);    dataSet->dimension = dim;    //printf("number of points : %d\n",dataSet->nPoints);    //printf("dimension        : %d\n",dataSet->dimension);    /////////////////////////////////////////////    // (4) Read the points    dataSet->points = (double *) 	malloc(sizeof(double)*(dataSet->nPoints)*(dataSet->dimension));    if (dataSet->points == NULL) {	printf("Err : not enough memory for data allocation!\n\n");	return NULL;    }    ptr = dataSet->points;    for (i=0; i<(dataSet->nPoints)*(dataSet->dimension); i++) {	fscanf(fp,"%lf",&f);	*ptr++ = f;    }    /////////////////////////////////////////////    // (5) Done    fclose(fp);    return dataSet;}/////////////////////////////////////////////////////////// (2) Building the VP Tree/////////////////////////////////////////////////////////doublesqrDist(double *p1, double *p2, int dim){    double dist;    int    i;    dist = pow( ((*p1++) - (*p2++)), 2.0 );    for (i=1; i<dim; i++)	dist += pow( ((*p1++) - (*p2++)), 2.0 );    return dist;}// Building a VP NodeVPNode *buildVPNode(double *points, int *dataID, int nPoints,             int numBranch, int dimension){    VPNode *vpNode;    int     i,j;    double *distances;    int  nSubset;    int *subset;    double stddev,bestStddev,dist;    int    vp,bestVP;    double *ptrVP, *ptrPt, *ptrDist;    int    *sortList;    double *tmpPt;    int    *tmpID, *ptrID;    int lastIndex,childIndex;    #ifdef _DEBUG    printf("\nEnter buildVPNode (nPoints : %d) and (address %d)\n",	nPoints,points);    printf("dataID : %d\n",dataID[0]);    #endif    /////////////////////////////////////////////    // (1) Who am I?    vpNode = (VPNode *) malloc(sizeof(VPNode));    if (vpNode == NULL)	errexit("Err : Not enough memory for allocation!\n\n");    /////////////////////////////////////////////    // (2) Fill in information    // avoid leakage when free()    vpNode->child   = NULL;    vpNode->medians = NULL;    vpNode->points  = NULL;    vpNode->dataID  = NULL;    // should we stop?    if (nPoints <=_MAX_POINTS_NODE) {	vpNode->isLeaf = _TRUE;	// alloc. data points in leaf node	vpNode->nPoints = nPoints;	vpNode->points  = (double *) malloc(sizeof(double)*dimension*nPoints);	vpNode->dataID  = (int *)    malloc(sizeof(int)*nPoints);	if (!vpNode->points || !vpNode->dataID)	    errexit("Err : Not enough memory for allocation!\n\n");	memcpy(vpNode->points,	       points,	       sizeof(double)*dimension*nPoints);	memcpy(vpNode->dataID,	       dataID,	       sizeof(int)*nPoints);	return vpNode;    } else {	vpNode->isLeaf = _FALSE;	// alloc. vp	vpNode->nPoints = 1;	vpNode->points  = (double *) malloc(sizeof(double)*dimension);	vpNode->dataID  = (int *)    malloc(sizeof(int));	if (!vpNode->points || !vpNode->dataID)	    errexit("Err : Not enough memory for allocation!\n\n");	// * vp to be found later    }    // Initialize distances    distances = (double *) malloc(sizeof(double)*(nPoints-1));    if (!distances)	errexit("Err : Not enough memory for allocation!\n\n");    /////////////////////////////////////////////    // (3) Find the vantage point    // first find a random "subset" in the dataset    if (nPoints <= _NUM_PT_RANDOM_SUBSET) {	// no need to find a random subset	nSubset = nPoints;	subset  = (int *) malloc(sizeof(int)*nSubset);	if (subset == NULL)	    errexit("Err : Not enough memory for allocation!\n\n");	for (i=0; i<nSubset; i++)	    subset[i] = i;    } else {	// find a random subset	nSubset = _NUM_PT_RANDOM_SUBSET;	subset  = (int *) malloc(sizeof(int)*nSubset);	if (subset == NULL)	    errexit("Err : Not enough memory for allocation!\n\n");	srand(time(NULL));	for (i=0; i<nSubset; i++)	    subset[i] = rand() % nPoints;    }    // Randomize some candidate vps and the one     // with the largest standard dev. will be the VP    srand(time(NULL));    // initialize    bestStddev = 0.0;    bestVP     = -1;    for (j=0; j<_NUM_CAND_VP; j++) {	// randomize a candidate vp	vp    = rand() % nPoints;	ptrVP = points + vp*dimension;	// measure the standard dev. against this vp	dist = 0.0;	for (i=0; i<nSubset; i++)	    dist += sqrDist( ptrVP, 	                     points + subset[i]*dimension, 	                     dimension );	stddev = sqrt(dist/nSubset);	// largest?	if (bestStddev < stddev) {	    bestStddev = stddev;	    bestVP     = vp;	}    }    free(subset);    #ifdef _DEBUG    printf("bestVP : %d at (%f %f %f) with stdev %f\n",	bestVP,ptrVP[0],ptrVP[1],ptrVP[2],bestStddev);    #endif    /////////////////////////////////////////////    // (4) Make the VP at the 1st    //     position by swapping    // copy from (bestVP)th to vpNode    memcpy(vpNode->points,           points + bestVP*dimension,           sizeof(double)*dimension);    vpNode->dataID[0] = dataID[bestVP];    // copy from 0th element to (bestVP)th    memcpy(points + bestVP*dimension,           points,           sizeof(double)*dimension);    dataID[bestVP] = dataID[0];    // copy from vpNode to 0th element    memcpy(points,           vpNode->points,           sizeof(double)*dimension);    dataID[0] = vpNode->dataID[0];    /////////////////////////////////////////////    // (5) Find the distance of points to the vp    ptrPt   = points + dimension;	// skip the 1st vp    ptrDist = distances;    for (i=0; i<nPoints-1; i++) {	*ptrDist++  = sqrt( sqrDist(points, ptrPt, dimension) );	 ptrPt     += dimension;	#ifdef _DEBUG	printf("cal dist. %f (%f %f %f)\n",*(ptrDist-1),		*(ptrPt-3),*(ptrPt-2),*(ptrPt-1));	#endif    }    /////////////////////////////////////////////    // (6) Sort the points in the array in the    //     ascending order of their corr. distances    // initialize    sortList = (int *) malloc(sizeof(int)*(nPoints-1));    if (sortList == NULL)	errexit("Err : Not enough memory for allocation!\n\n");    for (i=0; i<nPoints-1; i++)	sortList[i] = i;    // sorting (find the sort index)    //bubbleSortByIndex(&sortList, distances, nPoints-1);    q_sort(sortList,distances,0,nPoints-2,nPoints-1);    // rearrange the points -> tmpPt and dataID -> tmpID    tmpPt = (double *) malloc(sizeof(double)*(nPoints-1)*dimension);    tmpID = (int *)    malloc(sizeof(int)*(nPoints-1));    if (!tmpPt || !tmpID)	errexit("Err : Not enough memory for allocation!\n\n");    ptrPt = tmpPt;    ptrID = tmpID;    for (i=0; i<nPoints-1; i++) {	memcpy( ptrPt,	        points + (sortList[i]+1)*dimension,	// +1 to skip 1st vp	        sizeof(double)*dimension );	*ptrID++ = dataID[sortList[i]+1];	ptrPt += dimension;    }    // update points and dataID    memcpy(points+dimension,           tmpPt,           sizeof(double)*(nPoints-1)*dimension);    memcpy(dataID+1,           tmpID,           sizeof(int)*(nPoints-1));    free(tmpPt);    free(tmpID);    #ifdef _DEBUG    for (i=0; i<nPoints-1; i++)	printf("dist[%02d] = %f\n",i,distances[i]);    for (i=0; i<nPoints-1; i++)	printf("sort[%02d] = %d\n",i,sortList[i]);    #endif    /////////////////////////////////////////////    // (7) Fill in medians    // initialize medians    vpNode->medians = (double *) malloc(sizeof(double)*(numBranch-1));    if (!vpNode->medians)	errexit("Err : Not enough memory for allocation!\n\n");    childIndex = 1;    for (i=0; i<numBranch-1; i++) {	// last point in this branch	lastIndex = 1 + (nPoints-1)*(i+1) / numBranch - 1;	if (i == numBranch-1)	    lastIndex = nPoints - 1;	// set median	vpNode->medians[i] = distances[lastIndex-1];	// next branch	childIndex = lastIndex+1;	//printf("vpNode->medians[%d] = %f\n",i,vpNode->medians[i]);    }    free(distances);    free(sortList);    /////////////////////////////////////////////    // (8) Build VPNode of each child    // initialize childs    vpNode->child = 	(VPNode **) malloc(sizeof(VPNode *)*numBranch);    if (! vpNode->child)	errexit("Err : Not enough memory for allocation!\n\n");    childIndex = 1;    for (i=0; i<numBranch; i++) {	// last point in this branch	lastIndex = 1 + (nPoints-1)*(i+1) / numBranch - 1;	if (i == numBranch-1)	    lastIndex = nPoints - 1;        #ifdef _DEBUG	printf("from %d to %d\n",childIndex,lastIndex);	#endif	// set child	vpNode->child[i] = buildVPNode(points+childIndex*dimension,	                               dataID+childIndex,	                               lastIndex-childIndex+1,	                               numBranch,	                               dimension);	#ifdef _DEBUG	if (i != numBranch-1)	    printf("median : %f\n",vpNode->medians[i]);	#endif	// next branch	childIndex = lastIndex+1;    }    return vpNode;}// External Function ://// Building the VP Tree// - if numBranch    <= 1, use _DEFAULT_BRANCH// - note that ordering of points in dataSet is changed after calling//   (return the root node)VPTree *buildVPTree(DataSet *dataSet, int numBranch){    VPTree *vpTree;    int    *dataID, i;    ///////////////////////////////////////////    // Allocate memory    vpTree = (VPTree *) malloc(sizeof(VPTree));    dataID = (int *)    malloc(sizeof(int)*dataSet->nPoints);    if (!vpTree || !dataID)	errexit("Err : Not enough memory for allocation!\n\n");    ///////////////////////////////////////////    // (2) Initialization    if (numBranch <= 1)	numBranch  = _DEFAULT_BRANCH;    vpTree->numBranch = numBranch;    vpTree->dimension = dataSet->dimension;    // dataIDs' keep track of where the points goes in the VP tree    for (i=0; i<dataSet->nPoints; i++)	dataID[i] = i;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -