📄 vptree.c
字号:
/////////////////////////////////////////// // (3) Build the VP-Tree recursively vpTree->root = buildVPNode(dataSet->points,dataID,dataSet->nPoints, numBranch,dataSet->dimension); return vpTree;}/////////////////////////////////////////////////////////// (3) Read/Write VPTree file//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// (3a) read the VP-Tree from a file// Internal function : read a VP-nodeVPNode *readVPNode(FILE *fp, int numBranch, int dimension, int *nPoints){ VPNode *vpNode; char buf[255],dummy[255]; int i,j,k; double *ptrPt, f; int *ptrID; ///////////////////////////////// // (1) Initialization vpNode = (VPNode *) malloc(sizeof(VPNode)); if (!vpNode) errexit("Err : Not enough memory for allocation!\n\n"); // for safety vpNode->child = NULL; vpNode->medians = NULL; vpNode->points = NULL; vpNode->dataID = NULL; ///////////////////////////////// // (2) What kind of node? fgets(buf,255,fp); if (!strncmp(buf,_VPTREE_LEAF,strlen(_VPTREE_LEAF))) { // Leaf Node vpNode->isLeaf = _TRUE; // How many data points? sscanf(buf,"%s %d",dummy,nPoints); vpNode->nPoints = *nPoints; // memory allocation vpNode->points = (double *) malloc(sizeof(double)*(*nPoints)*dimension); vpNode->dataID = (int *) malloc(sizeof(int)*(*nPoints)); if (!vpNode->points || !vpNode->dataID) errexit("Err : Not enough memory for allocation!\n\n"); // for fast reference ptrPt = vpNode->points; ptrID = vpNode->dataID; // Read the points and dataID for (i=0; i<*nPoints; i++) { for (j=0; j<dimension; j++) { fscanf(fp,"%lf",&f); *ptrPt++ = f; } fscanf(fp,"%d",ptrID++); //printf("%f %f %f (%d)\n", //*(ptrPt-3),*(ptrPt-2),*(ptrPt-1),*(ptrID-1)); // clear this line in fp fgets(buf,255,fp); } } else if (!strncmp(buf,_VPTREE_INTERNAL,strlen(_VPTREE_INTERNAL))) { // Internal Node vpNode->isLeaf = _FALSE; // Only one Vantage point vpNode->nPoints = *nPoints = 1; // memory allocation vpNode->points = (double *) malloc(sizeof(double)*dimension); vpNode->dataID = (int *) malloc(sizeof(int)); vpNode->medians = (double *) malloc(sizeof(double)*(numBranch-1)); vpNode->child = (VPNode **) malloc(sizeof(VPNode *)*numBranch); if ( !vpNode->points || !vpNode->dataID || !vpNode->medians || !vpNode->child ) errexit("Err : Not enough memory for allocation!\n\n"); // for fast reference ptrPt = vpNode->points; // Read the vp and its dataID for (j=0; j<dimension; j++) { fscanf(fp,"%lf",&f); *ptrPt++ = f; } fscanf(fp,"%d",vpNode->dataID); // clear this line in fp fgets(buf,255,fp); //printf("%f %f %f (%d)\n", //*(ptrPt-3),*(ptrPt-2),*(ptrPt-1),vpNode->dataID[0]); // Read the children and medians for (i=0; i<numBranch; i++) { // read child vpNode->child[i] = readVPNode(fp,numBranch,dimension,&k); *nPoints += k; // read medians if (i != numBranch-1) { fgets(buf,255,fp); sscanf(buf,"%s %s %lf",dummy,dummy,&f); vpNode->medians[i] = f; } } } else { printf("UNKNOWN NODE TYPE [%s]\n",buf); return NULL; } return vpNode;}// on error, return NULLVPTree *readVPTree(char *treeFile, int *nPoints){ VPTree *vpTree; FILE *fp; char buf[255],dummy[255]; ///////////////////////////////// // (1) open file fp = fopen(treeFile,"r"); if (!fp) { fprintf(stderr,"ERR : cannot open [%s] for reading!\n\n",treeFile); return NULL; } vpTree = (VPTree *) malloc(sizeof(VPTree)); if (!vpTree) errexit("Err : Not enough memory for allocation!\n\n"); ///////////////////////////////// // (2) write header fgets(buf,255,fp); sscanf(buf,"%s",dummy); if (strcmp(dummy,_VPTREE_ID)) { fprintf(stderr,"Err : [%s] is not a vptree file\n\n"); return NULL; } fgets(buf,255,fp); sscanf(buf,"%s %s %d",dummy,dummy,&(vpTree->numBranch)); fgets(buf,255,fp); sscanf(buf,"%s %s %d",dummy,dummy,&(vpTree->dimension)); //printf("Branching : %d\n",vpTree->numBranch); //printf("Dimension : %d\n",vpTree->dimension); ///////////////////////////////// // (3) read vpNode recursively vpTree->root = readVPNode(fp,vpTree->numBranch,vpTree->dimension,nPoints); ///////////////////////////////// // (4) Done fclose(fp); return vpTree;}/////////////////////////////////////////////////////////// (3b) write the VP-Tree to a file// Internal functionintwriteVPNode(FILE *fp, VPNode *vpNode, int numBranch, int dimension){ int i,j,nPoints,*ptrID; double *ptrPt; if (vpNode->isLeaf) { //////////////////////// // leaf node //////////////////////// // initialization nPoints = vpNode->nPoints; ptrPt = vpNode->points; ptrID = vpNode->dataID; // identifier fprintf(fp,"%s %d\n",_VPTREE_LEAF,nPoints); // data points for (i=0; i<nPoints; i++) { for (j=0; j<dimension; j++) fprintf(fp,"%f ",*ptrPt++); fprintf(fp,"%d\n",*ptrID++); } } else { //////////////////////// // Internal node //////////////////////// // initialization nPoints = 1; ptrPt = vpNode->points; // identifier fprintf(fp,"%s\n",_VPTREE_INTERNAL); // vantage point(s) for (i=0; i<dimension; i++) fprintf(fp,"%f ",*ptrPt++); fprintf(fp,"%d\n",vpNode->dataID[0]); for (i=0; i<numBranch; i++) { nPoints += writeVPNode(fp,vpNode->child[i],numBranch,dimension); if (i != numBranch-1) fprintf(fp,"median : %f\n",vpNode->medians[i]); } } return nPoints;}// External function// on error, return -1, else number of data points writtenintwriteVPTree(char *treeFile, VPTree *vpTree){ int nPoints; FILE *fp; ///////////////////////////////// // (1) open file fp = fopen(treeFile,"w"); if (!fp) { fprintf(stderr,"ERR : cannot open [%s] for writing!\n\n",treeFile); return -1; } ///////////////////////////////// // (2) write header fprintf(fp,"%s\n",_VPTREE_ID); fprintf(fp,"Branching : %d\n",vpTree->numBranch); fprintf(fp,"Dimension : %d\n",vpTree->dimension); ///////////////////////////////// // (3) write vpNode recursively nPoints = writeVPNode( fp, vpTree->root, vpTree->numBranch, vpTree->dimension ); ///////////////////////////////// // (4) Done fclose(fp); return nPoints;}/////////////////////////////////////////////////////////// (4) K-Nearest Neighbor Search/////////////////////////////////////////////////////////intsearchKNNVPNode(VPNode *vpNode, double *queryPt, double *kMinDist, double *resultPt, int *resultID, int numBranch, int dimension, int k){ double dist, *ptrPt; int i, x, y; int index, nodeCount; // visited me nodeCount = 1; // what kind of node? if (vpNode->isLeaf) { ptrPt = vpNode->points; for (i=0; i<vpNode->nPoints; i++) { // compute distance dist = sqrt( sqrDist(queryPt,ptrPt,dimension) ); // shorter? if (kMinDist[k-1] > dist) { for (x=0; x<k-1; x++) if (kMinDist[x] > dist) break; index = x; for (x=k-1; x>index; x--) { kMinDist[x] = kMinDist[x-1]; resultID[x] = resultID[x-1]; for (y=0; y<dimension; y++) resultPt[x*dimension+y] = resultPt[(x-1)*dimension+y]; } kMinDist[index] = dist; memcpy(&(resultPt[index*dimension]),ptrPt,sizeof(double)*dimension); resultID[index] = vpNode->dataID[i]; } // next ptrPt += dimension; } } else { // compute distance dist = sqrt( sqrDist(queryPt,vpNode->points,dimension) ); // shorter? if (kMinDist[k-1] > dist) { for (i=0; i<k-1; i++) if (kMinDist[i] > dist) break; index = i; for (i=k-1; i>index; i--) { kMinDist[i] = kMinDist[i-1]; resultID[i] = resultID[i-1]; for (x=0; x<dimension; x++) resultPt[i*dimension+x] = resultPt[(i-1)*dimension+x]; } kMinDist[index] = dist; memcpy(&(resultPt[index*dimension]),vpNode->points,sizeof(double)*dimension); resultID[index] = vpNode->dataID[0]; } // Prune the near part for (i=0; i<numBranch-1; i++) if (kMinDist[k-1] + vpNode->medians[i] >= dist) break; // Prune the far part for (; i<numBranch; i++) { nodeCount += searchKNNVPNode( vpNode->child[i], queryPt, kMinDist, resultPt, resultID, numBranch, dimension, k ); if ( i != numBranch-1 && kMinDist[k-1] + dist < vpNode->medians[i] ) break; } } return nodeCount;}// Perform K-Nearest Neighbor Search// Input :// - vpTree// - queryPt// - k// Return :// - number of nodes visited (int)// - list of points -> resultPt is returned in argument// - list of dataID -> resultID is returned in argument// Note : - memory for resultID and resultPt should have been // - allocated before calling.intknnsearch(VPTree *vpTree, double *queryPt, int k, double *resultPt, int *resultID){ double *kMinDist; int i,nodeCount; // Assertion assert(resultID); assert(resultPt); // initialize distances and nodeCount kMinDist = (double *) malloc(sizeof(double) * k); if (kMinDist == NULL) { printf("Err : not enough memory!\n\n"); return; } for (i=0; i < k; i++) kMinDist[i] = _LARGE_VALUE; // Start searching nodeCount = searchKNNVPNode( vpTree->root, queryPt, kMinDist, resultPt, resultID, vpTree->numBranch, vpTree->dimension, k ); return nodeCount;}/////////////////////////////////////////////////////////// (5) Free the resource/////////////////////////////////////////////////////////voidfreeDataSet(DataSet *dataSet){ if (dataSet) { if (dataSet->points) free(dataSet->points); free(dataSet); }}voidfreeVPNode(VPNode *vpNode, int numBranch){ int i; if (vpNode) { // free the child if (!vpNode->isLeaf && vpNode->child) { for (i=0; i<numBranch; i++) if (vpNode->child[i]) freeVPNode(vpNode->child[i],numBranch); } // free myself if (vpNode->child) free(vpNode->child); if (vpNode->medians) free(vpNode->medians); if (vpNode->points) free(vpNode->points); if (vpNode->dataID) free(vpNode->dataID); free(vpNode); }}voidfreeVPTree(VPTree *vpTree){ if (vpTree) { // free the root if (vpTree->root) freeVPNode(vpTree->root,vpTree->numBranch); // free myself free(vpTree); }}// Print pointvoidprintPt(FILE *out, double *pt, int dim){ int i; fprintf(out,"%f",pt[0]); for (i=1; i<dim; i++) fprintf(out," %f",pt[i]);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -