📄 srtree_search.c
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <math.h>#include <sys/resource.h>#include <sys/times.h>#include <unistd.h>#include "srtree.h"//#define DEBUG//#define DEBUG_MM#define _EPSILON 1e-10int E_dist_comp_count = 0;int E_page_access_count = 0;int E_represent_accuracy = 0;int E_prune_ans_no = 0;int E_overlap_yes = 0;int E_no_answer = 0;void initialize(config_type *config, char *configFile){ FILE *fp; int string_len; //fp = fopen(CONFIG_FILE, "r"); fp = fopen(configFile, "r"); fscanf(fp, "m=%d\n", &m); fscanf(fp, "M=%d\n", &M); fscanf(fp, "dim=%d\n", &dim); fscanf(fp, "reinsert_p=%d\n", &reinsert_p); fscanf(fp, "no_histogram=%d\n",&no_histogram); fgets(config->datafile, FILENAME_MAX, fp); string_len = strlen(config->datafile); config->datafile[string_len-1] = '\0'; fgets(config->queryfile, FILENAME_MAX, fp); string_len = strlen(config->queryfile); config->queryfile[string_len-1] = '\0'; sprintf(config->save_tree_file, "%s.srtree", config->datafile);}/* initialize */void tree_leaf_node_allocate(node_type **node){ (*node) = (node_type *)malloc(sizeof(node_type)); if (!*node) { printf("Out of memory\n"); exit(EXIT_FAILURE); } (*node)->a = (double *)malloc(sizeof(double) * dim); (*node)->b = (double *)malloc(sizeof(double) * dim); if (!(*node)->a || !(*node)->b) { printf("Out of memory\n"); exit(EXIT_FAILURE); }} /* tree_leaf_node_allocate */void tree_node_allocate(node_type **node){ (*node) = (node_type *)malloc(sizeof(node_type)); if (!*node) { printf("Out of memory\n"); exit(EXIT_FAILURE); } (*node)->a = (double *)malloc(sizeof(double) * dim); (*node)->b = (double *)malloc(sizeof(double) * dim); (*node)->ptr = (node_type **)malloc(sizeof(node_type *) * M); if (!(*node)->a || !(*node)->b || !(*node)->ptr) { printf("Out of memory\n"); exit(EXIT_FAILURE); } (*node)->centroid = (double *)malloc(sizeof(double) * dim); if (!(*node)->centroid) { printf("Out of memory\n"); exit(EXIT_FAILURE); }}int read_query(char *queryfile, double ***query, int no_query){ FILE *fp_pos; //int no_query; int i,j; //no_query=100; fp_pos = fopen(queryfile, "r"); if (!fp_pos) { printf("Can't open file, %s\n", queryfile); exit(EXIT_FAILURE); } (*query) = (double **)malloc(no_query * sizeof(double *)); for (i=0; i<no_query; i++) (*query)[i] = (double *)malloc(dim * sizeof(double)); for (i=0; i<no_query; i++) for (j=0; j<dim; j++) fscanf(fp_pos, "%lf", &((*query)[i][j])); fclose(fp_pos); return(no_query);} /* read_query *//* Distance Computation */double MINDIST(double *P, double *a, double *b){ int i; double sum = 0.0; for(i=0 ;i<dim ;i++) { if (P[i] > b[i]) sum += pow(P[i] - b[i], 2.0); else if (P[i] < a[i]) sum += pow(a[i] - P[i], 2.0); } return sum; }double cal_Euclidean(node_type *node, double *query){ int i; double distance; distance = 0.0; for(i=0; i< dim ;i++) distance += pow((node->a[i] - query[i]),(double)2.0); return (sqrt(distance));}/***********************************//* rectangle_search(): *//* search query points on the tree *//*************************** *******/ int rectangle_search(node_type *curr_node, double *query, double error){ int find_flag; int i, j, stop, flag; int query_dim; query_dim = dim; /* Search leaf node */ if(curr_node->attribute == LEAF) { for(j=0; j<query_dim; j++) printf("%f ", curr_node->a[j]); printf(" at %d\n", curr_node->id); return(FOUND); } stop = M - curr_node->vacancy; for(i=0; i < stop; i++) { flag = TRUE; /* search subtree */ for(j=0; j < query_dim; j++) { if(curr_node->ptr[i]->a[j] > (query[j] + error) || curr_node->ptr[i]->b[j] < (query[j] - error)) { flag = FALSE; break; } } /* search the node which contains the query */ if(flag==TRUE) { find_flag = rectangle_search(curr_node->ptr[i], query, error); } } return(find_flag); }/* rectangle_search *//**********************************//* rectangle_search_tree(): *//* prepare to search query points *//**********************************/ void rectangle_search_tree(node_type *root, int no_query, double **query, double error) { int query_index; int j, find_flag = NOT_FOUND; int query_dim; query_dim = dim; /* start search data points by invoking rectangle_search() */ for(query_index=0; query_index < no_query; query_index++) { printf("Query "); for(j=0; j < query_dim; j++) printf("%f ", query[query_index][j]); printf("is found satisfied with\n"); find_flag = NOT_FOUND; find_flag = rectangle_search(root, query[query_index], error); } } /* rectangle_search_tree */void read_inter_node(node_type *node, FILE *fp){ int i, count; for (i = 0; i<dim; i++) fscanf(fp, "%lf\n", &((node->a)[i])); for (i = 0; i<dim; i++) fscanf(fp, "%lf\n", &((node->b)[i])); fscanf(fp, "%d\n", &(node->attribute)); if (node->attribute != LEAF) for (i = 0; i<dim; i++) fscanf(fp, "%lf\n", &((node->centroid)[i])); else memcpy(node->centroid, node->a, sizeof(double) * dim); if (node->attribute == LEAF) { fscanf(fp, "%d\n", &(node->id)); } fscanf(fp, "%d\n", &(node->vacancy)); if (node->attribute != LEAF) { fscanf(fp, "%lf\n", &(node->radius)); fscanf(fp, "%d\n", &(node->total_size)); } else { node->radius = 0.0; // Ling node->total_size = 1; } /**************************************** printf("press 1\n"); scanf("%d", &dummy); for (i = 0; i<dim; i++) printf("%f ", node->a[i]); printf("\n"); for (i = 0; i<dim; i++) printf("%f ", node->b[i]); printf("\n"); printf("attrib[%d] ", node->attribute); if (node->attribute == LEAF) { printf("id[%d] ", node->id); } printf("vacancy[%d]\n", node->vacancy); ****************************************/ if (node->attribute != LEAF) { count = M - node->vacancy; for (i=0; i<count; i++) { tree_node_allocate(&(node->ptr[i])); read_inter_node(node->ptr[i], fp); } } return; } void read_srtree(node_type **root, char save_tree_file[]){ FILE *fp; tree_node_allocate(root); //fp = fopen(SAVE_SRTREE_FILE, "r"); fp = fopen(save_tree_file, "r"); //printf("save_tree: %s\n", save_tree_file); if (!fp) { printf("Can't open %s\n", save_tree_file); exit(EXIT_FAILURE); } read_inter_node(*root, fp); fclose(fp); return; }void free_tree(node_type *node){ int i; if (!node) return; free(node->a); free(node->b); free(node->centroid); for (i=0; i < M-node->vacancy; i++) free_tree(node->ptr[i]); free(node); return;}void NN_update(NN_type *NN, double dist, node_type *node, int k){ int i=0; while (i<k-1 && NN->next->dist >= dist) { NN->dist=NN->next->dist; NN->oid=NN->next->oid; NN->pointer=NN->next->pointer; NN=NN->next; i++; } NN->dist=dist; NN->oid=node->id; NN->pointer=node; return;}static int compare(ABL *i, ABL *j) { if (i->min > j->min) return (1); if (i->min < j->min) return (-1); return (0);}/* Return the distance between the centroid */double dist_centroid(double *centroid1, double *centroid2){ int i=dim; double d,distance=0.0; while (i--) { d = (*centroid1++) - (*centroid2++); distance += d*d; }/* for (i=0; i < dim; i++) { distance += (centroid1[i] - centroid2[i]) * (centroid1[i] - centroid2[i]); }*/ return sqrt(distance);}void gen_ABL(node_type *node, ABL branch[], double *query, int total){ int i; double dist_rectangle; double dist_sphere; for (i=0;i<total;i++) { branch[i].node=node->ptr[i]; //branch[i].min=MINDIST(query, node->ptr[i]->a, node->ptr[i]->b); dist_rectangle = MINDIST(query, node->ptr[i]->a, node->ptr[i]->b); //dist_rectangle = sqrt(dist_rectangle); dist_sphere = dist_centroid(query, node->ptr[i]->centroid) - node->ptr[i]->radius; if (dist_sphere < 0.0) dist_sphere = 0.0; else dist_sphere *= dist_sphere; #ifdef DEBUG printf("ABL %d: dist_sphere=%f, dist_rectangle=%f, radius=%f\n", i, dist_sphere, dist_rectangle, node->ptr[i]->radius); #endif if (dist_rectangle > dist_sphere) branch[i].min = dist_rectangle; else branch[i].min = dist_sphere; } qsort(branch,total,sizeof(struct BranchArray),compare); return;}void k_NN_NodeSearch(node_type *curr_node, double *query, NN_type *NN, int k){ int i, total; double dist; ABL *branch; E_page_access_count++; /* Please refer NN Queries paper */ if (curr_node->ptr[0]->attribute == LEAF) { total = M - curr_node->vacancy; for (i=0;i<total;i++) { #ifdef DEBUG printf("compare with %d\n", curr_node->ptr[i]->id); #endif dist = cal_Euclidean(curr_node->ptr[i], query); if (dist < NN->dist) NN_update(NN, dist, curr_node->ptr[i], k); } } else { /* Please refer SIGMOD record Sep. 1998 Vol. 27 No. 3 P.18 */ total=M-curr_node->vacancy; branch=(struct BranchArray *)malloc(total*sizeof(struct BranchArray)); #ifdef DEBUG_MM if (!branch) { printf("Out of memory\n"); exit(EXIT_FAILURE); } #endif gen_ABL(curr_node, branch, query, total); #ifdef DEBUG printf("[node=%p]\n", curr_node); i = total; while (i--) printf("branch[%d].min = %f, NN->dist = %f\n", i, branch[i].min, NN->dist); #endif for (i=0;i<total;i++) { if (branch[i].min - _EPSILON >= NN->dist) break; else k_NN_NodeSearch(branch[i].node,query,NN,k); } free(branch); } return;}void k_NN_search(node_type *root, double *query, int k){ int i; NN_type *NN, *head; if ((NN = (NN_type *)malloc(sizeof(NN_type))) == NULL) fprintf(stderr, "malloc error at k-NN_search 1\n"); NN->oid = UNDEFINED; NN->dist = INFINITY; NN->pointer = NULL; head=NN; for (i=0; i<k-1; i++) { if ((NN->next = (NN_type *)malloc(sizeof(NN_type))) == NULL) fprintf(stderr, "malloc error at k-NN_search 1\n"); NN->next->oid = UNDEFINED; NN->next->dist = INFINITY; NN = NN->next; } NN->next = NULL; k_NN_NodeSearch(root, query, head, k); NN=head; printf("\n"); while (NN != NULL) { printf("ID: %d Dist: %f\n", NN->oid, NN->dist); NN = NN->next; } printf("\n"); return; }int main(int argc, char *argv[]){ node_type *root; config_type config; int no_query; double **query; double error; int i, nn; //char E_result_filename[FILENAME_MAX]; // for experiment float userTime, sysTime; struct rusage myTime1, myTime2; FILE *E_result_fp; double E_represent_accuracy_sum, E_prune_accuracy_sum; int E_page_access_count_sum; //////////////////////////// if (argc != 4) { //printf("Usage: %s <config> <k> <#query> <result file>\n", argv[0]); printf("Usage: %s <config> <k> <#query>\n", argv[0]); exit(EXIT_FAILURE); } //strcpy(E_result_filename, argv[4]); //////////////////////////// initialize(&config, argv[1]); printf("read_srtree\n"); read_srtree(&root, config.save_tree_file); printf("read_query\n"); no_query = read_query(config.queryfile, &query, atoi(argv[3])); printf("start searching\n"); if (CHOICE==RANGE_SEARCH) { while (TRUE) { printf("Please input the error bound:"); scanf("%lf", &error); rectangle_search_tree(root, no_query, query, error); } } else if (CHOICE == kNN_SEARCH) { nn = atoi(argv[2]); E_page_access_count_sum = 0; E_represent_accuracy_sum = 0.0; E_prune_accuracy_sum = 0.0; E_dist_comp_count = 0; getrusage(RUSAGE_SELF,&myTime1); for (i=0; i<no_query; i++) { printf("Query %d\n", i+1); E_page_access_count = 0; E_represent_accuracy = 0; E_prune_ans_no = 0; E_overlap_yes = 0; k_NN_search(root, query[i], nn); E_page_access_count_sum += E_page_access_count; E_represent_accuracy_sum += ((double)E_represent_accuracy)/((double)E_page_access_count); E_prune_accuracy_sum += ((double)E_overlap_yes)/((double)E_prune_ans_no); printf("---------------------------\n"); } getrusage(RUSAGE_SELF,&myTime2); // for experiment userTime = ((float) (myTime2.ru_utime.tv_sec - myTime1.ru_utime.tv_sec)) + ((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; sysTime = ((float) (myTime2.ru_stime.tv_sec - myTime1.ru_stime.tv_sec)) + ((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; //E_result_fp = fopen(E_result_filename, "w"); E_result_fp = stdout; fprintf(E_result_fp, "Number of query is %d\n", no_query); fprintf(E_result_fp, "Average no. of page accesses = %f\n", (double)E_page_access_count_sum/(double)no_query); fprintf(E_result_fp, "User time : %f seconds\n",userTime); fprintf(E_result_fp, "System time : %f seconds\n",sysTime); fprintf(E_result_fp, "Total time : %f seconds\n",userTime+sysTime); fprintf(E_result_fp, "\n"); fclose(E_result_fp); } // end kNN search printf("free_tree\n"); free_tree(root); return(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -