📄 srtree_build.c
字号:
#include <stdio.h>#include <stdlib.h>#include <limits.h>#include <math.h>#include <sys/resource.h>#include <sys/times.h>#include <unistd.h>#include "srtree.h"//#define DEBUG#define CHECK_TREE_LOG "check_tree_log"#define _EPSILON 1e-10void overflow(node_type *, int, int, node_type *, node_type *);/****************************//* Start Utility procedures *//****************************/void initialize(config_type *config, char *configFile){ FILE *fp; int string_len; //fp = fopen(CONFIG_FILE, "r"); fp = fopen(configFile, "r"); fscanf(fp, "m=%d\n", &m); fscanf(fp, "M=%d\n", &M); fscanf(fp, "dim=%d\n", &dim); fscanf(fp, "reinsert_p=%d\n", &reinsert_p); fscanf(fp, "no_histogram=%d\n",&no_histogram); if ((M+1-reinsert_p < m) || (reinsert_p < 1)) { fprintf(stderr, "Invalid t assignment of reinsert_p with M = %d and m = %d\n", M, m); fprintf(stdout, "Invalid assignment of reinsert_p with M = %d and m = %d\n", M, m); exit(EXIT_FAILURE); } fgets(config->datafile, FILENAME_MAX, fp); string_len = strlen(config->datafile); config->datafile[string_len-1] = '\0'; fgets(config->queryfile, FILENAME_MAX, fp); string_len = strlen(config->queryfile); config->queryfile[string_len-1] = '\0'; sprintf(config->save_tree_file, "%s.srtree", config->datafile);} /* initialize */void tree_leaf_node_allocate(node_type **node){ (*node) = (node_type *)malloc(sizeof(node_type)); if (!*node) { printf("Out of memory\n"); exit(EXIT_FAILURE); } (*node)->a = (double *)malloc(sizeof(double) * dim); (*node)->b = (double *)malloc(sizeof(double) * dim); if (!(*node)->a || !(*node)->b) { printf("Out of memory\n"); exit(EXIT_FAILURE); }} /* tree_leaf_node_allocate */void tree_node_allocate(node_type **node){ (*node) = (node_type *)malloc(sizeof(node_type)); if (!*node) { printf("Out of memory\n"); exit(EXIT_FAILURE); } (*node)->a = (double *)malloc(sizeof(double) * dim); (*node)->b = (double *)malloc(sizeof(double) * dim); (*node)->ptr = (node_type **)malloc(sizeof(node_type *) * M); if (!(*node)->a || !(*node)->b || !(*node)->ptr) { printf("Out of memory\n"); exit(EXIT_FAILURE); } (*node)->centroid = (double *)malloc(sizeof(double) * dim); if (!(*node)->centroid) { printf("Out of memory\n"); exit(EXIT_FAILURE); }}void tree_node_deallocate(node_type *free_node){ free(free_node->a); free(free_node->b); free(free_node->ptr); // Ling free(free_node->centroid); free(free_node); return;}void cal_MBR_node_node(double *new_a, double *new_b, node_type *node1, node_type *node2){ int i; for (i=0; i<dim; i++) { if (node1->a[i] < node2->a[i]) new_a[i] = node1->a[i]; else new_a[i] = node2->a[i]; } for (i=0; i<dim; i++) { if (node1->b[i] > node2->b[i]) new_b[i] = node1->b[i]; else new_b[i] = node2->b[i]; } return;}double cal_vol(double *a, double *b){ int i; double volume = 1.0; for (i=0; i<dim; i++) volume = volume * (double)(b[i] - a[i]); return(volume);}double cal_overlap(node_type *node1, node_type *node2){ double overlap; int i; overlap = 1.0; for (i=0; i<dim; i++) { /* 6 possible cases */ if (node2->a[i] > node1->b[i] || node1->a[i] > node2->b[i]) { overlap = 0.0; break; } else if (node2->a[i] <= node1->a[i]) { if (node2->b[i] <= node1->b[i]) { // a2, a1, b2, b1 overlap = overlap * (node2->b[i] - node1->a[i]); } else { // a2, a1, b1, b2 overlap = overlap * (node1->b[i] - node1->a[i]); } } else if (node1->a[i] < node2->a[i]) { if (node2->b[i] <= node1->b[i]) { // a1, a2, b2, b1 overlap = overlap * (node2->b[i] - node2->a[i]); } else { // a1, a2, b1, b2 overlap = overlap * (node1->b[i] - node2->a[i]); } } } return(overlap);}double cal_overlap_sum(node_type *node, int index_skip, node_type *parent_node){ double overlap; int i, stop; overlap = 0.0; stop = M - parent_node->vacancy; for (i=0; i<stop; i++) { if (i == index_skip) continue; overlap = overlap + cal_overlap(parent_node->ptr[i], node); } return(overlap);}double Dist2(node_type *node1, node_type *node2){ double distance; double *point1, *point2; int i; point1 = (double *)malloc(sizeof(double) * dim); point2 = (double *)malloc(sizeof(double) * dim); for (i=0; i<dim; i++) { point1[i] = (node1->b[i] - node1->a[i])/2.0; point2[i] = (node2->b[i] - node2->a[i])/2.0; } distance = 0.0; for (i=0; i<dim; i++) { distance = distance + pow((double)(point1[i] - point2[i]), 2.0); } return(pow(distance, 0.5));}/**************************//* End Utility procedures *//**************************/int least_overlap_enlarge(node_type *parent_node, node_type *data_node){ double new_overlap_diff, old_overlap, new_overlap; double vol_at_index, new_vol, min_overlap_diff; int index; node_type *temp_node; int i, stop; tree_node_allocate(&temp_node); stop = M - parent_node->vacancy; for(i=0; i<stop; i++) { /* original overlap */ old_overlap = cal_overlap_sum(parent_node->ptr[i], i, parent_node); /* overlap after enlargement */ cal_MBR_node_node(temp_node->a, temp_node->b, parent_node->ptr[i], data_node); new_overlap = cal_overlap_sum(temp_node, i, parent_node); /* check if index is needed to updated */ new_overlap_diff = new_overlap - old_overlap; if (i == 0) { index = i; min_overlap_diff = new_overlap_diff; } else { if (new_overlap_diff < min_overlap_diff) { index = i; min_overlap_diff = new_overlap_diff; } else if (new_overlap_diff == min_overlap_diff) { vol_at_index = cal_vol(parent_node->ptr[index]->a, parent_node->ptr[index]->b); new_vol = cal_vol(temp_node->a, temp_node->b); if(new_vol < vol_at_index) { index=i; } } } } /* end i */ tree_node_deallocate(temp_node); return (index); }/********************************************************//* least_area_enlarge(): *//* Select the node which will cause least bounding *//* box enlargement when insert an entry to it *//********************************************************/int least_area_enlarge(node_type *parent_node, node_type *data_node){ double new_vol_diff, old_vol, new_vol; double vol_at_index, min_vol_diff; int index; double *temp_a, *temp_b; int i, stop; temp_a = (double *)malloc(sizeof(double) * dim); temp_b = (double *)malloc(sizeof(double) * dim); stop = M - parent_node->vacancy; for(i=0; i<stop; i++) { /* original volume */ old_vol = cal_vol(parent_node->ptr[i]->a, parent_node->ptr[i]->b); /* volume after enlargement */ cal_MBR_node_node(temp_a, temp_b, parent_node->ptr[i], data_node); new_vol = cal_vol(temp_a, temp_b); /* check if index is needed to updated */ new_vol_diff = new_vol - old_vol; if (i == 0) { index = i; min_vol_diff = new_vol_diff; vol_at_index = new_vol; } else { if (new_vol_diff < min_vol_diff) { index = i; min_vol_diff = new_vol_diff; vol_at_index = new_vol; } else if (new_vol_diff == min_vol_diff) { if(new_vol < vol_at_index) { index=i; vol_at_index = new_vol; } } } } /* end i */ free(temp_a); free(temp_b); return (index);} /* least_enlarge *//* Return the sqaure distance between the centroid */double dist_centroid(double *centroid1, double *centroid2){ int i; double distance=0.0; for (i=0; i < dim; i++) { distance += (centroid1[i] - centroid2[i]) * (centroid1[i] - centroid2[i]); } return distance;}/* Find the subtree that is the closest to a given node by comparing the centroids */int nearest_centroid(node_type *node1, double *centroid2){ int child_chosen; double min_distance = FLT_MAX; double square_distance; int i; //printf("%d %d %d\n", M, node1->vacancy, M-node1->vacancy); if (node1->ptr[0]->attribute == LEAF) { for (i=0; i < M-node1->vacancy; i++) { square_distance = dist_centroid(node1->ptr[i]->a, centroid2); if (min_distance > square_distance) { min_distance = square_distance; child_chosen = i; } } } else { for (i=0; i < M-node1->vacancy; i++) { square_distance = dist_centroid(node1->ptr[i]->centroid, centroid2); if (min_distance > square_distance) { min_distance = square_distance; child_chosen = i; } } } //printf("child_chosen = %d\n", child_chosen); return (child_chosen);}/**********************************************************//* choose_leaf(): *//* Select a leaf node in which to place a new index entry *//* current_level is the level of the current_node *//**********************************************************/int choose_leaf(node_type **node_found, node_type *current_node, int current_level, node_type *data_node) { int child_chosen, level_found; /*******/ /* CL1 */ /*******/ /**********************************************************/ /* Initialise: */ /* Set N to be the root node(already done in insert_node) */ /**********************************************************/ /* It has been already done in insert() */ /*******/ /* CL2 */ /*******/ /**************/ /* Leaf check */ /**************/ if(current_node->attribute == ROOT && (current_node->ptr[0]==NULL || current_node->ptr[0]->attribute==LEAF)) { /************************************************************ *node_found is the root of the tree because the root is the only internal node of the tree at that time. *************************************************************/ return(current_level); // root is at level 0 } if (current_node->ptr[0]->attribute == LEAF) { *node_found = current_node; return(current_level); } /*******/ /* CL3 */ /*******/ /******************/ /* Choose subtree */ /******************/ //if (current_node->ptr[0]->ptr[0]->attribute != LEAF) { if (data_node->attribute != LEAF) { child_chosen = nearest_centroid(current_node, data_node->centroid); //child_chosen = least_area_enlarge(current_node, data_node); } else { child_chosen = nearest_centroid(current_node, data_node->a); //child_chosen = least_overlap_enlarge(current_node, data_node); } /*******/ /* CL4 */ /*******/ /***********************/ /* Descend recursively */ /***********************/ current_node = current_node->ptr[child_chosen]; level_found = choose_leaf(node_found, current_node, current_level+1, data_node); return(level_found);} /* choose_leaf */// Find the MAXDIST between point P and the hyper-rectangle (a, b)double MAXDIST(double P[], double *a, double *b){ int i; double sum = 0.0; for (i=0; i < dim; i++) { if (P[i] > b[i]) sum += pow(P[i] - a[i], 2.0); else if (P[i] < a[i]) sum += pow(b[i] - P[i], 2.0); else if (P[i] - a[i] >= b[i] - P[i]) sum += pow(P[i] - a[i], 2.0); else sum += pow(b[i] - P[i], 2.0); } return sqrt(sum);}// Update the radius of a nodevoid update_radius_node(node_type *node){ double max_dist_sphere = 0.0; double max_dist_rectangle = 0.0; double dist; int i; if (node->ptr[0]->attribute == LEAF) { for (i=0; i < M-node->vacancy; i++) { //square_dist = dist_centroid(node->centroid, node->ptr[i]->a) + node->ptr[i]->radius; dist = dist_centroid(node->centroid, node->ptr[i]->a); // node->ptr[i]->radius==0 if (max_dist_sphere < dist) max_dist_sphere = dist; // no need to find MAXDIST because max_dist_ractangle == max_dist_sphere } node->radius = sqrt(max_dist_sphere); return; } else { for (i=0; i < M-node->vacancy; i++) { dist = sqrt(dist_centroid(node->centroid, node->ptr[i]->centroid)) + node->ptr[i]->radius; if (max_dist_sphere < dist) max_dist_sphere = dist; dist = MAXDIST(node->centroid, node->ptr[i]->a, node->ptr[i]->b); if (max_dist_rectangle < dist) max_dist_rectangle = dist; } } // choose the min if (max_dist_sphere < max_dist_rectangle) node->radius = max_dist_sphere; else node->radius = max_dist_rectangle; return;}/* update the centroid of a given node */int update_centroid_node(node_type *node){ double centroid; int flag_change=FALSE; int total_size; int i; int j; //printf("h7\n"); if (node->ptr[0]->attribute == LEAF) { total_size = M-node->vacancy; for (j=0; j < dim; j++) { centroid = 0.0; for (i=0; i < total_size; i++) centroid += node->ptr[i]->a[j]; centroid = centroid * 1.0 / total_size; if (node->centroid[j] != centroid || node->total_size != total_size) { flag_change = TRUE; node->centroid[j] = centroid; } } node->total_size = total_size; } else { for (j=0; j < dim; j++) { centroid = 0.0; total_size = 0; for (i=0; i < M-node->vacancy; i++) { centroid += node->ptr[i]->centroid[j] * node->ptr[i]->total_size; total_size += node->ptr[i]->total_size; } centroid = centroid * 1.0 / total_size; if (node->centroid[j] != centroid || node->total_size != total_size) { flag_change = TRUE; node->centroid[j] = centroid; } } node->total_size = total_size; } //printf("h8\n"); return flag_change;} /* update_centroid_node *//* Update the centroid from the parent of given node up to the root */void update_centroid(node_type *node_inserted){ node_type *node = node_inserted; int flag_change; //printf("h9\n"); while(node->attribute != ROOT) { node = node->parent; flag_change = update_centroid_node(node); update_radius_node(node); if (!flag_change) break; } //printf("h10\n"); return;} /* update_centroid */void adjust_MBR(node_type *node_inserted){ node_type *node = node_inserted; int i, flag; //printf("h1\n"); while(node->attribute != ROOT) { flag = FALSE; for(i=0; i < dim; i++) { if(node->parent->a[i] > node->a[i]) { node->parent->a[i] = node->a[i]; flag = TRUE; } if(node->parent->b[i] < node->b[i]) { node->parent->b[i] = node->b[i]; flag = TRUE; } } if (flag == FALSE) break; node = node->parent; } //printf("h2\n"); return;} /* adjust_MBR */void adjust_MBR_delete(node_type *node_inserted){ node_type *node = node_inserted, *parent; int j, stop; while(node->attribute != ROOT) { parent = node->parent; stop = M - parent->vacancy;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -