📄 hyper.cpp
字号:
// printf("\n*******************************************\n");
// printf("Read %d headers\n", i);
fclose(fd);
third=clock();
trace_evaluation(trace, i, root);
forth=clock();
duration2=forth-third;
long int space=0;
space_node(root,space);
space=space/(1024*1024);
printf("规则的个数 初始化时间毫秒 搜索时间毫秒 空间MBytes\n");
printf("%u %f %f %u\n",original,duration1,duration2/i,(node_count*32+branch_count*4+filters_count*16)/1024);
// printf("%u %f %f %u\n",original,duration1,duration2/i,space);
}
node *tree_construction(id_list *filters, hyper_rectangle rectangle, UINT depth) {
UINT id_list_count(id_list *);
UINT minimal_value(id_list *, UINT), maximal_value(id_list *, UINT);
id_list *add_id_list(id_list *, filter *);
id_list *remove_id_list(id_list *, filter *);
bool id_list_compare(id_list *, id_list *);
UINT filter_count=id_list_count(filters), i;
cut_calculation *cuts;
id_list **filter_lists;
node *current_node=(node *)calloc(1, sizeof(node));
node_count++;
//没有给当前节点规定范围,加入对应域的赋值过程
//修改时间:3月3日
for(i=0;i<DIMENSIONS;i++)
{
current_node->start[i]=rectangle.start[i];
current_node->end[i]=rectangle.end[i];
}
/*if the number of filters is less than the certain value, the node becomes to a leaf*/
// if ((filter_count <= bucket_size) || (depth > 8)){
if (filter_count <= common_bucket_size) {
current_node->filters=filters;
current_node->filter_count=filter_count;
filters_count+=filter_count;
// for(i=0;i<DIMENSIONS;i++)
// {
// current_node->start[i]=rectangle.start[i];
// current_node->end[i]=rectangle.end[i];
// }
return current_node;
}
/* find out and remove the filters, which completely cover the rectangle of the current node */
for (id_list *head=filters; head!=NULL; head=head->next) {
bool filter_all_cover=true;
for (i=0; i<DIMENSIONS; i++)
if ((head->filters->start[i] > rectangle.start[i]) || (head->filters->end[i] < rectangle.end[i]))
filter_all_cover=false;
//把通配符的规则加入当前节点
if (filter_all_cover)
current_node->filters=add_id_list(current_node->filters, head->filters);
}
// for (id_list *head=current_node->filters; head!=NULL; head=head->next)
//从原来的链表中将通配符的规则删掉
for (head=current_node->filters; head!=NULL; head=head->next)
filters=remove_id_list(filters, head->filters);
filter_count=id_list_count(filters);
if (filter_count==0)
return current_node;
//这句话在此不知道作用是什么,因此隐去
//修改时间:3月3日
// current_node->filters=NULL;
#ifdef region_compact
/* region compaction */
for (i=0; i<DIMENSIONS; i++) {
rectangle.start[i]=max(minimal_value(filters, i), rectangle.start[i]);
rectangle.end[i]=min(maximal_value(filters, i), rectangle.end[i]);
}
/* This time, we remove the filters covered the compacted region first to speed up */
UINT count=0;
for (id_list *head=filters; (head!=NULL) && (count < common_bucket_size); head=head->next) {
bool filter_all_cover=true;
for (i=0; (i<DIMENSIONS); i++)
if ((head->filters->start[i] > rectangle.start[i]) || (head->filters->end[i] < rectangle.end[i]))
filter_all_cover=false;
if (filter_all_cover) {
count++;
current_node->filters=add_id_list(current_node->filters, head->filters);
}
}
for (id_list *head=current_node->filters; head!=NULL; head=head->next)
filters=remove_id_list(filters, head->filters);
filter_count=id_list_count(filters);
if (filter_count==0)
return current_node;
#endif
/* The function calculates the optimal cut for all dimensions */
cut_calculation *cuts_optimization(hyper_rectangle, id_list *);
cuts=cuts_optimization(rectangle, filters);
UINT total_cuts=1;
for (i=0; i<DIMENSIONS; i++) {
current_node->start[i]=rectangle.start[i];
current_node->end[i]=rectangle.end[i];
current_node->cut[cuts[i].dimension]=cuts[i].cuts;
total_cuts*=cuts[i].cuts;
}
/*if there is no suggested cut, the node will become a leave */
if (total_cuts <= cut_threshold) {
current_node->filters=filters;
filters_count+=filter_count;
for (i=0; i<DIMENSIONS; i++)
current_node->cut[i]=1;
return current_node;
}
/*
if (total_cuts == 1) {
current_node->filters=filters;
filters_count+=filter_count;
for (i=0; i<DIMENSIONS; i++)
current_node->cut[i]=1;
return current_node;
}
*/
current_node->branch=(node **)calloc(total_cuts, sizeof(node));
filter_lists=(id_list **)calloc(total_cuts, sizeof(id_list));
current_node->rectangle=(hyper_rectangle *)calloc(total_cuts, sizeof(hyper_rectangle));
/* Calculate the region of the hyper rectangle */
void rectangle_generation(node *);
rectangle_generation(current_node);
/* Decide the relevant rectangles of each filter by appending them into the associated link lists*/
void filter_dispatch(id_list *, node *, id_list **, UINT);
filter_dispatch(filters, current_node, filter_lists, total_cuts);
/*
bool rectanglecmp(hyper_rectangle, hyper_rectangle);
for (i=0; i<total_cuts; i++) {
if (rectanglecmp(rectangle, current_node->rectangle[i])) {
printf("\n %d, %d", current_node->cut[0], current_node->cut[1]);
for (UINT j=0; j<DIMENSIONS; j++)
printf("%x,%x, ", current_node->rectangle[i].start[j], current_node->rectangle[i].end[j]);
}
}
*/
#ifdef debug
printf("\n%d, %d, %d, %d, ", node_count, id_list_count(filters), current_node->filter_count, total_cuts);
for (i=0; i<DIMENSIONS; i++)
printf("%x,%x(%d), ", current_node->start[i], current_node->end[i], current_node->cut[i]);
#endif
if (current_node->filter_count == filter_count) {
for (i=0; i<total_cuts; i++)
current_node->branch[i]=NULL;
for (i=0; i<DIMENSIONS; i++)
current_node->cut[i]=1;
filters_count+=filter_count;
return current_node;
}
for (i=0; i<total_cuts; i++)
if (filter_lists[i]!=NULL)
if (id_list_compare(filter_lists[i], current_node->filters))
filter_lists[i]=NULL;
#ifdef redundant_removal
rectangle_list *rectangles=(rectangle_list *)calloc(1, sizeof(rectangle_list)), *tmp;
rectangles->rectangle=current_node->rectangle[0];
rectangles->filters=filter_lists[0];
for (i=1; i<total_cuts; i++) {
bool flag=true;
for (tmp=rectangles; (tmp!=NULL) && flag; tmp=tmp->next)
if (id_list_compare(filter_lists[i], tmp->filters)) {
flag=false;
for (UINT j=0; j<DIMENSIONS; j++) {
tmp->rectangle.start[j]=min(current_node->rectangle[i].start[j], tmp->rectangle.start[j]);
tmp->rectangle.end[j]=max(current_node->rectangle[i].end[j], tmp->rectangle.end[j]);
}
// if (rectanglecmp(rectangle, tmp->rectangle))
// printf("");
}
if (flag) {
for (tmp=rectangles; tmp->next!=NULL; )
tmp=tmp->next;
tmp->next=(rectangle_list *)calloc(1, sizeof(rectangle_list));
tmp->next->rectangle=current_node->rectangle[i];
tmp->next->filters=filter_lists[i];
}
}
for (tmp=rectangles; tmp!=NULL; tmp=tmp->next)
if (tmp->filters!=NULL) {
node *temp_node=NULL;
if (rectanglecmp(rectangle, tmp->rectangle)) {
/* For the redundant branch removal, if the merge rectangle is equal to the original rectangle, it might result in infinite recursive calls,
Therefore, we must move these filters into the current node (just like the instruction specified in common_filter_extraction)*/
current_node->filters=tmp->filters;
current_node->filter_count=id_list_count(current_node->filters);
for (i=0; i<total_cuts; i++) {
if (id_list_compare(filter_lists[i], tmp->filters))
filter_lists[i]=NULL;
}
}
else {
temp_node=tree_construction(tmp->filters, tmp->rectangle, depth+1);
for (i=0; i<total_cuts; i++) {
if (current_node->branch[i]==NULL)
if (id_list_compare(filter_lists[i], tmp->filters))
current_node->branch[i]=temp_node;
}
}
}
#else
for (i=0; i<total_cuts; i++)
if (id_list_count(filter_lists[i])>0)
current_node->branch[i]=tree_construction(filter_lists[i], current_node->rectangle[i], depth+1);
#endif
branch_count+=total_cuts;
if (current_node->filter_count > 0)
branch_count++;
filters_count+=current_node->filter_count;
return current_node;
}
UINT optimal_cut(id_list *filters, UINT dimension, UINT maximal_cut, UINT start, UINT end) {
bool range_cover(UINT, UINT, UINT, UINT);
UINT id_list_count(id_list *), range_calculation(UINT, UINT, UINT);
UINT i, j;
UINT range, max_length, mean_length, empty_bucket, *tmp_array, prev_max_length=id_list_count(filters), prev_mean_length=id_list_count(filters); //, prev_empty_bucket=0;
UINT region_start, region_end, count=0, best_cut=2;
for (i=2; i<=maximal_cut; i*=2) {
tmp_array=(UINT *)calloc(i, sizeof(UINT));
range=range_calculation(start, end, i);
region_start=start;
region_end=start+range-1;
for (j=0; j<i; j++) {
for (id_list *head=filters; head != NULL; head=head->next) {
if (range_cover(region_start, region_end, head->filters->start[dimension], head->filters->end[dimension]))
tmp_array[j]++;
}
region_start=region_end+1;
region_end=min(region_start+range-1, end);
}
mean_length=max_length=tmp_array[0];
empty_bucket=0;
for (j=1; j<i; j++) {
if (tmp_array[j]==0)
empty_bucket++;
if (tmp_array[j]>max_length)
max_length=tmp_array[j];
mean_length+=tmp_array[j];
}
mean_length/=i;
free(tmp_array);
#define threshold 40
if (((prev_max_length-max_length)*threshold/prev_max_length) >= spfac)
// best_cut=i;
if (((prev_mean_length-mean_length)*threshold/prev_mean_length) >= spfac)
best_cut=i;
// if ((empty_bucket*threshold/i) <= spfac)
// best_cut=i;
}
return best_cut;
}
cut_calculation *cuts_optimization(hyper_rectangle rectangle, id_list *filters) {
UINT log2(UINT);
UINT id_list_count(id_list *);
UINT distinct_field_in_rectangle(id_list *, UINT, hyper_rectangle);
UINT filter_count=id_list_count(filters), maximal_cut, i;
int cut_sort( const void *, const void *);
cut_calculation *cuts=(cut_calculation *)calloc(DIMENSIONS, sizeof(cut_calculation));
float mean=0.0;
//计算两维域上规则的不同范围的个数
for (i=0; i<DIMENSIONS; i++) {
cuts[i].dimension=i;
cuts[i].distinct_field=distinct_field_in_rectangle(filters, i, rectangle);
}
//但是为什么要处以4呢?
cuts[0].normalized_count=cuts[0].distinct_field/4;
cuts[1].normalized_count=cuts[1].distinct_field/4;
for (i=0; i<DIMENSIONS; i++)
mean+=cuts[i].normalized_count;
//计算不同值的个数的平均值
mean/=DIMENSIONS;
//对每一维的cut按照不同范围的值的个数的大小进行排序,顺序是从小到大
qsort((void *)cuts, DIMENSIONS, sizeof(cut_calculation), cut_sort);
//maximal_cut是1左移i位第一次大于 f(N)的值 sqrt(x):计算x的平方根
maximal_cut=log2(sqrt((double)filter_count)*spfac);
bool flag=false;
if (maximal_cut >= 2)
flag=true;
float temp_count=0.0;
UINT rouding(float);
//float pow(float x, float y), 计算x的y次幂,返回幂指数的结果。
for (i=0; i<DIMENSIONS; i++)
if ((cuts[i].normalized_count > mean) || (flag))
temp_count+=cuts[i].normalized_count;
for (i=0; i<DIMENSIONS; i++)
if ((cuts[i].normalized_count > mean) || (flag))
cuts[i].maximal_cuts=pow(2.0, rouding((float)cuts[i].normalized_count*(float)maximal_cut/temp_count));
else
cuts[i].maximal_cuts=2;
for (i=0; i<DIMENSIONS; i++)
if ((cuts[i].normalized_count > mean) || (flag))
cuts[i].cuts=min(cuts[i].maximal_cuts, optimal_cut(filters, cuts[i].dimension, cuts[i].maximal_cuts, rectangle.start[cuts[i].dimension], rectangle.end[cuts[i].dimension]));
else cuts[i].cuts=1;
return cuts;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -