📄 construct_idx.c
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <io.h>
#include <fcntl.h>
#include <time.h >
#include "linear_hash.h"
extern BUCKET_BUF bucket;
extern DATA_BUF data;
extern NTH_BLOCK_BUF nth_blk_relation;
extern HASH_INFO hash_info;
extern long DATA_CUR;//记录最后最后读入的数据块号
extern long NTH_BLK_CUR;//记录最后读入的对应信息块号
extern long SEARCH_CUR;
extern BUCKET_BUF buk_buf[BUCKET_BUFFER_SIZE];
extern BUCKET_BUF buk_over_buf[BUCKET_BUFFER_SIZE];
extern DATA_BUF dat_buf[DATA_BUFFER_SIZE];
extern NTH_BLOCK_BUF nth_blk_buf[NTH_BLOCK_BUFFER_SIZE];
void construct_idx()
{
long i = 0;
long j = 0;
long flag = 0;
long data_volume = 0;
long p = 0;
DATA_CUR = 0;
while(DATA_CUR != -1)
{
j = 0;
data_volume = 0;
for(i = 0; i < DATA_BUFFER_SIZE; i++)
{
dat_buf[i].from_blk_no = -1;
for(p = 0; p < LONG_IN_BLOCK; p++)
{
dat_buf[i].key[p] = -1;
}
}
while ( j < DATA_BUFFER_SIZE )//读入需要加入的数据块
{
if( dat_buf[j].from_blk_no == -1 )
{
flag = read_dat("all200000.dat", DATA_CUR);
if(flag != 0)
{
dat_buf[j] = data;
data_volume += flag;
DATA_CUR++;
}
else
{
DATA_CUR = -1;
break;
}
}
j++;
}
scan_data(data_volume);
}
}
void scan_data(long data_volume)
{
long i = 0;
long j = 0;
long flag = 0;
long old_bucket_no = 0;
ITEM_IN_BUCKET *temp = NULL;
NTH_BLK_CUR = 0;
while( j < NTH_BLOCK_BUFFER_SIZE )//读入对应关系文件块
{
if (nth_blk_buf[j].from_blk_no == -1)
{
flag = read_nth_blk("nth_block.dat", NTH_BLK_CUR);
if(flag == 1)
{
nth_blk_buf[j] = nth_blk_relation;
NTH_BLK_CUR++;
}
else
{
NTH_BLK_CUR = 0;
break;
}
}
j++;
}
old_bucket_no = hash_info.bucket_no;
renew_hash_info(data_volume);
add_hash_table_bucket("hash_table.idx", old_bucket_no);//分配新的索引桶并初始化~
printf(" The number of bits of the hash function that currently are used = %d.\n The current number of buckets = %d.\n The current number of records in the hash table = %d.\n",hash_info.hkey_i, hash_info.bucket_no,hash_info.rcd_no);
i = 0;
j = 0;
while( j < NTH_BLOCK_BUFFER_SIZE )//将更改了的对应信息块写回
{
if (nth_blk_buf[j].change_flag == 1 && nth_blk_buf[j].from_blk_no != -1)
{
nth_blk_relation = nth_blk_buf[j];
write_nth_blk("nth_block.dat", nth_blk_buf[j].from_blk_no);
nth_blk_buf[j].from_blk_no = -1;
j++;
continue;
}
j++;
}
i = 0;
j = hash_info.rcd_no;
temp = (ITEM_IN_BUCKET *)malloc(j * sizeof(ITEM_IN_BUCKET));
while( i < j )//初始化动态数组
{
temp[i].hkey = -1;
temp[i].to_rcd.blk_no = -1;
temp[i].to_rcd.offset = -1;
i++;
}
NTH_BLK_CUR = 0;
i = 0;
j = 0;
while( i < 2 )
{
j = 0;
NTH_BLK_CUR = 0;
while( j < NTH_BLOCK_BUFFER_SIZE )//读入对应关系文件块
{
if (nth_blk_buf[j].from_blk_no == -1)
{
flag = read_nth_blk("nth_block.dat", NTH_BLK_CUR);
if(flag == 1)
{
nth_blk_buf[j] = nth_blk_relation;
NTH_BLK_CUR++;
}
else
{
break;
}
}
j++;
if( j == NTH_BLOCK_BUFFER_SIZE )
{
scan_in_buf_nth_blk(data_volume, temp);
j = 0;
}
}
scan_in_buf_nth_blk(data_volume, temp);
i++;
}
free(temp);
for( i = 0; i < DATA_BUFFER_SIZE; i++ )
{
dat_buf[i].from_blk_no = -1;
}
}
void scan_in_buf_nth_blk(long data_volume, ITEM_IN_BUCKET *temp)
{
long i = 0;
long j = 0;
long m = 0;
long n = 0;
while( m < NTH_BLOCK_BUFFER_SIZE)//根据读入的对应信息文件 将相应块读入索引缓冲
{ //可能索引缓冲满了,溢出桶也读入了~可以减少查找的I/O次数
if(nth_blk_buf[m].from_blk_no != -1)
{
while( n < NTH_BLOCK_IN_BLOCK)
{
if( nth_blk_buf[m].nth_blk[n].nth_bucket != -1)
{
read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, nth_blk_buf[m].nth_blk[n].blk_no);
while(j < BUCKET_BUFFER_SIZE)
{
i = 0;
while(bucket.buk.blk_no != -1)//判断当前桶的溢出桶个数
{
read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, bucket.buk.blk_no);
i++;
}
if( (BUCKET_BUFFER_SIZE - j) >= (i + 1))//将桶和溢出桶读入
{
read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, nth_blk_buf[m].nth_blk[n].blk_no);
buk_buf[j] = bucket;
j++;
while(bucket.buk.blk_no != -1)
{
read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, bucket.buk.blk_no);
buk_buf[j] = bucket;
j++;
}
if ( j == BUCKET_BUFFER_SIZE )
{
data_operate(data_volume, temp);
j = 0;
break;
}
else
break;
}
else
{
data_operate(data_volume, temp);
j = 0;
break;
}
}
}
n++;
}
}
m++;
}
data_operate(data_volume,temp);
for( j = 0; j < NTH_BLOCK_BUFFER_SIZE; j++ )
{
nth_blk_buf[j].from_blk_no = -1;
}
}
void data_operate(long data_volume,ITEM_IN_BUCKET *temp)
{
long i = 0;
long j = 0;
long m = 0;
long n = 0;
long p = 0;
long found_over = 0;
long found = 0;
long add_in = 0;
long yeah = 0;
long power = 0;
long fd = 0;
long length = 0;
long flag = 0;
FILE *fp = NULL;
power = pow(2, hash_info.hkey_i - 1);
while( i < BUCKET_BUFFER_SIZE )//将不属于此桶的数据取出来,放入temp动态数组中~
{
j = 0;
while( j < HASH_SPACE )
{
if(buk_buf[i].buk.hk_addr[j].hkey != -1)
{
add_in = add_in_which(buk_buf[i].buk.hk_addr[j].hkey, hash_info.hkey_i);
if( add_in < hash_info.bucket_no)
flag = add_in;
else
flag = add_in - power;
if( flag != buk_buf[i].nth_bucket)
{
for(m = 0; m < hash_info.rcd_no; m++)
{
if(temp[m].hkey == -1)
{
temp[m] = buk_buf[i].buk.hk_addr[j];
buk_buf[i].buk.hk_addr[j].hkey = -1;
buk_buf[i].buk.hk_addr[j].to_rcd.blk_no = -1;
buk_buf[i].buk.hk_addr[j].to_rcd.offset = -1;
buk_buf[i].buk.rcd_no--;
buk_buf[i].change_flag = 1;
break;
}
}
}
}
j++;
}
i++;
}
i = 0;
j = 0;
m = 0;
n = 0;
found = 0;
add_in = 0;
flag = 0;
fd = open("hash_table.idx", O_RDWR);
length = filelength(fd)/BLOCK_SIZE;
close(fd);
while( i < DATA_BUFFER_SIZE )//将DATA_BUF中的数据加入到索引中,有可能需要增加溢出桶~
{
j = 0;
while( j < LONG_IN_BLOCK )
{
if(dat_buf[i].key[j] != -1)
{
add_in = add_in_which(dat_buf[i].key[j], hash_info.hkey_i);
if( add_in < hash_info.bucket_no)
flag = add_in;
else
flag = add_in - power;
m = 0;
while( m < BUCKET_BUFFER_SIZE )
{
if(flag == buk_buf[m].nth_bucket)
{
found = 0;
while(flag == buk_buf[m].nth_bucket)
{
n = 0;
while( n < HASH_SPACE )
{
if(buk_buf[m].buk.hk_addr[n].hkey == -1)
{
buk_buf[m].buk.hk_addr[n].hkey = dat_buf[i].key[j];
dat_buf[i].key[j] = -1;
buk_buf[m].buk.hk_addr[n].to_rcd.blk_no = dat_buf[i].from_blk_no;
buk_buf[m].buk.hk_addr[n].to_rcd.offset = sizeof(long) * j;
buk_buf[m].change_flag = 1;
buk_buf[m].buk.rcd_no++;
found = 1;
goto L;
}
n++;
}
m++;
}
if(found == 0)
{
m--;
if(buk_over_buf[m].from_blk_no != -1)
{
if( buk_over_buf[m].buk.blk_no != -1)
{
bucket = buk_over_buf[m];
found_over = 0;
while(bucket.buk.blk_no != -1)
{
read_idx("hash_table.idx", buk_buf[m].nth_bucket, bucket.buk.blk_no);
p = 0;
while( p < HASH_SPACE )
{
if(bucket.buk.hk_addr[p].hkey == -1)
{
bucket.buk.hk_addr[p].hkey = dat_buf[i].key[j];
dat_buf[i].key[j] = -1;
bucket.buk.hk_addr[p].to_rcd.blk_no = dat_buf[i].from_blk_no;
bucket.buk.hk_addr[p].to_rcd.offset = sizeof(long) * j;
bucket.buk.rcd_no++;
buk_over_buf[m] = bucket;
buk_over_buf[m].change_flag = 1;
found_over = 1;
goto M;
}
p++;
}
}
M: ;
if( found_over == 0)
{
fp = fopen("hash_table.idx", "rb+");
get_blk(fp);
fclose(fp);
length++;
buk_over_buf[m].buk.blk_no = length - 1;
buk_over_buf[m].change_flag = 1;
bucket = buk_over_buf[m];
write_idx("hash_table.idx", bucket.from_blk_no);
read_idx("hash_table.idx",buk_buf[m].nth_bucket, length - 1);
bucket.buk.blk_no = -1;
bucket.buk.rcd_no = 1;
bucket.buk.hk_addr[0].hkey = dat_buf[i].key[j];
dat_buf[i].key[j] = -1;
bucket.buk.hk_addr[0].to_rcd.blk_no = dat_buf[i].from_blk_no;
bucket.buk.hk_addr[0].to_rcd.offset = sizeof(long) * j;
buk_over_buf[m] = bucket;
buk_over_buf[m].change_flag = 1;
}
}
else
{
for( p = 0; p < HASH_SPACE; p++ )
{
if(buk_over_buf[m].buk.hk_addr[p].hkey == -1)
{
buk_over_buf[m].buk.hk_addr[p].hkey = dat_buf[i].key[j];
dat_buf[i].key[j] = -1;
buk_over_buf[m].buk.hk_addr[p].to_rcd.blk_no = dat_buf[i].from_blk_no;
buk_over_buf[m].buk.hk_addr[p].to_rcd.offset = sizeof(long) * j;
buk_over_buf[m].buk.rcd_no++;
buk_over_buf[m].change_flag = 1;
yeah = 1;
break;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -