⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 construct_idx.c

📁 数据库系统实现
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <io.h>
#include <fcntl.h>
#include <time.h >
#include "linear_hash.h"

extern BUCKET_BUF 		bucket;
extern DATA_BUF			data;
extern NTH_BLOCK_BUF	nth_blk_relation;
extern HASH_INFO 		hash_info;

extern long			DATA_CUR;//记录最后最后读入的数据块号
extern long			NTH_BLK_CUR;//记录最后读入的对应信息块号
extern long			SEARCH_CUR;

extern BUCKET_BUF		buk_buf[BUCKET_BUFFER_SIZE];
extern BUCKET_BUF		buk_over_buf[BUCKET_BUFFER_SIZE];
extern DATA_BUF		dat_buf[DATA_BUFFER_SIZE];
extern NTH_BLOCK_BUF	nth_blk_buf[NTH_BLOCK_BUFFER_SIZE];
void construct_idx()
{
	long i = 0;
	long j = 0;
	long flag = 0;
	long data_volume = 0;
	long p = 0;
	DATA_CUR = 0;
	while(DATA_CUR != -1)
	{
		j = 0;
		data_volume = 0;
		for(i = 0; i < DATA_BUFFER_SIZE; i++)
		{
			dat_buf[i].from_blk_no = -1;
			for(p = 0; p < LONG_IN_BLOCK; p++)
			{
				dat_buf[i].key[p] = -1;
			}
		}
		while ( j < DATA_BUFFER_SIZE )//读入需要加入的数据块
		{
			if( dat_buf[j].from_blk_no == -1 )
			{
				flag = read_dat("all200000.dat", DATA_CUR); 
				if(flag != 0)
				{
				dat_buf[j] = data;
				data_volume += flag;
				DATA_CUR++;
				}
				else
				{
					DATA_CUR = -1;
					break;
				}
			}
			j++;
		}
		scan_data(data_volume);
	}
}
void scan_data(long data_volume)
{
	long i = 0;
	long j = 0;
	long flag = 0;
	long old_bucket_no = 0;
	
	ITEM_IN_BUCKET *temp = NULL;
	NTH_BLK_CUR = 0;
	while( j < NTH_BLOCK_BUFFER_SIZE )//读入对应关系文件块
	{
		if (nth_blk_buf[j].from_blk_no == -1)
		{
			flag = read_nth_blk("nth_block.dat", NTH_BLK_CUR);
			if(flag == 1)
			{
				nth_blk_buf[j] = nth_blk_relation;
				NTH_BLK_CUR++;
			}
			else
			{
				NTH_BLK_CUR = 0;
				break;
			}
		}
		j++;
	}
	
	old_bucket_no = hash_info.bucket_no;
	renew_hash_info(data_volume);
	add_hash_table_bucket("hash_table.idx", old_bucket_no);//分配新的索引桶并初始化~
	printf(" The number of bits of the hash function that currently are used = %d.\n The current number of buckets = %d.\n The current number of records in the hash table = %d.\n",hash_info.hkey_i, hash_info.bucket_no,hash_info.rcd_no);

	i = 0;
	j = 0;
	while( j < NTH_BLOCK_BUFFER_SIZE )//将更改了的对应信息块写回
	{
		if (nth_blk_buf[j].change_flag == 1 && nth_blk_buf[j].from_blk_no != -1)
		{
			nth_blk_relation = nth_blk_buf[j];
			write_nth_blk("nth_block.dat", nth_blk_buf[j].from_blk_no);
			nth_blk_buf[j].from_blk_no = -1;
			j++;
			continue;
		}
		j++;
	}

	i = 0;
	j = hash_info.rcd_no;
	temp = (ITEM_IN_BUCKET *)malloc(j * sizeof(ITEM_IN_BUCKET));
	while( i < j )//初始化动态数组
	{
		temp[i].hkey = -1;
		temp[i].to_rcd.blk_no = -1;
		temp[i].to_rcd.offset = -1;
		i++;
	}

	NTH_BLK_CUR = 0;
	i = 0;
	j = 0;
	while( i < 2 )
	{
		j = 0;
		NTH_BLK_CUR = 0;
		while( j < NTH_BLOCK_BUFFER_SIZE )//读入对应关系文件块
		{
			if (nth_blk_buf[j].from_blk_no == -1)
			{
				flag = read_nth_blk("nth_block.dat", NTH_BLK_CUR);
				if(flag == 1)
				{
					nth_blk_buf[j] = nth_blk_relation;
					NTH_BLK_CUR++;
				}
				else
				{
					break;
				}
			}
			j++;
			if( j == NTH_BLOCK_BUFFER_SIZE ) 
			{
				scan_in_buf_nth_blk(data_volume, temp);
				j = 0;
			}
		}
		scan_in_buf_nth_blk(data_volume, temp);
		i++;
	}
	
	free(temp);
	
	for( i = 0; i < DATA_BUFFER_SIZE; i++ )
	{
		dat_buf[i].from_blk_no = -1;
	}
}
void scan_in_buf_nth_blk(long data_volume, ITEM_IN_BUCKET *temp)
{
	long i = 0;
	long j = 0;
	long m = 0;
	long n = 0;
	while( m < NTH_BLOCK_BUFFER_SIZE)//根据读入的对应信息文件 将相应块读入索引缓冲
	{												//可能索引缓冲满了,溢出桶也读入了~可以减少查找的I/O次数
		if(nth_blk_buf[m].from_blk_no != -1)
		{
			while( n < NTH_BLOCK_IN_BLOCK)
			{
				if( nth_blk_buf[m].nth_blk[n].nth_bucket != -1)
				{
					read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, nth_blk_buf[m].nth_blk[n].blk_no);
					while(j < BUCKET_BUFFER_SIZE)
					{	
						i = 0;
						while(bucket.buk.blk_no != -1)//判断当前桶的溢出桶个数
						{
							read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, bucket.buk.blk_no);
							i++;
						}
						
						if( (BUCKET_BUFFER_SIZE - j) >= (i + 1))//将桶和溢出桶读入
						{
							read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, nth_blk_buf[m].nth_blk[n].blk_no);
							buk_buf[j] = bucket;
							j++;
							while(bucket.buk.blk_no != -1)
							{
								read_idx("hash_table.idx", nth_blk_buf[m].nth_blk[n].nth_bucket, bucket.buk.blk_no);
								buk_buf[j] = bucket;
								j++;
							}
							if ( j == BUCKET_BUFFER_SIZE )
							{
								data_operate(data_volume, temp);
								j = 0;
								break;
							}
							else
								break;
						}
						else
						{
							data_operate(data_volume, temp);
							j = 0;
							break;
						}
					}
				}
				n++;
			}
		}
		m++;
	}
	data_operate(data_volume,temp);
	
	for( j = 0; j < NTH_BLOCK_BUFFER_SIZE; j++ )
	{
		nth_blk_buf[j].from_blk_no = -1;
	}
}

void data_operate(long data_volume,ITEM_IN_BUCKET *temp)
{
	long i = 0;
	long j = 0;
	long m = 0;
	long n = 0;
	long p = 0;
	long found_over = 0;
	long found = 0;
	long add_in = 0;
	long yeah = 0;
	long power = 0;
	long fd = 0;
	long length = 0;
	long flag = 0;
	FILE *fp = NULL;
	
	power = pow(2, hash_info.hkey_i - 1);
	
	while( i < BUCKET_BUFFER_SIZE )//将不属于此桶的数据取出来,放入temp动态数组中~
	{
		j = 0;
		while( j < HASH_SPACE )
		{
			if(buk_buf[i].buk.hk_addr[j].hkey != -1)
			{
				add_in = add_in_which(buk_buf[i].buk.hk_addr[j].hkey, hash_info.hkey_i);
				if( add_in < hash_info.bucket_no)
					flag = add_in;
				else
					flag = add_in - power;
			
				if( flag != buk_buf[i].nth_bucket)
				{
					for(m = 0; m < hash_info.rcd_no; m++)
					{
						if(temp[m].hkey == -1)
						{
							temp[m] = buk_buf[i].buk.hk_addr[j];
							buk_buf[i].buk.hk_addr[j].hkey = -1;
							buk_buf[i].buk.hk_addr[j].to_rcd.blk_no = -1;
							buk_buf[i].buk.hk_addr[j].to_rcd.offset = -1;
							buk_buf[i].buk.rcd_no--;
							buk_buf[i].change_flag = 1;
							break;
						}
					}
				}
			}
			j++;
		}
		i++;
	}
	
	i = 0;
	j = 0;
	m = 0;
	n = 0;
	found = 0;
	add_in = 0;
	flag = 0;
	
	fd = open("hash_table.idx", O_RDWR);
	length = filelength(fd)/BLOCK_SIZE;
	close(fd);
	
	while( i < DATA_BUFFER_SIZE )//将DATA_BUF中的数据加入到索引中,有可能需要增加溢出桶~
	{
		j = 0;
		while( j < LONG_IN_BLOCK )
		{
			if(dat_buf[i].key[j] != -1)
			{
				add_in = add_in_which(dat_buf[i].key[j], hash_info.hkey_i);
				if( add_in < hash_info.bucket_no)
					flag = add_in;
				else
					flag = add_in - power;
			
				m = 0;
				while( m < BUCKET_BUFFER_SIZE )
				{
					if(flag == buk_buf[m].nth_bucket)
					{
						found = 0;
						while(flag == buk_buf[m].nth_bucket)
						{
							n = 0;
							while( n < HASH_SPACE )
							{
								if(buk_buf[m].buk.hk_addr[n].hkey == -1)
								{
									buk_buf[m].buk.hk_addr[n].hkey = dat_buf[i].key[j];
									dat_buf[i].key[j] = -1;
									buk_buf[m].buk.hk_addr[n].to_rcd.blk_no = dat_buf[i].from_blk_no;
									buk_buf[m].buk.hk_addr[n].to_rcd.offset = sizeof(long) * j;
									buk_buf[m].change_flag = 1;
									buk_buf[m].buk.rcd_no++;
									found = 1;
									goto L;
								}
								n++;
							}
							m++;
						}
						if(found == 0)
						{
							m--;
							if(buk_over_buf[m].from_blk_no != -1)
							{
								if( buk_over_buf[m].buk.blk_no != -1)
								{
									bucket = buk_over_buf[m];
									found_over = 0;
									while(bucket.buk.blk_no != -1)
									{
										read_idx("hash_table.idx", buk_buf[m].nth_bucket, bucket.buk.blk_no);
										p = 0;
										while( p < HASH_SPACE )
										{
											if(bucket.buk.hk_addr[p].hkey == -1)
											{
												bucket.buk.hk_addr[p].hkey = dat_buf[i].key[j];
												dat_buf[i].key[j] = -1;
												bucket.buk.hk_addr[p].to_rcd.blk_no = dat_buf[i].from_blk_no;
												bucket.buk.hk_addr[p].to_rcd.offset = sizeof(long) * j;
												bucket.buk.rcd_no++;
												buk_over_buf[m] = bucket;
												buk_over_buf[m].change_flag = 1;
												found_over = 1;
												goto M;
											}
											p++;
										}
									}
									M: ;
									if( found_over == 0)
									{
										fp = fopen("hash_table.idx", "rb+");
										get_blk(fp);
										fclose(fp);
										length++;
										buk_over_buf[m].buk.blk_no = length - 1;
										buk_over_buf[m].change_flag = 1;
										bucket = buk_over_buf[m];
										write_idx("hash_table.idx", bucket.from_blk_no);
										read_idx("hash_table.idx",buk_buf[m].nth_bucket, length - 1);
										bucket.buk.blk_no = -1;
										bucket.buk.rcd_no = 1;
										bucket.buk.hk_addr[0].hkey = dat_buf[i].key[j];
										dat_buf[i].key[j] = -1;
										bucket.buk.hk_addr[0].to_rcd.blk_no = dat_buf[i].from_blk_no;
										bucket.buk.hk_addr[0].to_rcd.offset = sizeof(long) * j;						
										buk_over_buf[m] = bucket;
										buk_over_buf[m].change_flag = 1;
									}
								}
								else
								{
									for( p = 0; p < HASH_SPACE; p++ )
									{
										if(buk_over_buf[m].buk.hk_addr[p].hkey == -1)
										{
											buk_over_buf[m].buk.hk_addr[p].hkey = dat_buf[i].key[j];
											dat_buf[i].key[j] = -1;
											buk_over_buf[m].buk.hk_addr[p].to_rcd.blk_no = dat_buf[i].from_blk_no;
											buk_over_buf[m].buk.hk_addr[p].to_rcd.offset = sizeof(long) * j;
											buk_over_buf[m].buk.rcd_no++;	
											buk_over_buf[m].change_flag = 1;
											yeah = 1;
											break;
										}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -