📄 hashtable.c
字号:
/* Copyright (C) (2007) (Benoit Favre) <favre@icsi.berkeley.edu>This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#include "hashtable.h"#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#define MIN(a,b) ((a)>(b)?(b):(a))#define HASHTABLE_VERSION 3#define HASHTABLE_MAGIC 0xabcdef00+HASHTABLE_VERSION#ifdef HASHTABLE_CENTRALIZE_KEYShashtable_t* _hashtable_key_repository=NULL;#endifuint32_t _hashtable_function(const void* key,size_t key_length){ uint32_t hash = 0; int i; for(i=0;i<key_length;i++) { hash += *(char*)(key+i); hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash;}int _hashtable_key_equals(const void* key1, size_t key1_length, const void* key2, size_t key2_length){ if(key1_length!=key2_length)return 0; return !memcmp(key1,key2,key1_length);}hashtable_t* hashtable_new_size_collisions_factor(size_t initial_capacity, size_t max_average_collisions, double resize_factor){ hashtable_t* output=MALLOC(sizeof(hashtable_t)); output->buckets=MALLOC(sizeof(hashelement_t*)*initial_capacity); memset(output->buckets,0,sizeof(hashelement_t*)*initial_capacity); output->length=0; output->size=initial_capacity; output->current_element=NULL;#ifdef HASHTABLE_INCREASE_SIZE output->max_average_collisions=max_average_collisions; output->resize_factor=resize_factor;#endif#ifdef HASHTABLE_GATHER_STATS output->num_accesses=0; output->num_hits_per_access=0;#endif return output;}hashtable_t* string_array_to_hashtable(array_t* input){ hashtable_t* output=hashtable_new(); int i; if(input->length % 2 != 0)warn("hashtable_new_from_string_array(%p), odd number of element in array (%d), last element ignored", input, input->length); for(i=0; i<input->length; i+=2) { string_t* key=array_get(input, i); string_t* value=array_get(input, i+1); hashtable_set(output, key->data, key->length, string_copy(value)); } return output;}array_t* string_array_from_hashtable(hashtable_t* input){ array_t* output=array_new(); hashelement_t* element=hashtable_first_element(input); while(element!=NULL) { array_push(output, string_new_from_to(element->key, 0, element->key_length)); array_push(output, string_copy(element->value)); element=hashtable_next_element(input); } return output;}void hashtable_set(hashtable_t* input, const void* key, size_t key_length, void* value){ //fprintf(stderr,"store(%s,%p)\n",key,value); uint32_t hashcode=_hashtable_function(key, key_length); uint32_t bucket=hashcode%input->size; hashelement_t* element=input->buckets[bucket];#ifdef HASHTABLE_REORDER_ON_ACCESS hashelement_t* previous_element=NULL;#endif#ifdef HASHTABLE_GATHER_STATS unsigned int num_hits=1;#endif while(element!=NULL) { if(element->key_length==key_length && memcmp(element->key,key,key_length)==0) { element->value=value;#ifdef HASHTABLE_GATHER_STATS input->num_hits_per_access+=num_hits; input->num_accesses++;#endif#ifdef HASHTABLE_REORDER_ON_ACCESS if(previous_element!=NULL) { previous_element->next_in_bucket=element->next_in_bucket; element->next_in_bucket=input->buckets[bucket]; input->buckets[bucket]=element; }#endif return; }#ifdef HASHTABLE_GATHER_STATS num_hits++;#endif#ifdef HASHTABLE_REORDER_ON_ACCESS previous_element=element;#endif element=element->next_in_bucket; }#ifdef HASHTABLE_GATHER_STATS input->num_hits_per_access+=num_hits; input->num_accesses++;#endif element=MALLOC(sizeof(hashelement_t)); element->hashcode=hashcode;#ifdef HASHTABLE_CENTRALIZE_KEYS if(input==_hashtable_key_repository) { element->key=key; } else { if(_hashtable_key_repository==NULL) _hashtable_key_repository=hashtable_new(); char* repository_key=hashtable_fetch(_hashtable_key_repository, key, key_length); if(repository_key==NULL) { repository_key=MALLOC(key_length); memcpy(repository_key,key,key_length); hashtable_store(_hashtable_key_repository, repository_key, key_length, repository_key); } element->key=repository_key; }#else element->key=MALLOC(key_length); memcpy(element->key,key,key_length);#endif element->key_length=key_length; element->value=value; element->next_in_bucket=input->buckets[bucket]; input->buckets[bucket]=element; input->length++;#ifdef HASHTABLE_INCREASE_SIZE if(input->length/input->size>input->max_average_collisions)hashtable_resize(input, (size_t)(input->size*(input->resize_factor)+1));//+input->size);#endif}void* hashtable_remove(hashtable_t* input, const void* key, size_t key_length){ uint32_t hashcode=_hashtable_function(key, key_length); uint32_t bucket=hashcode%input->size; hashelement_t* element=input->buckets[bucket]; hashelement_t* previous_element=NULL;#ifdef HASHTABLE_GATHER_STATS unsigned int num_hits=0;#endif while(element!=NULL) { if(element->key_length==key_length && memcmp(element->key,key,key_length)==0) { void* value=element->value; if(previous_element==NULL) input->buckets[bucket]=element->next_in_bucket; else previous_element->next_in_bucket=element->next_in_bucket; FREE(element->key); FREE(element); input->length--;#ifdef HASHTABLE_GATHER_STATS input->num_accesses++; input->num_hits_per_access+=num_hits;#endif return value; }#ifdef HASHTABLE_GATHER_STATS num_hits++;#endif previous_element=element; element=element->next_in_bucket; }#ifdef HASHTABLE_GATHER_STATS input->num_accesses++; input->num_hits_per_access+=num_hits;#endif return NULL;}void* hashtable_get(hashtable_t* input, const void* key, size_t key_length){ //fprintf(stderr,"fetch(%s)\n",key); uint32_t hashcode=_hashtable_function(key, key_length); uint32_t bucket=hashcode%input->size; if(input->buckets[bucket]==NULL)return NULL; hashelement_t* element=input->buckets[bucket];#ifdef HASHTABLE_REORDER_ON_ACCESS hashelement_t* previous_element=NULL;#endif#ifdef HASHTABLE_GATHER_STATS unsigned int num_hits=1;#endif while(element!=NULL) { if(_hashtable_key_equals(element->key,element->key_length,key,key_length)) {#ifdef HASHTABLE_GATHER_STATS input->num_hits_per_access+=num_hits; input->num_accesses++;#endif#ifdef HASHTABLE_REORDER_ON_ACCESS if(previous_element!=NULL) { previous_element->next_in_bucket=element->next_in_bucket; element->next_in_bucket=input->buckets[bucket]; input->buckets[bucket]=element; }#endif return element->value; }#ifdef HASHTABLE_REORDER_ON_ACCESS previous_element=element;#endif element=element->next_in_bucket;#ifdef HASHTABLE_GATHER_STATS num_hits++;#endif }#ifdef HASHTABLE_GATHER_STATS input->num_hits_per_access+=num_hits; input->num_accesses++;#endif return NULL;}void* hashtable_get_or_default(hashtable_t* h,const void* key,size_t key_length,void* defaultValue){ void* value=hashtable_get(h, key, key_length); if(value==NULL)return defaultValue; return value;}off_t hashtable_get_from_file(FILE* file,const void* key,size_t key_length){ int j; int size; off_t begining=ftello(file); int magic=-1; fread(&magic,sizeof(int),1,file); if(magic!=HASHTABLE_MAGIC) { warn("bad magic %d!=%d",magic,HASHTABLE_MAGIC); return (off_t)-1; } fread(&size,sizeof(int),1,file); uint32_t bucket=_hashtable_function(key,key_length)%size; off_t bucketLocation=-1; int bucketLength=-1,bucketSize=-1; fseeko(file,(off_t)(bucket*(sizeof(int)*2+sizeof(off_t))),SEEK_CUR); fread(&bucketLocation,sizeof(off_t),1,file); fread(&bucketLength,sizeof(int),1,file); fread(&bucketSize,sizeof(int),1,file); //warn("bucket=%d location=%lld length=%d size=%d",bucket,bucketLocation,bucketLength,bucketSize); if(bucketLength!=0) { fseeko(file,bucketLocation,SEEK_SET); char buffer[bucketSize]; fread(buffer,bucketSize,1,file); char* ptr=buffer; for(j=0;j<bucketLength;j++) { size_t read_key_length=*(size_t*)ptr; ptr+=sizeof(size_t); //warn("%p %d %p %d\n",key,key_length,ptr,read_key_length); if(_hashtable_key_equals(key,key_length,ptr,read_key_length)) { ptr+=read_key_length; off_t location=*(off_t*)ptr; fseeko(file,begining,SEEK_SET); return location; } ptr+=read_key_length+sizeof(off_t); } } fseeko(file,begining,SEEK_SET); //warn("not found"); return -1;}off_t hashtable_get_from_mapped(mapped_t* mapped,const void* key,size_t key_length){ int j; int pointer=0; int magic=*(int*)(mapped->data); if(magic!=HASHTABLE_MAGIC) { warn("bad magic %x!=%x",magic,HASHTABLE_MAGIC); return (off_t)-1; } pointer+=sizeof(int); int size=*(int*)(mapped->data+pointer); pointer+=sizeof(int); uint32_t bucket=_hashtable_function(key,key_length)%size; pointer+=bucket*(sizeof(int)*2+sizeof(off_t)); off_t bucketLocation=*(off_t*)(mapped->data+pointer); pointer+=sizeof(off_t); int bucketLength=*(int*)(mapped->data+pointer); pointer+=sizeof(int); //int bucketSize=*(int*)(mapped->data+pointer); pointer+=sizeof(int); //fprintf(stderr,"magic=%x size=%d bucket=%d bucketLocation=%lld bucketLength=%d\n",magic,size,bucket,bucketLocation,bucketLength); if(bucketLength!=0) { pointer=bucketLocation; char* buffer=(char*)(mapped->data+pointer); char* ptr=buffer; for(j=0;j<bucketLength;j++) { //fprintf(stderr,"j=%d ptr=%d\n",j,ptr-buffer); size_t read_key_length=*(size_t*)ptr; ptr+=sizeof(size_t); if(_hashtable_key_equals(key,key_length,ptr,read_key_length)) { ptr+=read_key_length; off_t location=*(off_t*)ptr; return location; } ptr+=read_key_length+sizeof(off_t);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -